]> review.fuel-infra Code Review - openstack-build/neutron-build.git/commitdiff
Preserve link local IP allocations for DVR fip ns across restart
authorCarl Baldwin <carl.baldwin@hp.com>
Fri, 25 Jul 2014 03:57:40 +0000 (03:57 +0000)
committerCarl Baldwin <carl.baldwin@hp.com>
Tue, 12 Aug 2014 20:30:10 +0000 (20:30 +0000)
The L3 agent allocates link local address pairs used in connecting the
routers to the floating ip namespace.  When those allocations are
forgetten by restarting the L3 agent they all get rewired on restart.
This change preserves the allocations using a file in the local file
system.  Storing them in the database would be overkill and would
affect system performance.

Change-Id: I39614c7ea2a7dcc35bf969c90045adc5926ea9df
Closes-Bug: #1348306
Partially-Implements: blueprint neutron-ovs-dvr
Co-Authored-By: Rajeev Grover <rajeev.grover@hp.com>
neutron/agent/l3_agent.py
neutron/tests/unit/services/firewall/agents/varmour/test_varmour_router.py
neutron/tests/unit/services/firewall/drivers/varmour/test_varmour_fwaas.py
neutron/tests/unit/test_l3_agent.py

index dc308ca351f39b3fe1f45a209afd624796454c21..bacb21b676999366cd54b2efe072f1422fe81fc3 100644 (file)
@@ -20,6 +20,7 @@ import eventlet
 eventlet.monkey_patch()
 
 import netaddr
+import os
 from oslo.config import cfg
 import Queue
 
@@ -58,7 +59,7 @@ SNAT_NS_PREFIX = 'snat-'
 FIP_2_ROUTER_DEV_PREFIX = 'fpr-'
 ROUTER_2_FIP_DEV_PREFIX = 'rfp-'
 FIP_EXT_DEV_PREFIX = 'fg-'
-FIP_LL_PREFIX = '169.254.30.'
+FIP_LL_SUBNET = '169.254.30.0/23'
 # Route Table index for FIPs
 FIP_RT_TBL = 16
 # Rule priority range for FIPs
@@ -142,6 +143,99 @@ class L3PluginApi(n_rpc.RpcProxy):
                          version='1.3')
 
 
+class LinkLocalAddressPair(netaddr.IPNetwork):
+    def __init__(self, addr):
+        super(LinkLocalAddressPair, self).__init__(addr)
+
+    def get_pair(self):
+        """Builds an address pair from the first and last addresses. """
+        return (netaddr.IPNetwork("%s/%s" % (self.network, self.prefixlen)),
+                netaddr.IPNetwork("%s/%s" % (self.broadcast, self.prefixlen)))
+
+
+class LinkLocalAllocator(object):
+    """Manages allocation of link local IP addresses.
+
+    These link local addresses are used for routing inside the fip namespaces.
+    The associations need to persist across agent restarts to maintain
+    consistency.  Without this, there is disruption in network connectivity
+    as the agent rewires the connections with the new IP address assocations.
+
+    Persisting these in the database is unnecessary and would degrade
+    performance.
+    """
+    def __init__(self, state_file, subnet):
+        """Read the file with previous allocations recorded.
+
+        See the note in the allocate method for more detail.
+        """
+        self.state_file = state_file
+        subnet = netaddr.IPNetwork(subnet)
+
+        self.allocations = {}
+
+        self.remembered = {}
+        for line in self._read():
+            key, cidr = line.strip().split(',')
+            self.remembered[key] = LinkLocalAddressPair(cidr)
+
+        self.pool = set(LinkLocalAddressPair(s) for s in subnet.subnet(31))
+        self.pool.difference_update(self.remembered.values())
+
+    def allocate(self, key):
+        """Try to allocate a link local address pair.
+
+        I expect this to work in all cases because I expect the pool size to be
+        large enough for any situation.  Nonetheless, there is some defensive
+        programming in here.
+
+        Since the allocations are persisted, there is the chance to leak
+        allocations which should have been released but were not.  This leak
+        could eventually exhaust the pool.
+
+        So, if a new allocation is needed, the code first checks to see if
+        there are any remembered allocations for the key.  If not, it checks
+        the free pool.  If the free pool is empty then it dumps the remembered
+        allocations to free the pool.  This final desparate step will not
+        happen often in practice.
+        """
+        if key in self.remembered:
+            self.allocations[key] = self.remembered.pop(key)
+            return self.allocations[key]
+
+        if not self.pool:
+            # Desparate times.  Try to get more in the pool.
+            self.pool.update(self.remembered.values())
+            self.remembered.clear()
+            if not self.pool:
+                # More than 256 routers on a compute node!
+                raise RuntimeError(_("Cannot allocate link local address"))
+
+        self.allocations[key] = self.pool.pop()
+        self._write_allocations()
+        return self.allocations[key]
+
+    def release(self, key):
+        self.pool.add(self.allocations.pop(key))
+        self._write_allocations()
+
+    def _write_allocations(self):
+        current = ["%s,%s\n" % (k, v) for k, v in self.allocations.items()]
+        remembered = ["%s,%s\n" % (k, v) for k, v in self.remembered.items()]
+        current.extend(remembered)
+        self._write(current)
+
+    def _write(self, lines):
+        with open(self.state_file, "w") as f:
+            f.writelines(lines)
+
+    def _read(self):
+        if not os.path.exists(self.state_file):
+            return []
+        with open(self.state_file) as f:
+            return f.readlines()
+
+
 class RouterInfo(object):
 
     def __init__(self, router_id, root_helper, use_namespaces, router):
@@ -164,10 +258,8 @@ class RouterInfo(object):
             namespace=self.ns_name)
         self.routes = []
         # DVR Data
-        # Linklocal router to floating IP addr
-        self.rtr_2_fip = None
-        # Linklocal floating to router IP addr
-        self.fip_2_rtr = None
+        # Linklocal subnet for router and floating IP namespace link
+        self.rtr_fip_subnet = None
         self.dist_fip_count = 0
 
     @property
@@ -433,7 +525,9 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, manager.Manager):
         # dvr data
         self.agent_gateway_port = None
         self.agent_fip_count = 0
-        self.local_ips = set(range(2, 251))
+        self.local_subnets = LinkLocalAllocator(
+            os.path.join(self.conf.state_path, 'fip-linklocal-networks'),
+            FIP_LL_SUBNET)
         self.fip_priorities = set(range(FIP_PR_START, FIP_PR_END))
 
         self._queue = RouterProcessingQueue()
@@ -1373,24 +1467,23 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, manager.Manager):
         fip_ns_name = self.get_fip_ns_name(str(network_id))
 
         # add link local IP to interface
-        if ri.rtr_2_fip is None:
-            ri.rtr_2_fip = FIP_LL_PREFIX + str(self.local_ips.pop())
-        if ri.fip_2_rtr is None:
-            ri.fip_2_rtr = FIP_LL_PREFIX + str(self.local_ips.pop())
+        if ri.rtr_fip_subnet is None:
+            ri.rtr_fip_subnet = self.local_subnets.allocate(ri.router_id)
+        rtr_2_fip, fip_2_rtr = ri.rtr_fip_subnet.get_pair()
         ip_wrapper = ip_lib.IPWrapper(self.root_helper,
                                       namespace=ri.ns_name)
         int_dev = ip_wrapper.add_veth(rtr_2_fip_name,
                                       fip_2_rtr_name, fip_ns_name)
-        self.internal_ns_interface_added(ri.rtr_2_fip + '/31',
+        self.internal_ns_interface_added(str(rtr_2_fip),
                                          rtr_2_fip_name, ri.ns_name)
-        self.internal_ns_interface_added(ri.fip_2_rtr + '/31',
+        self.internal_ns_interface_added(str(fip_2_rtr),
                                          fip_2_rtr_name, fip_ns_name)
         int_dev[0].link.set_up()
         int_dev[1].link.set_up()
         # add default route for the link local interface
         device = ip_lib.IPDevice(rtr_2_fip_name, self.root_helper,
                                  namespace=ri.ns_name)
-        device.route.add_gateway(ri.fip_2_rtr, table=FIP_RT_TBL)
+        device.route.add_gateway(str(fip_2_rtr.ip), table=FIP_RT_TBL)
         #setup the NAT rules and chains
         self._handle_router_fip_nat_rules(ri, rtr_2_fip_name, 'add_rules')
 
@@ -1407,9 +1500,10 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, manager.Manager):
         #Add routing rule in fip namespace
         fip_cidr = str(floating_ip) + FLOATING_IP_CIDR_SUFFIX
         fip_ns_name = self.get_fip_ns_name(str(fip['floating_network_id']))
+        rtr_2_fip, _ = ri.rtr_fip_subnet.get_pair()
         device = ip_lib.IPDevice(fip_2_rtr_name, self.root_helper,
                                  namespace=fip_ns_name)
-        device.route.add_route(fip_cidr, ri.rtr_2_fip)
+        device.route.add_route(fip_cidr, str(rtr_2_fip.ip))
         interface_name = (
             self.get_fip_ext_device_name(self.agent_gateway_port['id']))
         self._send_gratuitous_arp_packet(fip_ns_name,
@@ -1424,6 +1518,7 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, manager.Manager):
         floating_ip = fip_cidr.split('/')[0]
         rtr_2_fip_name = self.get_rtr_int_device_name(ri.router_id)
         fip_2_rtr_name = self.get_fip_int_device_name(ri.router_id)
+        rtr_2_fip, fip_2_rtr = ri.rtr_fip_subnet.get_pair()
         fip_ns_name = self.get_fip_ns_name(str(self._fetch_external_net_id()))
         ip_rule_rtr = ip_lib.IpRule(self.root_helper, namespace=ri.ns_name)
         if floating_ip in ri.floating_ips_dict:
@@ -1437,7 +1532,7 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, manager.Manager):
         device = ip_lib.IPDevice(fip_2_rtr_name, self.root_helper,
                                  namespace=fip_ns_name)
 
-        device.route.delete_route(fip_cidr, ri.rtr_2_fip)
+        device.route.delete_route(fip_cidr, str(rtr_2_fip.ip))
         # check if this is the last FIP for this router
         ri.dist_fip_count = ri.dist_fip_count - 1
         if ri.dist_fip_count == 0:
@@ -1445,11 +1540,9 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, manager.Manager):
             device = ip_lib.IPDevice(rtr_2_fip_name, self.root_helper,
                                      namespace=ri.ns_name)
             ns_ip = ip_lib.IPWrapper(self.root_helper, namespace=fip_ns_name)
-            device.route.delete_gateway(ri.fip_2_rtr, table=FIP_RT_TBL)
-            self.local_ips.add(ri.rtr_2_fip.rsplit('.', 1)[1])
-            ri.rtr_2_fip = None
-            self.local_ips.add(ri.fip_2_rtr.rsplit('.', 1)[1])
-            ri.fip_2_rtr = None
+            device.route.delete_gateway(str(fip_2_rtr.ip), table=FIP_RT_TBL)
+            self.local_subnets.release(ri.router_id)
+            ri.rtr_fip_subnet = None
             ns_ip.del_veth(fip_2_rtr_name)
         # clean up fip-namespace if this is the last FIP
         self.agent_fip_count = self.agent_fip_count - 1
index 735bcd118bd870971b6d891abcb76f91c86b53e3..4b458fb3b9f08323e50ef1d231efdd89827e98e9 100644 (file)
@@ -49,6 +49,7 @@ class TestVarmourRouter(base.BaseTestCase):
         self.conf.set_override('interface_driver',
                                'neutron.agent.linux.interface.NullDriver')
         self.conf.root_helper = 'sudo'
+        self.conf.state_path = ''
 
         self.device_exists_p = mock.patch(
             'neutron.agent.linux.ip_lib.device_exists')
index 2cdfff34a3ef9aa0dd2315ae6e724223ee444067..db38e4a4d1b6abc18db24afb512ffed39448b8ed 100644 (file)
@@ -50,6 +50,7 @@ class TestBasicRouterOperations(base.BaseTestCase):
         self.conf.set_override('interface_driver',
                                'neutron.agent.linux.interface.NullDriver')
         self.conf.root_helper = 'sudo'
+        self.conf.state_path = ''
 
         self.device_exists_p = mock.patch(
             'neutron.agent.linux.ip_lib.device_exists')
index a1854cff8d30ef9ab8d43559c4c45276a547561e..e99c6da13c9d0cd568e09ba9854c495d14dc4473 100644 (file)
@@ -118,6 +118,83 @@ class TestExclusiveRouterProcessor(base.BaseTestCase):
         self.assertEqual(2, len([i for i in master.updates()]))
 
 
+class TestLinkLocalAddrAllocator(base.BaseTestCase):
+    def setUp(self):
+        super(TestLinkLocalAddrAllocator, self).setUp()
+        self.subnet = netaddr.IPNetwork('169.254.31.0/24')
+
+    def test__init__(self):
+        a = l3_agent.LinkLocalAllocator('/file', self.subnet.cidr)
+        self.assertEqual('/file', a.state_file)
+        self.assertEqual({}, a.allocations)
+
+    def test__init__readfile(self):
+        with mock.patch.object(l3_agent.LinkLocalAllocator, '_read') as read:
+            read.return_value = ["da873ca2,169.254.31.28/31\n"]
+            a = l3_agent.LinkLocalAllocator('/file', self.subnet.cidr)
+
+        self.assertTrue('da873ca2' in a.remembered)
+        self.assertEqual({}, a.allocations)
+
+    def test_allocate(self):
+        a = l3_agent.LinkLocalAllocator('/file', self.subnet.cidr)
+        with mock.patch.object(l3_agent.LinkLocalAllocator, '_write') as write:
+            subnet = a.allocate('deadbeef')
+
+        self.assertTrue('deadbeef' in a.allocations)
+        self.assertTrue(subnet not in a.pool)
+        self._check_allocations(a.allocations)
+        write.assert_called_once_with(['deadbeef,%s\n' % subnet.cidr])
+
+    def test_allocate_from_file(self):
+        with mock.patch.object(l3_agent.LinkLocalAllocator, '_read') as read:
+            read.return_value = ["deadbeef,169.254.31.88/31\n"]
+            a = l3_agent.LinkLocalAllocator('/file', self.subnet.cidr)
+
+        with mock.patch.object(l3_agent.LinkLocalAllocator, '_write') as write:
+            subnet = a.allocate('deadbeef')
+
+        self.assertEqual(netaddr.IPNetwork('169.254.31.88/31'), subnet)
+        self.assertTrue(subnet not in a.pool)
+        self._check_allocations(a.allocations)
+        self.assertFalse(write.called)
+
+    def test_allocate_exhausted_pool(self):
+        subnet = netaddr.IPNetwork('169.254.31.0/31')
+        with mock.patch.object(l3_agent.LinkLocalAllocator, '_read') as read:
+            read.return_value = ["deadbeef,169.254.31.0/31\n"]
+            a = l3_agent.LinkLocalAllocator('/file', subnet.cidr)
+
+        with mock.patch.object(l3_agent.LinkLocalAllocator, '_write') as write:
+            allocation = a.allocate('abcdef12')
+
+        self.assertEqual(subnet, allocation)
+        self.assertFalse('deadbeef' in a.allocations)
+        self.assertTrue('abcdef12' in a.allocations)
+        self.assertTrue(allocation not in a.pool)
+        self._check_allocations(a.allocations)
+        write.assert_called_once_with(['abcdef12,%s\n' % allocation.cidr])
+
+        self.assertRaises(RuntimeError, a.allocate, 'deadbeef')
+
+    def test_release(self):
+        with mock.patch.object(l3_agent.LinkLocalAllocator, '_write') as write:
+            a = l3_agent.LinkLocalAllocator('/file', self.subnet.cidr)
+            subnet = a.allocate('deadbeef')
+            write.reset_mock()
+            a.release('deadbeef')
+
+        self.assertTrue('deadbeef' not in a.allocations)
+        self.assertTrue(subnet in a.pool)
+        self.assertEqual({}, a.allocations)
+        write.assert_called_once_with([])
+
+    def _check_allocations(self, allocations):
+        for key, subnet in allocations.items():
+            self.assertTrue(subnet in self.subnet)
+            self.assertEqual(subnet.prefixlen, 31)
+
+
 def router_append_interface(router, count=1, ip_version=4, ra_mode=None,
                             addr_mode=None):
     if ip_version == 4:
@@ -787,8 +864,9 @@ class TestBasicRouterOperations(base.BaseTestCase):
         device.addr.list.return_value = []
         ri.iptables_manager.ipv4['nat'] = mock.MagicMock()
 
-        fip_statuses = agent.process_router_floating_ip_addresses(
-            ri, {'id': _uuid()})
+        with mock.patch.object(l3_agent.LinkLocalAllocator, '_write'):
+            fip_statuses = agent.process_router_floating_ip_addresses(
+                ri, {'id': _uuid()})
         self.assertEqual({fip_id: l3_constants.FLOATINGIP_STATUS_ACTIVE},
                          fip_statuses)
         device.addr.add.assert_called_once_with(4, '15.1.2.3/32', '15.1.2.3')
@@ -1843,12 +1921,13 @@ class TestBasicRouterOperations(base.BaseTestCase):
         fip_2_rtr_name = agent.get_fip_int_device_name(ri.router_id)
         fip_ns_name = agent.get_fip_ns_name(str(fip['floating_network_id']))
 
-        agent.create_rtr_2_fip_link(ri, fip['floating_network_id'])
+        with mock.patch.object(l3_agent.LinkLocalAllocator, '_write'):
+            agent.create_rtr_2_fip_link(ri, fip['floating_network_id'])
         self.mock_ip.add_veth.assert_called_with(rtr_2_fip_name,
                                                  fip_2_rtr_name, fip_ns_name)
         # TODO(mrsmith): add more aasserts -
         self.mock_ip_dev.route.add_gateway.assert_called_once_with(
-            ri.fip_2_rtr, table=16)
+            '169.254.31.29', table=16)
 
     # TODO(mrsmith): test _create_agent_gateway_port
 
@@ -1872,12 +1951,14 @@ class TestBasicRouterOperations(base.BaseTestCase):
                'floating_network_id': _uuid(),
                'port_id': _uuid()}
         agent.agent_gateway_port = agent_gw_port
+        ri.rtr_fip_subnet = l3_agent.LinkLocalAddressPair('169.254.30.42/31')
         agent.floating_ip_added_dist(ri, fip)
         self.mock_rule.add_rule_from.assert_called_with('192.168.0.1',
                                                         16, FIP_PRI)
         # TODO(mrsmith): add more asserts
 
-    def test_floating_ip_removed_dist(self):
+    @mock.patch.object(l3_agent.LinkLocalAllocator, '_write')
+    def test_floating_ip_removed_dist(self, write):
         agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
         router = prepare_router_data()
         agent_gw_port = {'fixed_ips': [{'ip_address': '20.0.0.30',
@@ -1896,20 +1977,24 @@ class TestBasicRouterOperations(base.BaseTestCase):
         ri.fip_2_rtr = '11.22.33.42'
         ri.rtr_2_fip = '11.22.33.40'
         agent.agent_gateway_port = agent_gw_port
+        s = l3_agent.LinkLocalAddressPair('169.254.30.42/31')
+        ri.rtr_fip_subnet = s
         agent.floating_ip_removed_dist(ri, fip_cidr)
         self.mock_rule.delete_rule_priority.assert_called_with(FIP_PRI)
         self.mock_ip_dev.route.delete_route.assert_called_with(fip_cidr,
-                                                               ri.rtr_2_fip)
+                                                               str(s.ip))
         with mock.patch.object(agent, '_destroy_fip_namespace') as f:
             ri.dist_fip_count = 1
             agent.agent_fip_count = 1
             fip_ns_name = agent.get_fip_ns_name(
                 str(agent._fetch_external_net_id()))
+            ri.rtr_fip_subnet = agent.local_subnets.allocate(ri.router_id)
+            _, fip_to_rtr = ri.rtr_fip_subnet.get_pair()
             agent.floating_ip_removed_dist(ri, fip_cidr)
             self.mock_ip.del_veth.assert_called_once_with(
                 agent.get_fip_int_device_name(router['id']))
             self.mock_ip_dev.route.delete_gateway.assert_called_once_with(
-                '11.22.33.42', table=16)
+                str(fip_to_rtr.ip), table=16)
             f.assert_called_once_with(fip_ns_name)