From c3c9f580393aea658571b00b3afd0b729dffe89b Mon Sep 17 00:00:00 2001 From: Carl Baldwin Date: Fri, 25 Jul 2014 03:57:40 +0000 Subject: [PATCH] Preserve link local IP allocations for DVR fip ns across restart The L3 agent allocates link local address pairs used in connecting the routers to the floating ip namespace. When those allocations are forgetten by restarting the L3 agent they all get rewired on restart. This change preserves the allocations using a file in the local file system. Storing them in the database would be overkill and would affect system performance. Change-Id: I39614c7ea2a7dcc35bf969c90045adc5926ea9df Closes-Bug: #1348306 Partially-Implements: blueprint neutron-ovs-dvr Co-Authored-By: Rajeev Grover --- neutron/agent/l3_agent.py | 133 +++++++++++++++--- .../agents/varmour/test_varmour_router.py | 1 + .../drivers/varmour/test_varmour_fwaas.py | 1 + neutron/tests/unit/test_l3_agent.py | 99 ++++++++++++- 4 files changed, 207 insertions(+), 27 deletions(-) diff --git a/neutron/agent/l3_agent.py b/neutron/agent/l3_agent.py index dc308ca35..bacb21b67 100644 --- a/neutron/agent/l3_agent.py +++ b/neutron/agent/l3_agent.py @@ -20,6 +20,7 @@ import eventlet eventlet.monkey_patch() import netaddr +import os from oslo.config import cfg import Queue @@ -58,7 +59,7 @@ SNAT_NS_PREFIX = 'snat-' FIP_2_ROUTER_DEV_PREFIX = 'fpr-' ROUTER_2_FIP_DEV_PREFIX = 'rfp-' FIP_EXT_DEV_PREFIX = 'fg-' -FIP_LL_PREFIX = '169.254.30.' +FIP_LL_SUBNET = '169.254.30.0/23' # Route Table index for FIPs FIP_RT_TBL = 16 # Rule priority range for FIPs @@ -142,6 +143,99 @@ class L3PluginApi(n_rpc.RpcProxy): version='1.3') +class LinkLocalAddressPair(netaddr.IPNetwork): + def __init__(self, addr): + super(LinkLocalAddressPair, self).__init__(addr) + + def get_pair(self): + """Builds an address pair from the first and last addresses. """ + return (netaddr.IPNetwork("%s/%s" % (self.network, self.prefixlen)), + netaddr.IPNetwork("%s/%s" % (self.broadcast, self.prefixlen))) + + +class LinkLocalAllocator(object): + """Manages allocation of link local IP addresses. + + These link local addresses are used for routing inside the fip namespaces. + The associations need to persist across agent restarts to maintain + consistency. Without this, there is disruption in network connectivity + as the agent rewires the connections with the new IP address assocations. + + Persisting these in the database is unnecessary and would degrade + performance. + """ + def __init__(self, state_file, subnet): + """Read the file with previous allocations recorded. + + See the note in the allocate method for more detail. + """ + self.state_file = state_file + subnet = netaddr.IPNetwork(subnet) + + self.allocations = {} + + self.remembered = {} + for line in self._read(): + key, cidr = line.strip().split(',') + self.remembered[key] = LinkLocalAddressPair(cidr) + + self.pool = set(LinkLocalAddressPair(s) for s in subnet.subnet(31)) + self.pool.difference_update(self.remembered.values()) + + def allocate(self, key): + """Try to allocate a link local address pair. + + I expect this to work in all cases because I expect the pool size to be + large enough for any situation. Nonetheless, there is some defensive + programming in here. + + Since the allocations are persisted, there is the chance to leak + allocations which should have been released but were not. This leak + could eventually exhaust the pool. + + So, if a new allocation is needed, the code first checks to see if + there are any remembered allocations for the key. If not, it checks + the free pool. If the free pool is empty then it dumps the remembered + allocations to free the pool. This final desparate step will not + happen often in practice. + """ + if key in self.remembered: + self.allocations[key] = self.remembered.pop(key) + return self.allocations[key] + + if not self.pool: + # Desparate times. Try to get more in the pool. + self.pool.update(self.remembered.values()) + self.remembered.clear() + if not self.pool: + # More than 256 routers on a compute node! + raise RuntimeError(_("Cannot allocate link local address")) + + self.allocations[key] = self.pool.pop() + self._write_allocations() + return self.allocations[key] + + def release(self, key): + self.pool.add(self.allocations.pop(key)) + self._write_allocations() + + def _write_allocations(self): + current = ["%s,%s\n" % (k, v) for k, v in self.allocations.items()] + remembered = ["%s,%s\n" % (k, v) for k, v in self.remembered.items()] + current.extend(remembered) + self._write(current) + + def _write(self, lines): + with open(self.state_file, "w") as f: + f.writelines(lines) + + def _read(self): + if not os.path.exists(self.state_file): + return [] + with open(self.state_file) as f: + return f.readlines() + + class RouterInfo(object): def __init__(self, router_id, root_helper, use_namespaces, router): @@ -164,10 +258,8 @@ class RouterInfo(object): namespace=self.ns_name) self.routes = [] # DVR Data - # Linklocal router to floating IP addr - self.rtr_2_fip = None - # Linklocal floating to router IP addr - self.fip_2_rtr = None + # Linklocal subnet for router and floating IP namespace link + self.rtr_fip_subnet = None self.dist_fip_count = 0 @property @@ -433,7 +525,9 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, manager.Manager): # dvr data self.agent_gateway_port = None self.agent_fip_count = 0 - self.local_ips = set(range(2, 251)) + self.local_subnets = LinkLocalAllocator( + os.path.join(self.conf.state_path, 'fip-linklocal-networks'), + FIP_LL_SUBNET) self.fip_priorities = set(range(FIP_PR_START, FIP_PR_END)) self._queue = RouterProcessingQueue() @@ -1373,24 +1467,23 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, manager.Manager): fip_ns_name = self.get_fip_ns_name(str(network_id)) # add link local IP to interface - if ri.rtr_2_fip is None: - ri.rtr_2_fip = FIP_LL_PREFIX + str(self.local_ips.pop()) - if ri.fip_2_rtr is None: - ri.fip_2_rtr = FIP_LL_PREFIX + str(self.local_ips.pop()) + if ri.rtr_fip_subnet is None: + ri.rtr_fip_subnet = self.local_subnets.allocate(ri.router_id) + rtr_2_fip, fip_2_rtr = ri.rtr_fip_subnet.get_pair() ip_wrapper = ip_lib.IPWrapper(self.root_helper, namespace=ri.ns_name) int_dev = ip_wrapper.add_veth(rtr_2_fip_name, fip_2_rtr_name, fip_ns_name) - self.internal_ns_interface_added(ri.rtr_2_fip + '/31', + self.internal_ns_interface_added(str(rtr_2_fip), rtr_2_fip_name, ri.ns_name) - self.internal_ns_interface_added(ri.fip_2_rtr + '/31', + self.internal_ns_interface_added(str(fip_2_rtr), fip_2_rtr_name, fip_ns_name) int_dev[0].link.set_up() int_dev[1].link.set_up() # add default route for the link local interface device = ip_lib.IPDevice(rtr_2_fip_name, self.root_helper, namespace=ri.ns_name) - device.route.add_gateway(ri.fip_2_rtr, table=FIP_RT_TBL) + device.route.add_gateway(str(fip_2_rtr.ip), table=FIP_RT_TBL) #setup the NAT rules and chains self._handle_router_fip_nat_rules(ri, rtr_2_fip_name, 'add_rules') @@ -1407,9 +1500,10 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, manager.Manager): #Add routing rule in fip namespace fip_cidr = str(floating_ip) + FLOATING_IP_CIDR_SUFFIX fip_ns_name = self.get_fip_ns_name(str(fip['floating_network_id'])) + rtr_2_fip, _ = ri.rtr_fip_subnet.get_pair() device = ip_lib.IPDevice(fip_2_rtr_name, self.root_helper, namespace=fip_ns_name) - device.route.add_route(fip_cidr, ri.rtr_2_fip) + device.route.add_route(fip_cidr, str(rtr_2_fip.ip)) interface_name = ( self.get_fip_ext_device_name(self.agent_gateway_port['id'])) self._send_gratuitous_arp_packet(fip_ns_name, @@ -1424,6 +1518,7 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, manager.Manager): floating_ip = fip_cidr.split('/')[0] rtr_2_fip_name = self.get_rtr_int_device_name(ri.router_id) fip_2_rtr_name = self.get_fip_int_device_name(ri.router_id) + rtr_2_fip, fip_2_rtr = ri.rtr_fip_subnet.get_pair() fip_ns_name = self.get_fip_ns_name(str(self._fetch_external_net_id())) ip_rule_rtr = ip_lib.IpRule(self.root_helper, namespace=ri.ns_name) if floating_ip in ri.floating_ips_dict: @@ -1437,7 +1532,7 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, manager.Manager): device = ip_lib.IPDevice(fip_2_rtr_name, self.root_helper, namespace=fip_ns_name) - device.route.delete_route(fip_cidr, ri.rtr_2_fip) + device.route.delete_route(fip_cidr, str(rtr_2_fip.ip)) # check if this is the last FIP for this router ri.dist_fip_count = ri.dist_fip_count - 1 if ri.dist_fip_count == 0: @@ -1445,11 +1540,9 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, manager.Manager): device = ip_lib.IPDevice(rtr_2_fip_name, self.root_helper, namespace=ri.ns_name) ns_ip = ip_lib.IPWrapper(self.root_helper, namespace=fip_ns_name) - device.route.delete_gateway(ri.fip_2_rtr, table=FIP_RT_TBL) - self.local_ips.add(ri.rtr_2_fip.rsplit('.', 1)[1]) - ri.rtr_2_fip = None - self.local_ips.add(ri.fip_2_rtr.rsplit('.', 1)[1]) - ri.fip_2_rtr = None + device.route.delete_gateway(str(fip_2_rtr.ip), table=FIP_RT_TBL) + self.local_subnets.release(ri.router_id) + ri.rtr_fip_subnet = None ns_ip.del_veth(fip_2_rtr_name) # clean up fip-namespace if this is the last FIP self.agent_fip_count = self.agent_fip_count - 1 diff --git a/neutron/tests/unit/services/firewall/agents/varmour/test_varmour_router.py b/neutron/tests/unit/services/firewall/agents/varmour/test_varmour_router.py index 735bcd118..4b458fb3b 100644 --- a/neutron/tests/unit/services/firewall/agents/varmour/test_varmour_router.py +++ b/neutron/tests/unit/services/firewall/agents/varmour/test_varmour_router.py @@ -49,6 +49,7 @@ class TestVarmourRouter(base.BaseTestCase): self.conf.set_override('interface_driver', 'neutron.agent.linux.interface.NullDriver') self.conf.root_helper = 'sudo' + self.conf.state_path = '' self.device_exists_p = mock.patch( 'neutron.agent.linux.ip_lib.device_exists') diff --git a/neutron/tests/unit/services/firewall/drivers/varmour/test_varmour_fwaas.py b/neutron/tests/unit/services/firewall/drivers/varmour/test_varmour_fwaas.py index 2cdfff34a..db38e4a4d 100644 --- a/neutron/tests/unit/services/firewall/drivers/varmour/test_varmour_fwaas.py +++ b/neutron/tests/unit/services/firewall/drivers/varmour/test_varmour_fwaas.py @@ -50,6 +50,7 @@ class TestBasicRouterOperations(base.BaseTestCase): self.conf.set_override('interface_driver', 'neutron.agent.linux.interface.NullDriver') self.conf.root_helper = 'sudo' + self.conf.state_path = '' self.device_exists_p = mock.patch( 'neutron.agent.linux.ip_lib.device_exists') diff --git a/neutron/tests/unit/test_l3_agent.py b/neutron/tests/unit/test_l3_agent.py index a1854cff8..e99c6da13 100644 --- a/neutron/tests/unit/test_l3_agent.py +++ b/neutron/tests/unit/test_l3_agent.py @@ -118,6 +118,83 @@ class TestExclusiveRouterProcessor(base.BaseTestCase): self.assertEqual(2, len([i for i in master.updates()])) +class TestLinkLocalAddrAllocator(base.BaseTestCase): + def setUp(self): + super(TestLinkLocalAddrAllocator, self).setUp() + self.subnet = netaddr.IPNetwork('169.254.31.0/24') + + def test__init__(self): + a = l3_agent.LinkLocalAllocator('/file', self.subnet.cidr) + self.assertEqual('/file', a.state_file) + self.assertEqual({}, a.allocations) + + def test__init__readfile(self): + with mock.patch.object(l3_agent.LinkLocalAllocator, '_read') as read: + read.return_value = ["da873ca2,169.254.31.28/31\n"] + a = l3_agent.LinkLocalAllocator('/file', self.subnet.cidr) + + self.assertTrue('da873ca2' in a.remembered) + self.assertEqual({}, a.allocations) + + def test_allocate(self): + a = l3_agent.LinkLocalAllocator('/file', self.subnet.cidr) + with mock.patch.object(l3_agent.LinkLocalAllocator, '_write') as write: + subnet = a.allocate('deadbeef') + + self.assertTrue('deadbeef' in a.allocations) + self.assertTrue(subnet not in a.pool) + self._check_allocations(a.allocations) + write.assert_called_once_with(['deadbeef,%s\n' % subnet.cidr]) + + def test_allocate_from_file(self): + with mock.patch.object(l3_agent.LinkLocalAllocator, '_read') as read: + read.return_value = ["deadbeef,169.254.31.88/31\n"] + a = l3_agent.LinkLocalAllocator('/file', self.subnet.cidr) + + with mock.patch.object(l3_agent.LinkLocalAllocator, '_write') as write: + subnet = a.allocate('deadbeef') + + self.assertEqual(netaddr.IPNetwork('169.254.31.88/31'), subnet) + self.assertTrue(subnet not in a.pool) + self._check_allocations(a.allocations) + self.assertFalse(write.called) + + def test_allocate_exhausted_pool(self): + subnet = netaddr.IPNetwork('169.254.31.0/31') + with mock.patch.object(l3_agent.LinkLocalAllocator, '_read') as read: + read.return_value = ["deadbeef,169.254.31.0/31\n"] + a = l3_agent.LinkLocalAllocator('/file', subnet.cidr) + + with mock.patch.object(l3_agent.LinkLocalAllocator, '_write') as write: + allocation = a.allocate('abcdef12') + + self.assertEqual(subnet, allocation) + self.assertFalse('deadbeef' in a.allocations) + self.assertTrue('abcdef12' in a.allocations) + self.assertTrue(allocation not in a.pool) + self._check_allocations(a.allocations) + write.assert_called_once_with(['abcdef12,%s\n' % allocation.cidr]) + + self.assertRaises(RuntimeError, a.allocate, 'deadbeef') + + def test_release(self): + with mock.patch.object(l3_agent.LinkLocalAllocator, '_write') as write: + a = l3_agent.LinkLocalAllocator('/file', self.subnet.cidr) + subnet = a.allocate('deadbeef') + write.reset_mock() + a.release('deadbeef') + + self.assertTrue('deadbeef' not in a.allocations) + self.assertTrue(subnet in a.pool) + self.assertEqual({}, a.allocations) + write.assert_called_once_with([]) + + def _check_allocations(self, allocations): + for key, subnet in allocations.items(): + self.assertTrue(subnet in self.subnet) + self.assertEqual(subnet.prefixlen, 31) + + def router_append_interface(router, count=1, ip_version=4, ra_mode=None, addr_mode=None): if ip_version == 4: @@ -787,8 +864,9 @@ class TestBasicRouterOperations(base.BaseTestCase): device.addr.list.return_value = [] ri.iptables_manager.ipv4['nat'] = mock.MagicMock() - fip_statuses = agent.process_router_floating_ip_addresses( - ri, {'id': _uuid()}) + with mock.patch.object(l3_agent.LinkLocalAllocator, '_write'): + fip_statuses = agent.process_router_floating_ip_addresses( + ri, {'id': _uuid()}) self.assertEqual({fip_id: l3_constants.FLOATINGIP_STATUS_ACTIVE}, fip_statuses) device.addr.add.assert_called_once_with(4, '15.1.2.3/32', '15.1.2.3') @@ -1843,12 +1921,13 @@ class TestBasicRouterOperations(base.BaseTestCase): fip_2_rtr_name = agent.get_fip_int_device_name(ri.router_id) fip_ns_name = agent.get_fip_ns_name(str(fip['floating_network_id'])) - agent.create_rtr_2_fip_link(ri, fip['floating_network_id']) + with mock.patch.object(l3_agent.LinkLocalAllocator, '_write'): + agent.create_rtr_2_fip_link(ri, fip['floating_network_id']) self.mock_ip.add_veth.assert_called_with(rtr_2_fip_name, fip_2_rtr_name, fip_ns_name) # TODO(mrsmith): add more aasserts - self.mock_ip_dev.route.add_gateway.assert_called_once_with( - ri.fip_2_rtr, table=16) + '169.254.31.29', table=16) # TODO(mrsmith): test _create_agent_gateway_port @@ -1872,12 +1951,14 @@ class TestBasicRouterOperations(base.BaseTestCase): 'floating_network_id': _uuid(), 'port_id': _uuid()} agent.agent_gateway_port = agent_gw_port + ri.rtr_fip_subnet = l3_agent.LinkLocalAddressPair('169.254.30.42/31') agent.floating_ip_added_dist(ri, fip) self.mock_rule.add_rule_from.assert_called_with('192.168.0.1', 16, FIP_PRI) # TODO(mrsmith): add more asserts - def test_floating_ip_removed_dist(self): + @mock.patch.object(l3_agent.LinkLocalAllocator, '_write') + def test_floating_ip_removed_dist(self, write): agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) router = prepare_router_data() agent_gw_port = {'fixed_ips': [{'ip_address': '20.0.0.30', @@ -1896,20 +1977,24 @@ class TestBasicRouterOperations(base.BaseTestCase): ri.fip_2_rtr = '11.22.33.42' ri.rtr_2_fip = '11.22.33.40' agent.agent_gateway_port = agent_gw_port + s = l3_agent.LinkLocalAddressPair('169.254.30.42/31') + ri.rtr_fip_subnet = s agent.floating_ip_removed_dist(ri, fip_cidr) self.mock_rule.delete_rule_priority.assert_called_with(FIP_PRI) self.mock_ip_dev.route.delete_route.assert_called_with(fip_cidr, - ri.rtr_2_fip) + str(s.ip)) with mock.patch.object(agent, '_destroy_fip_namespace') as f: ri.dist_fip_count = 1 agent.agent_fip_count = 1 fip_ns_name = agent.get_fip_ns_name( str(agent._fetch_external_net_id())) + ri.rtr_fip_subnet = agent.local_subnets.allocate(ri.router_id) + _, fip_to_rtr = ri.rtr_fip_subnet.get_pair() agent.floating_ip_removed_dist(ri, fip_cidr) self.mock_ip.del_veth.assert_called_once_with( agent.get_fip_int_device_name(router['id'])) self.mock_ip_dev.route.delete_gateway.assert_called_once_with( - '11.22.33.42', table=16) + str(fip_to_rtr.ip), table=16) f.assert_called_once_with(fip_ns_name) -- 2.45.2