From ea587e113a838f74efed9a04c78c0ff3d860d04b Mon Sep 17 00:00:00 2001 From: Assaf Muller Date: Tue, 23 Dec 2014 13:52:41 +0200 Subject: [PATCH] Make L3 HA VIPs ordering consistent in keepalived.conf Currently the order of VIPs in keepalived.conf is determined by sorting the VIPs whenever one is added or removed. As it turns out, keepalived doesn't like it when the primary VIP changes. One side effect is that virtual routes, in our case the router's default route, may be removed. This patch fabricates an IP address on the router's HA interface and uses it as the primary VIP. Closes-Bug: #1404945 Change-Id: I993daf594a28918de6fafff465f5f40e7b89305e --- neutron/agent/l3/ha.py | 6 +- neutron/agent/linux/keepalived.py | 78 ++++++++++++++++--- .../tests/functional/agent/test_l3_agent.py | 5 +- .../tests/unit/agent/linux/test_keepalived.py | 75 ++++++++++++++++-- neutron/tests/unit/test_l3_agent.py | 60 +------------- 5 files changed, 149 insertions(+), 75 deletions(-) diff --git a/neutron/agent/l3/ha.py b/neutron/agent/l3/ha.py index c1f6f4928..7de0960ec 100644 --- a/neutron/agent/l3/ha.py +++ b/neutron/agent/l3/ha.py @@ -113,9 +113,11 @@ class AgentMixin(object): config = ri.keepalived_manager.config interface_name = self.get_ha_device_name(ri.ha_port['id']) + ha_port_cidr = ri.ha_port['subnet']['cidr'] instance = keepalived.KeepalivedInstance( - 'BACKUP', interface_name, ri.ha_vr_id, nopreempt=True, - advert_int=self.conf.ha_vrrp_advert_int, priority=ri.ha_priority) + 'BACKUP', interface_name, ri.ha_vr_id, ha_port_cidr, + nopreempt=True, advert_int=self.conf.ha_vrrp_advert_int, + priority=ri.ha_priority) instance.track_interfaces.append(interface_name) if self.conf.ha_vrrp_auth_password: diff --git a/neutron/agent/linux/keepalived.py b/neutron/agent/linux/keepalived.py index 429206bcb..596ee6e02 100644 --- a/neutron/agent/linux/keepalived.py +++ b/neutron/agent/linux/keepalived.py @@ -16,6 +16,7 @@ import itertools import os import stat +import netaddr from oslo.config import cfg from neutron.agent.linux import external_process @@ -28,10 +29,35 @@ VALID_STATES = ['MASTER', 'BACKUP'] VALID_NOTIFY_STATES = ['master', 'backup', 'fault'] VALID_AUTH_TYPES = ['AH', 'PASS'] HA_DEFAULT_PRIORITY = 50 +PRIMARY_VIP_RANGE_SIZE = 24 +# TODO(amuller): Use L3 agent constant when new constants module is introduced. +FIP_LL_SUBNET = '169.254.30.0/23' + LOG = logging.getLogger(__name__) +def get_free_range(parent_range, excluded_ranges, size=PRIMARY_VIP_RANGE_SIZE): + """Get a free IP range, from parent_range, of the specified size. + + :param parent_range: String representing an IP range. E.g: '169.254.0.0/16' + :param excluded_ranges: A list of strings to be excluded from parent_range + :param size: What should be the size of the range returned? + :return: A string representing an IP range + """ + free_cidrs = netaddr.IPSet([parent_range]) - netaddr.IPSet(excluded_ranges) + for cidr in free_cidrs.iter_cidrs(): + if cidr.prefixlen <= size: + return '%s/%s' % (cidr.network, size) + + raise ValueError(_('Network of size %(size)s, from IP range ' + '%(parent_range)s excluding IP ranges ' + '%(excluded_ranges)s was not found.') % + {'size': size, + 'parent_range': parent_range, + 'excluded_ranges': excluded_ranges}) + + class InvalidInstanceStateException(exceptions.NeutronException): message = (_('Invalid instance state: %%(state)s, valid states are: ' '%(valid_states)s') % @@ -106,7 +132,7 @@ class KeepalivedGroup(object): class KeepalivedInstance(object): """Instance section of a keepalived configuration.""" - def __init__(self, state, interface, vrouter_id, + def __init__(self, state, interface, vrouter_id, ha_cidr, priority=HA_DEFAULT_PRIORITY, advert_int=None, mcast_src_ip=None, nopreempt=False): self.name = 'VR_%s' % vrouter_id @@ -125,6 +151,13 @@ class KeepalivedInstance(object): self.vips = [] self.virtual_routes = [] self.authentication = tuple() + metadata_cidr = '169.254.169.254/32' + self.primary_vip_range = get_free_range( + parent_range='169.254.0.0/16', + excluded_ranges=[metadata_cidr, + FIP_LL_SUBNET, + ha_cidr], + size=PRIMARY_VIP_RANGE_SIZE) def set_authentication(self, auth_type, password): if auth_type not in VALID_AUTH_TYPES: @@ -152,18 +185,46 @@ class KeepalivedInstance(object): (' %s' % i for i in self.track_interfaces), [' }']) - def _build_vips_config(self): - vips_sorted = sorted(self.vips, key=lambda vip: vip.ip_address) - first_address = vips_sorted.pop(0) + def _generate_primary_vip(self): + """Return an address in the primary_vip_range CIDR, with the router's + VRID in the host section. + For example, if primary_vip_range is 169.254.0.0/24, and this router's + VRID is 5, the result is 169.254.0.5. Using the VRID assures that + the primary VIP is consistent amongst HA router instances on different + nodes. + """ + + ip = (netaddr.IPNetwork(self.primary_vip_range).network + + self.vrouter_id) + return netaddr.IPNetwork('%s/%s' % (ip, PRIMARY_VIP_RANGE_SIZE)) + + def _build_vips_config(self): + # NOTE(amuller): The primary VIP must be consistent in order to avoid + # keepalived bugs. Changing the VIP in the 'virtual_ipaddress' and + # SIGHUP'ing keepalived can remove virtual routers, including the + # router's default gateway. + # We solve this by never changing the VIP in the virtual_ipaddress + # section, herein known as the primary VIP. + # The only interface known to exist for HA routers is the HA interface + # (self.interface). We generate an IP on that device and use it as the + # primary VIP. The other VIPs (Internal interfaces IPs, the external + # interface IP and floating IPs) are placed in the + # virtual_ipaddress_excluded section. + + primary = KeepalivedVipAddress(str(self._generate_primary_vip()), + self.interface) vips_result = [' virtual_ipaddress {', - ' %s' % first_address.build_config(), + ' %s' % primary.build_config(), ' }'] - if vips_sorted: + + if self.vips: vips_result.extend( itertools.chain([' virtual_ipaddress_excluded {'], (' %s' % vip.build_config() - for vip in vips_sorted), + for vip in + sorted(self.vips, + key=lambda vip: vip.ip_address)), [' }'])) return vips_result @@ -201,8 +262,7 @@ class KeepalivedInstance(object): if self.track_interfaces: config.extend(self._build_track_interface_config()) - if self.vips: - config.extend(self._build_vips_config()) + config.extend(self._build_vips_config()) if self.virtual_routes: config.extend(self._build_virtual_routes_config()) diff --git a/neutron/tests/functional/agent/test_l3_agent.py b/neutron/tests/functional/agent/test_l3_agent.py index e24ca504c..7e9a83434 100644 --- a/neutron/tests/functional/agent/test_l3_agent.py +++ b/neutron/tests/functional/agent/test_l3_agent.py @@ -173,9 +173,10 @@ vrrp_instance VR_1 { %(ha_device_name)s } virtual_ipaddress { - %(floating_ip_cidr)s dev %(external_device_name)s + 169.254.0.1/24 dev %(ha_device_name)s } virtual_ipaddress_excluded { + %(floating_ip_cidr)s dev %(external_device_name)s %(external_device_cidr)s dev %(external_device_name)s %(internal_device_cidr)s dev %(internal_device_name)s } @@ -377,7 +378,7 @@ class L3HATestFramework(L3AgentTestFramework): router_info_2 = copy.deepcopy(router_info) router_info_2[l3_constants.HA_INTERFACE_KEY] = ( - test_l3_agent.get_ha_interface(ip='169.254.0.3', + test_l3_agent.get_ha_interface(ip='169.254.192.2', mac='22:22:22:22:22:22')) router2 = self.manage_router(self.failover_agent, router_info_2) diff --git a/neutron/tests/unit/agent/linux/test_keepalived.py b/neutron/tests/unit/agent/linux/test_keepalived.py index 9bd6c4f2e..5dfe0dc0f 100644 --- a/neutron/tests/unit/agent/linux/test_keepalived.py +++ b/neutron/tests/unit/agent/linux/test_keepalived.py @@ -12,6 +12,8 @@ # License for the specific language governing permissions and limitations # under the License. +import testtools + from neutron.agent.linux import keepalived from neutron.tests import base @@ -19,6 +21,40 @@ from neutron.tests import base # http://www.keepalived.org/pdf/UserGuide.pdf +class KeepalivedGetFreeRangeTestCase(base.BaseTestCase): + def test_get_free_range(self): + free_range = keepalived.get_free_range( + parent_range='169.254.0.0/16', + excluded_ranges=['169.254.0.0/24', + '169.254.1.0/24', + '169.254.2.0/24'], + size=24) + self.assertEqual('169.254.3.0/24', free_range) + + def test_get_free_range_without_excluded(self): + free_range = keepalived.get_free_range( + parent_range='169.254.0.0/16', + excluded_ranges=[], + size=20) + self.assertEqual('169.254.0.0/20', free_range) + + def test_get_free_range_excluded_out_of_parent(self): + free_range = keepalived.get_free_range( + parent_range='169.254.0.0/16', + excluded_ranges=['255.255.255.0/24'], + size=24) + self.assertEqual('169.254.0.0/24', free_range) + + def test_get_free_range_not_found(self): + tiny_parent_range = '192.168.1.0/24' + huge_size = 8 + with testtools.ExpectedException(ValueError): + keepalived.get_free_range( + parent_range=tiny_parent_range, + excluded_ranges=[], + size=huge_size) + + class KeepalivedConfBaseMixin(object): def _get_config(self): @@ -30,6 +66,7 @@ class KeepalivedConfBaseMixin(object): group1.set_notify('master', '/tmp/script.sh') instance1 = keepalived.KeepalivedInstance('MASTER', 'eth0', 1, + '169.254.192.0/18', advert_int=5) instance1.set_authentication('AH', 'pass123') instance1.track_interfaces.append("eth0") @@ -59,6 +96,7 @@ class KeepalivedConfBaseMixin(object): group1.add_instance(instance1) instance2 = keepalived.KeepalivedInstance('MASTER', 'eth4', 2, + '169.254.192.0/18', mcast_src_ip='224.0.0.1') instance2.track_interfaces.append("eth4") @@ -107,9 +145,10 @@ vrrp_instance VR_1 { eth0 } virtual_ipaddress { - 192.168.1.0/24 dev eth1 + 169.254.0.1/24 dev eth0 } virtual_ipaddress_excluded { + 192.168.1.0/24 dev eth1 192.168.2.0/24 dev eth2 192.168.3.0/24 dev eth2 192.168.55.0/24 dev eth10 @@ -128,9 +167,10 @@ vrrp_instance VR_2 { eth4 } virtual_ipaddress { - 192.168.2.0/24 dev eth2 + 169.254.0.2/24 dev eth4 } virtual_ipaddress_excluded { + 192.168.2.0/24 dev eth2 192.168.3.0/24 dev eth6 192.168.55.0/24 dev eth10 } @@ -160,10 +200,11 @@ class KeepalivedStateExceptionTestCase(base.BaseTestCase): invalid_vrrp_state = 'into a club' self.assertRaises(keepalived.InvalidInstanceStateException, keepalived.KeepalivedInstance, - invalid_vrrp_state, 'eth0', 33) + invalid_vrrp_state, 'eth0', 33, '169.254.192.0/18') invalid_auth_type = '[hip, hip]' - instance = keepalived.KeepalivedInstance('MASTER', 'eth0', 1) + instance = keepalived.KeepalivedInstance('MASTER', 'eth0', 1, + '169.254.192.0/18') self.assertRaises(keepalived.InvalidAuthenticationTypeExecption, instance.set_authentication, invalid_auth_type, 'some_password') @@ -171,6 +212,12 @@ class KeepalivedStateExceptionTestCase(base.BaseTestCase): class KeepalivedInstanceTestCase(base.BaseTestCase, KeepalivedConfBaseMixin): + def test_generate_primary_vip(self): + instance = keepalived.KeepalivedInstance('MASTER', 'ha0', 42, + '169.254.192.0/18') + self.assertEqual('169.254.0.42/24', + str(instance._generate_primary_vip())) + def test_remove_adresses_by_interface(self): config = self._get_config() instance = config.get_instance(1) @@ -202,6 +249,9 @@ vrrp_instance VR_1 { eth0 } virtual_ipaddress { + 169.254.0.1/24 dev eth0 + } + virtual_ipaddress_excluded { 192.168.1.0/24 dev eth1 } virtual_routes { @@ -218,9 +268,10 @@ vrrp_instance VR_2 { eth4 } virtual_ipaddress { - 192.168.2.0/24 dev eth2 + 169.254.0.2/24 dev eth4 } virtual_ipaddress_excluded { + 192.168.2.0/24 dev eth2 192.168.3.0/24 dev eth6 192.168.55.0/24 dev eth10 } @@ -228,6 +279,20 @@ vrrp_instance VR_2 { self.assertEqual(expected, config.get_config_str()) + def test_build_config_no_vips(self): + expected = """vrrp_instance VR_1 { + state MASTER + interface eth0 + virtual_router_id 1 + priority 50 + virtual_ipaddress { + 169.254.0.1/24 dev eth0 + } +}""" + instance = keepalived.KeepalivedInstance( + 'MASTER', 'eth0', 1, '169.254.192.0/18') + self.assertEqual(expected, '\n'.join(instance.build_config())) + class KeepalivedVirtualRouteTestCase(base.BaseTestCase): def test_virtual_route_with_dev(self): diff --git a/neutron/tests/unit/test_l3_agent.py b/neutron/tests/unit/test_l3_agent.py index 4a8944d86..ccae49697 100644 --- a/neutron/tests/unit/test_l3_agent.py +++ b/neutron/tests/unit/test_l3_agent.py @@ -137,7 +137,7 @@ def _get_subnet_id(port): #TODO(jschwarz): This is a shared function with both the unit tests # and the functional tests, and should be moved elsewhere (probably # neutron/tests/common/). -def get_ha_interface(ip='169.254.0.2', mac='12:34:56:78:2b:5d'): +def get_ha_interface(ip='169.254.192.1', mac='12:34:56:78:2b:5d'): return {'admin_state_up': True, 'device_id': _uuid(), 'device_owner': 'network:router_ha_interface', @@ -148,8 +148,8 @@ def get_ha_interface(ip='169.254.0.2', mac='12:34:56:78:2b:5d'): 'name': u'L3 HA Admin port 0', 'network_id': _uuid(), 'status': u'ACTIVE', - 'subnet': {'cidr': '169.254.0.0/24', - 'gateway_ip': '169.254.0.1', + 'subnet': {'cidr': '169.254.192.0/18', + 'gateway_ip': '169.254.255.254', 'id': _uuid()}, 'tenant_id': '', 'agent_id': _uuid(), @@ -889,60 +889,6 @@ class TestBasicRouterOperations(base.BaseTestCase): self.assertEqual(agent.process_router_floating_ip_nat_rules.called, distributed) - def test_ha_router_keepalived_config(self): - agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) - router = prepare_router_data(enable_ha=True) - router['routes'] = [ - {'destination': '8.8.8.8/32', 'nexthop': '35.4.0.10'}, - {'destination': '8.8.4.4/32', 'nexthop': '35.4.0.11'}] - ri = l3router.RouterInfo(router['id'], self.conf.root_helper, - router=router) - ri.router = router - with contextlib.nested(mock.patch.object(agent, - '_spawn_metadata_proxy'), - mock.patch('neutron.agent.linux.' - 'utils.replace_file'), - mock.patch('neutron.agent.linux.' - 'utils.execute'), - mock.patch('os.makedirs')): - agent.process_ha_router_added(ri) - agent.process_router(ri) - config = ri.keepalived_manager.config - ha_iface = agent.get_ha_device_name(ri.ha_port['id']) - ex_iface = agent.get_external_device_name(ri.ex_gw_port['id']) - int_iface = agent.get_internal_device_name( - ri.internal_ports[0]['id']) - - expected = """vrrp_sync_group VG_1 { - group { - VR_1 - } -} -vrrp_instance VR_1 { - state BACKUP - interface %(ha_iface)s - virtual_router_id 1 - priority 50 - nopreempt - advert_int 2 - track_interface { - %(ha_iface)s - } - virtual_ipaddress { - 19.4.4.4/24 dev %(ex_iface)s - } - virtual_ipaddress_excluded { - 35.4.0.4/24 dev %(int_iface)s - } - virtual_routes { - 0.0.0.0/0 via 19.4.4.1 dev %(ex_iface)s - 8.8.8.8/32 via 35.4.0.10 - 8.8.4.4/32 via 35.4.0.11 - } -}""" % {'ha_iface': ha_iface, 'ex_iface': ex_iface, 'int_iface': int_iface} - - self.assertEqual(expected, config.get_config_str()) - @mock.patch('neutron.agent.linux.ip_lib.IPDevice') def _test_process_router_floating_ip_addresses_add(self, ri, agent, IPDevice): -- 2.45.2