From: sridhargaddam
Date: Wed, 8 Apr 2015 10:57:19 +0000 (+0000)
Subject: Spawn RADVD only in the master HA router
X-Git-Url: https://review.fuel-infra.org/gitweb?a=commitdiff_plain;h=2f9b0ce940099bcc82d2940b99bdc387db22d6fc;p=openstack-build%2Fneutron-build.git

Spawn RADVD only in the master HA router

Currently radvd is spawned in all HA routers irrespective of the state
of the router. This approach has the following issues.

1. While processing the internal router ports (i.e., qr-xxx), ha_router
   removes the LLA of the interface and adds it as a VIP to the
   keepalived conf. The radvd daemon is spawned after this operation in
   the router namespace (if the port is associated with any IPv6
   subnets). Radvd notices that the qr-xxx interface does not have the
   LLA, so it does not transmit any Router Advertisements (RAs). In
   this state, VMs fail to acquire IPv6 addresses because of the
   missing RAs. Radvd does not recover even after keepalived configures
   the LLA of the interface; the only solution is to restart/reload the
   radvd daemon. Currently the keepalived-state-change monitor does not
   perform any radvd-related operations when a state transition
   happens, so we end up in this state forever.

2. For all routers in the backup state, the qr-xxx interface does not
   have an LLA, as the LLA is managed by keepalived and configured only
   on the master HA router. On such agents, syslog is flooded with
   messages like [1], which can drown out other useful information.

[1] - resetting ipv6-allrouters membership on qr-2e373555-97

This patch implements the following.

1. If the router is already in the master state, we configure the LLA
   as a VIP in the keepalived conf but do not delete the LLA of the
   internal interface.
2. We spawn radvd only if the router is in the master state.
3. The keepalived-state-change monitor takes care of enabling/disabling
   radvd upon state transitions.

Closes-Bug: #1440699
Change-Id: I351c71d058170265bbb8b56e1f7a3430bd8828d5
---
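For review purposes, the rule the patch enforces can be stated
compactly: radvd should be running if and only if the HA router is in
the master state, and every transition reported by the
keepalived-state-change monitor re-evaluates that invariant. The
standalone sketch below illustrates just that rule; FakeRadvd and
RadvdSwitch are hypothetical stand-ins and not part of the patch (the
real logic is AgentMixin._update_radvd_daemon in the diff below).

# Minimal, self-contained sketch (assumed names, not part of the patch)
# of the master-only rule added by this change.

class FakeRadvd(object):
    """Stand-in for the radvd process manager; tracks enabled state only."""

    def __init__(self):
        self.enabled = False

    def enable(self):
        self.enabled = True

    def disable(self):
        self.enabled = False


class RadvdSwitch(object):
    """Re-evaluates the master-only invariant on every state transition."""

    def __init__(self, radvd):
        self.radvd = radvd

    def transition(self, state):
        # A backup->master failover (re)spawns radvd, so it advertises
        # using the LLA keepalived has already configured; any other
        # transition stops it.
        if state == 'master':
            self.radvd.enable()
        else:
            self.radvd.disable()


if __name__ == '__main__':
    switch = RadvdSwitch(FakeRadvd())
    for state in ('master', 'backup', 'master'):
        switch.transition(state)
        assert switch.radvd.enabled == (state == 'master')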
diff --git a/neutron/agent/l3/ha.py b/neutron/agent/l3/ha.py
index e83ea141d..ffe6a82c5 100644
--- a/neutron/agent/l3/ha.py
+++ b/neutron/agent/l3/ha.py
@@ -113,10 +113,7 @@ class AgentMixin(object):
         LOG.info(_LI('Router %(router_id)s transitioned to %(state)s'),
                  {'router_id': router_id,
                   'state': state})
-        self._update_metadata_proxy(router_id, state)
-        self.state_change_notifier.queue_event((router_id, state))
-
-    def _update_metadata_proxy(self, router_id, state):
+
         try:
             ri = self.router_info[router_id]
         except AttributeError:
@@ -124,6 +121,11 @@ class AgentMixin(object):
                          'possibly deleted concurrently.'), router_id)
             return
 
+        self._update_metadata_proxy(ri, router_id, state)
+        self._update_radvd_daemon(ri, state)
+        self.state_change_notifier.queue_event((router_id, state))
+
+    def _update_metadata_proxy(self, ri, router_id, state):
         if state == 'master':
             LOG.debug('Spawning metadata proxy for router %s', router_id)
             self.metadata_driver.spawn_monitored_metadata_proxy(
@@ -134,6 +136,14 @@ class AgentMixin(object):
             self.metadata_driver.destroy_monitored_metadata_proxy(
                 self.process_monitor, ri.router_id, ri.ns_name, self.conf)
 
+    def _update_radvd_daemon(self, ri, state):
+        # Radvd has to be spawned only on the Master HA Router. If there are
+        # any state transitions, we enable/disable radvd accordingly.
+        if state == 'master':
+            ri.enable_radvd()
+        else:
+            ri.disable_radvd()
+
     def notify_server(self, batched_events):
         translation_map = {'master': 'active',
                            'backup': 'standby',
diff --git a/neutron/agent/l3/ha_router.py b/neutron/agent/l3/ha_router.py
index 4f62e3442..3a80cc4a0 100644
--- a/neutron/agent/l3/ha_router.py
+++ b/neutron/agent/l3/ha_router.py
@@ -212,14 +212,17 @@ class HaRouter(router.RouterInfo):
     def _should_delete_ipv6_lladdr(self, ipv6_lladdr):
         """Only the master should have any IP addresses configured. Let
         keepalived manage IPv6 link local addresses, the same way we let
-        it manage IPv4 addresses. In order to do that, we must delete
-        the address first as it is autoconfigured by the kernel.
+        it manage IPv4 addresses. If the router is not in the master state,
+        we must delete the address first as it is autoconfigured by the kernel.
         """
         manager = self.keepalived_manager
         if manager.get_process().active:
-            conf = manager.get_conf_on_disk()
-            managed_by_keepalived = conf and ipv6_lladdr in conf
-            if managed_by_keepalived:
+            if self.ha_state != 'master':
+                conf = manager.get_conf_on_disk()
+                managed_by_keepalived = conf and ipv6_lladdr in conf
+                if managed_by_keepalived:
+                    return False
+            else:
                 return False
         return True
 
@@ -353,3 +356,8 @@ class HaRouter(router.RouterInfo):
 
         if self.ha_port:
             self.enable_keepalived()
+
+    def enable_radvd(self, internal_ports=None):
+        if (self.keepalived_manager.get_process().active and
+                self.ha_state == 'master'):
+            super(HaRouter, self).enable_radvd(internal_ports)
diff --git a/neutron/agent/l3/router_info.py b/neutron/agent/l3/router_info.py
index 5569fb77d..3f0d801a6 100644
--- a/neutron/agent/l3/router_info.py
+++ b/neutron/agent/l3/router_info.py
@@ -274,7 +274,7 @@ class RouterInfo(object):
         self.router[l3_constants.INTERFACE_KEY] = []
         self.router[l3_constants.FLOATINGIP_KEY] = []
         self.process(agent)
-        self.radvd.disable()
+        self.disable_radvd()
 
         if self.router_namespace:
             self.router_namespace.delete()
@@ -342,6 +342,17 @@ class RouterInfo(object):
                 if netaddr.IPNetwork(subnet['cidr']).version == 6:
                     return True
 
+    def enable_radvd(self, internal_ports=None):
+        LOG.debug('Spawning radvd daemon in router device: %s', self.router_id)
+        if not internal_ports:
+            internal_ports = self.internal_ports
+        self.radvd.enable(internal_ports)
+
+    def disable_radvd(self):
+        LOG.debug('Terminating radvd daemon in router device: %s',
+                  self.router_id)
+        self.radvd.disable()
+
     def _process_internal_ports(self):
         existing_port_ids = set(p['id'] for p in self.internal_ports)
 
@@ -380,7 +391,7 @@ class RouterInfo(object):
 
         # Enable RA
         if enable_ra:
-            self.radvd.enable(internal_ports)
+            self.enable_radvd(internal_ports)
 
         existing_devices = self._get_existing_devices()
         current_internal_devs = set(n for n in existing_devices
diff --git a/neutron/tests/functional/agent/test_l3_agent.py b/neutron/tests/functional/agent/test_l3_agent.py
index 73d2a23c2..88eec5a8c 100755
--- a/neutron/tests/functional/agent/test_l3_agent.py
+++ b/neutron/tests/functional/agent/test_l3_agent.py
@@ -755,6 +755,31 @@ class L3HATestFramework(L3AgentTestFramework):
         utils.wait_until_true(lambda: router2.ha_state == 'master')
         utils.wait_until_true(lambda: router1.ha_state == 'backup')
 
+    def test_ha_router_ipv6_radvd_status(self):
+        router_info = self.generate_router_info(ip_version=6, enable_ha=True)
+        router1 = self.manage_router(self.agent, router_info)
+        utils.wait_until_true(lambda: router1.ha_state == 'master')
+        utils.wait_until_true(lambda: router1.radvd.enabled)
+
+        def _check_lla_status(router, expected):
+            internal_devices = router.router[l3_constants.INTERFACE_KEY]
+            for device in internal_devices:
+                lladdr = ip_lib.get_ipv6_lladdr(device['mac_address'])
+                exists = ip_lib.device_exists_with_ips_and_mac(
+                    router.get_internal_device_name(device['id']), [lladdr],
+                    device['mac_address'], router.ns_name)
+                self.assertEqual(expected, exists)
+
+        _check_lla_status(router1, True)
+
+        device_name = router1.get_ha_device_name()
+        ha_device = ip_lib.IPDevice(device_name, namespace=router1.ns_name)
+        ha_device.link.set_down()
+
+        utils.wait_until_true(lambda: router1.ha_state == 'backup')
+        utils.wait_until_true(lambda: not router1.radvd.enabled, timeout=10)
+        _check_lla_status(router1, False)
+
 
 class MetadataFakeProxyHandler(object):