review.fuel-infra Code Review - openstack-build/neutron-build.git/commitdiff
Spawn RADVD only in the master HA router
author sridhargaddam <sridhar.gaddam@enovance.com>
Wed, 8 Apr 2015 10:57:19 +0000 (10:57 +0000)
committer Sridhar Gaddam <sridhar.gaddam@enovance.com>
Fri, 1 May 2015 14:06:36 +0000 (14:06 +0000)
Currently radvd is spawned in all the HA routers irrespective of the
state of the router. This approach has the following issues.

1. While processing the internal router ports (i.e., qr-xxx), ha_router
   removes the LLA of the interface and adds it as a VIP to the keepalived
   conf. The radvd daemon is spawned after this operation in the router
   namespace (if the port is associated with any IPv6 subnets). Radvd notices
   that the qr-xxx interface does not have the LLA, so it does not transmit
   any Router Advertisements (RAs). In this state, VMs fail to acquire IPv6
   addresses because of the missing RAs. Radvd does not recover even after
   keepalived configures the LLA of the interface. The only solution is to
   restart/reload the radvd daemon. Currently the keepalived-state-change
   monitor does not perform any radvd-related operations when a state
   transition happens, so we end up in this state forever.
2. For all the routers in the Backup state, the qr-xxx interface does not
   have an LLA, as it is managed by keepalived and configured only on the
   Master HA router. On such agents, syslog is flooded with messages like [1],
   which can cause the loss of other useful information.
   [1] - resetting ipv6-allrouters membership on qr-2e373555-97

This patch implements the following.
1. If the router is already in the Master state, we configure the LLA as a VIP
   in the keepalived conf but do not delete the LLA of the internal interface.
2. We spawn radvd only if the router is in the Master state.
3. The keepalived-state-change monitor takes care of enabling/disabling radvd
   upon state transitions.

Closes-Bug: #1440699
Change-Id: I351c71d058170265bbb8b56e1f7a3430bd8828d5
(cherry picked from commit 2f9b0ce940099bcc82d2940b99bdc387db22d6fc)

neutron/agent/l3/ha.py
neutron/agent/l3/ha_router.py
neutron/agent/l3/router_info.py
neutron/tests/functional/agent/test_l3_agent.py

index a51001a65d0aed790ddd1724fe3b0346c40cafdc..9a55d25c2566a29d6db8eb6430feadeb7f47a2a6 100644 (file)
@@ -114,10 +114,7 @@ class AgentMixin(object):
         LOG.info(_LI('Router %(router_id)s transitioned to %(state)s'),
                  {'router_id': router_id,
                   'state': state})
-        self._update_metadata_proxy(router_id, state)
-        self.state_change_notifier.queue_event((router_id, state))
 
-    def _update_metadata_proxy(self, router_id, state):
         try:
             ri = self.router_info[router_id]
         except AttributeError:
@@ -125,6 +122,11 @@ class AgentMixin(object):
                          'possibly deleted concurrently.'), router_id)
             return
 
+        self._update_metadata_proxy(ri, router_id, state)
+        self._update_radvd_daemon(ri, state)
+        self.state_change_notifier.queue_event((router_id, state))
+
+    def _update_metadata_proxy(self, ri, router_id, state):
         if state == 'master':
             LOG.debug('Spawning metadata proxy for router %s', router_id)
             self.metadata_driver.spawn_monitored_metadata_proxy(
@@ -135,6 +137,14 @@ class AgentMixin(object):
             self.metadata_driver.destroy_monitored_metadata_proxy(
                 self.process_monitor, ri.router_id, ri.ns_name, self.conf)
 
+    def _update_radvd_daemon(self, ri, state):
+        # Radvd has to be spawned only on the Master HA Router. If there are
+        # any state transitions, we enable/disable radvd accordingly.
+        if state == 'master':
+            ri.enable_radvd()
+        else:
+            ri.disable_radvd()
+
     def notify_server(self, batched_events):
         translation_map = {'master': 'active',
                            'backup': 'standby',
index 4b88a3e2a817404dbf54bd80f70b22952c592943..35685b5b51ff750527f9d3e90d82cfff92d7e2ed 100644 (file)
@@ -212,14 +212,17 @@ class HaRouter(router.RouterInfo):
     def _should_delete_ipv6_lladdr(self, ipv6_lladdr):
         """Only the master should have any IP addresses configured.
         Let keepalived manage IPv6 link local addresses, the same way we let
-        it manage IPv4 addresses. In order to do that, we must delete
-        the address first as it is autoconfigured by the kernel.
+        it manage IPv4 addresses. If the router is not in the master state,
+        we must delete the address first as it is autoconfigured by the kernel.
         """
         manager = self.keepalived_manager
         if manager.get_process().active:
-            conf = manager.get_conf_on_disk()
-            managed_by_keepalived = conf and ipv6_lladdr in conf
-            if managed_by_keepalived:
+            if self.ha_state != 'master':
+                conf = manager.get_conf_on_disk()
+                managed_by_keepalived = conf and ipv6_lladdr in conf
+                if managed_by_keepalived:
+                    return False
+            else:
                 return False
         return True
 
@@ -353,3 +356,8 @@ class HaRouter(router.RouterInfo):
 
         if self.ha_port:
             self.enable_keepalived()
+
+    def enable_radvd(self, internal_ports=None):
+        if (self.keepalived_manager.get_process().active and
+                self.ha_state == 'master'):
+            super(HaRouter, self).enable_radvd(internal_ports)
index 5569fb77d33d4b53316bfc2df6f26aad660d6eb4..3f0d801a660600fd7b433755f51bce31f29d06a8 100644 (file)
@@ -274,7 +274,7 @@ class RouterInfo(object):
         self.router[l3_constants.INTERFACE_KEY] = []
         self.router[l3_constants.FLOATINGIP_KEY] = []
         self.process(agent)
-        self.radvd.disable()
+        self.disable_radvd()
         if self.router_namespace:
             self.router_namespace.delete()
 
@@ -342,6 +342,17 @@ class RouterInfo(object):
                 if netaddr.IPNetwork(subnet['cidr']).version == 6:
                     return True
 
+    def enable_radvd(self, internal_ports=None):
+        LOG.debug('Spawning radvd daemon in router device: %s', self.router_id)
+        if not internal_ports:
+            internal_ports = self.internal_ports
+        self.radvd.enable(internal_ports)
+
+    def disable_radvd(self):
+        LOG.debug('Terminating radvd daemon in router device: %s',
+                  self.router_id)
+        self.radvd.disable()
+
     def _process_internal_ports(self):
         existing_port_ids = set(p['id'] for p in self.internal_ports)
 
@@ -380,7 +391,7 @@ class RouterInfo(object):
 
         # Enable RA
         if enable_ra:
-            self.radvd.enable(internal_ports)
+            self.enable_radvd(internal_ports)
 
         existing_devices = self._get_existing_devices()
         current_internal_devs = set(n for n in existing_devices
index f19bca1ce2808d5662f5c92fa04d50b527388ab4..0d8b31c12c58d8615fe0b9beaa9bfb20ed3faf29 100755 (executable)
@@ -764,6 +764,31 @@ class L3HATestFramework(L3AgentTestFramework):
         utils.wait_until_true(lambda: router2.ha_state == 'master')
         utils.wait_until_true(lambda: router1.ha_state == 'backup')
 
+    def test_ha_router_ipv6_radvd_status(self):
+        router_info = self.generate_router_info(ip_version=6, enable_ha=True)
+        router1 = self.manage_router(self.agent, router_info)
+        utils.wait_until_true(lambda: router1.ha_state == 'master')
+        utils.wait_until_true(lambda: router1.radvd.enabled)
+
+        def _check_lla_status(router, expected):
+            internal_devices = router.router[l3_constants.INTERFACE_KEY]
+            for device in internal_devices:
+                lladdr = ip_lib.get_ipv6_lladdr(device['mac_address'])
+                exists = ip_lib.device_exists_with_ips_and_mac(
+                    router.get_internal_device_name(device['id']), [lladdr],
+                    device['mac_address'], router.ns_name)
+                self.assertEqual(expected, exists)
+
+        _check_lla_status(router1, True)
+
+        device_name = router1.get_ha_device_name()
+        ha_device = ip_lib.IPDevice(device_name, namespace=router1.ns_name)
+        ha_device.link.set_down()
+
+        utils.wait_until_true(lambda: router1.ha_state == 'backup')
+        utils.wait_until_true(lambda: not router1.radvd.enabled, timeout=10)
+        _check_lla_status(router1, False)
+
 
 class MetadataFakeProxyHandler(object):