review.fuel-infra Code Review - openstack-build/neutron-build.git/commitdiff
Spawn RADVD only in the master HA router
author sridhargaddam <sridhar.gaddam@enovance.com>
Wed, 8 Apr 2015 10:57:19 +0000 (10:57 +0000)
committer Numan Siddique <numan.siddique@enovance.com>
Thu, 23 Apr 2015 11:45:15 +0000 (17:15 +0530)
Currently radvd is spawned in all the HA routers irrespective of the
state of the router. This approach has the following issues.

1. While processing the internal router ports (i.e., qr-xxx), ha_router
   removes the LLA of the interface and adds it as a VIP to Keepalived conf.
   Radvd daemon is spawned after this operation in the router namespace
   (if the port is associated with any IPv6 subnets). Radvd notices that
   qr-xxx interface does not have the LLA, so it does not transmit any Router
   Advertisements (RAs). In this state, VMs fail to acquire IPv6 addresses because of the
   missing RAs. Radvd does not recover even after keepalived configures the
   LLA of the interface. The only solution is to restart/reload radvd daemon.
   Currently the keepalived-state-change monitor does not perform any radvd-related
   operations when a state transition happens, so we end up in this state
   forever.
2. For all the routers in the Backup state, the qr-xxx interface does not have an LLA,
   as it is managed by keepalived and configured only on the Master HA router.
   On such agents, syslog is flooded with messages like [1], which can cause
   other useful information to be lost.
   [1] - resetting ipv6-allrouters membership on qr-2e373555-97

This patch implements the following.
1. If the router is already in the Master state, we configure the LLA as a VIP
   in keepalived conf but do not delete the LLA of the internal interface.
2. We spawn radvd only if the router is in the Master State.
3. Keepalived-state-change monitor takes care of enabling/disabling radvd upon
   state transitions.

Closes-Bug: #1440699
Change-Id: I351c71d058170265bbb8b56e1f7a3430bd8828d5

neutron/agent/l3/ha.py
neutron/agent/l3/ha_router.py
neutron/agent/l3/router_info.py
neutron/tests/functional/agent/test_l3_agent.py

index e83ea141d5b1d43b8ae832533abeb4375ec24fe4..ffe6a82c54a61ed92cdb645cccad6d554400df2e 100644 (file)
@@ -113,10 +113,7 @@ class AgentMixin(object):
         LOG.info(_LI('Router %(router_id)s transitioned to %(state)s'),
                  {'router_id': router_id,
                   'state': state})
-        self._update_metadata_proxy(router_id, state)
-        self.state_change_notifier.queue_event((router_id, state))
 
-    def _update_metadata_proxy(self, router_id, state):
         try:
             ri = self.router_info[router_id]
         except AttributeError:
@@ -124,6 +121,11 @@ class AgentMixin(object):
                          'possibly deleted concurrently.'), router_id)
             return
 
+        self._update_metadata_proxy(ri, router_id, state)
+        self._update_radvd_daemon(ri, state)
+        self.state_change_notifier.queue_event((router_id, state))
+
+    def _update_metadata_proxy(self, ri, router_id, state):
         if state == 'master':
             LOG.debug('Spawning metadata proxy for router %s', router_id)
             self.metadata_driver.spawn_monitored_metadata_proxy(
@@ -134,6 +136,14 @@ class AgentMixin(object):
             self.metadata_driver.destroy_monitored_metadata_proxy(
                 self.process_monitor, ri.router_id, ri.ns_name, self.conf)
 
+    def _update_radvd_daemon(self, ri, state):
+        # Radvd has to be spawned only on the Master HA Router. If there are
+        # any state transitions, we enable/disable radvd accordingly.
+        if state == 'master':
+            ri.enable_radvd()
+        else:
+            ri.disable_radvd()
+
     def notify_server(self, batched_events):
         translation_map = {'master': 'active',
                            'backup': 'standby',
index 4f62e3442a51d161800f4b0cd9ee091c0de037fb..3a80cc4a0313f3bf9010723c057f473673de2bfd 100644 (file)
@@ -212,14 +212,17 @@ class HaRouter(router.RouterInfo):
     def _should_delete_ipv6_lladdr(self, ipv6_lladdr):
         """Only the master should have any IP addresses configured.
         Let keepalived manage IPv6 link local addresses, the same way we let
-        it manage IPv4 addresses. In order to do that, we must delete
-        the address first as it is autoconfigured by the kernel.
+        it manage IPv4 addresses. If the router is not in the master state,
+        we must delete the address first as it is autoconfigured by the kernel.
         """
         manager = self.keepalived_manager
         if manager.get_process().active:
-            conf = manager.get_conf_on_disk()
-            managed_by_keepalived = conf and ipv6_lladdr in conf
-            if managed_by_keepalived:
+            if self.ha_state != 'master':
+                conf = manager.get_conf_on_disk()
+                managed_by_keepalived = conf and ipv6_lladdr in conf
+                if managed_by_keepalived:
+                    return False
+            else:
                 return False
         return True
 
@@ -353,3 +356,8 @@ class HaRouter(router.RouterInfo):
 
         if self.ha_port:
             self.enable_keepalived()
+
+    def enable_radvd(self, internal_ports=None):
+        if (self.keepalived_manager.get_process().active and
+                self.ha_state == 'master'):
+            super(HaRouter, self).enable_radvd(internal_ports)
index 5569fb77d33d4b53316bfc2df6f26aad660d6eb4..3f0d801a660600fd7b433755f51bce31f29d06a8 100644 (file)
@@ -274,7 +274,7 @@ class RouterInfo(object):
         self.router[l3_constants.INTERFACE_KEY] = []
         self.router[l3_constants.FLOATINGIP_KEY] = []
         self.process(agent)
-        self.radvd.disable()
+        self.disable_radvd()
         if self.router_namespace:
             self.router_namespace.delete()
 
@@ -342,6 +342,17 @@ class RouterInfo(object):
                 if netaddr.IPNetwork(subnet['cidr']).version == 6:
                     return True
 
+    def enable_radvd(self, internal_ports=None):
+        LOG.debug('Spawning radvd daemon in router device: %s', self.router_id)
+        if not internal_ports:
+            internal_ports = self.internal_ports
+        self.radvd.enable(internal_ports)
+
+    def disable_radvd(self):
+        LOG.debug('Terminating radvd daemon in router device: %s',
+                  self.router_id)
+        self.radvd.disable()
+
     def _process_internal_ports(self):
         existing_port_ids = set(p['id'] for p in self.internal_ports)
 
@@ -380,7 +391,7 @@ class RouterInfo(object):
 
         # Enable RA
         if enable_ra:
-            self.radvd.enable(internal_ports)
+            self.enable_radvd(internal_ports)
 
         existing_devices = self._get_existing_devices()
         current_internal_devs = set(n for n in existing_devices
index 73d2a23c29848816f350ff671def3eb2fec93b46..88eec5a8c2b4b074716406bea14194747549f7cf 100755 (executable)
@@ -755,6 +755,31 @@ class L3HATestFramework(L3AgentTestFramework):
         utils.wait_until_true(lambda: router2.ha_state == 'master')
         utils.wait_until_true(lambda: router1.ha_state == 'backup')
 
+    def test_ha_router_ipv6_radvd_status(self):
+        router_info = self.generate_router_info(ip_version=6, enable_ha=True)
+        router1 = self.manage_router(self.agent, router_info)
+        utils.wait_until_true(lambda: router1.ha_state == 'master')
+        utils.wait_until_true(lambda: router1.radvd.enabled)
+
+        def _check_lla_status(router, expected):
+            internal_devices = router.router[l3_constants.INTERFACE_KEY]
+            for device in internal_devices:
+                lladdr = ip_lib.get_ipv6_lladdr(device['mac_address'])
+                exists = ip_lib.device_exists_with_ips_and_mac(
+                    router.get_internal_device_name(device['id']), [lladdr],
+                    device['mac_address'], router.ns_name)
+                self.assertEqual(expected, exists)
+
+        _check_lla_status(router1, True)
+
+        device_name = router1.get_ha_device_name()
+        ha_device = ip_lib.IPDevice(device_name, namespace=router1.ns_name)
+        ha_device.link.set_down()
+
+        utils.wait_until_true(lambda: router1.ha_state == 'backup')
+        utils.wait_until_true(lambda: not router1.radvd.enabled, timeout=10)
+        _check_lla_status(router1, False)
+
 
 class MetadataFakeProxyHandler(object):