]> review.fuel-infra Code Review - openstack-build/neutron-build.git/commitdiff
Update port bindings for master router
authorMike Kolesnik <mkolesni@redhat.com>
Tue, 7 Apr 2015 23:54:09 +0000 (19:54 -0400)
committerMike Kolesnik <mkolesni@redhat.com>
Thu, 23 Jul 2015 12:15:18 +0000 (15:15 +0300)
An HA port needs to point to the correct host (where the master router
is running) in order for L2Population to work.

Hence, this patch introduces two fixes:
* When a port owned by an HA router is up we make sure it points to the
  right node where the master is running, or a random node if there is
  no master yet (This corner case is fixed by the 2nd bullet point).

* When a L3 agent reports it's hosting a master, we need to update the
  port binding to the host the master is now running on. This fixes
  both routers with no elected master (Yet) and failovers.

This patch also changes the L3 HA failover test to use l2pop.
Note that the test does not pass when using l2pop without this patch.

Closes-Bug: #1365476
Co-Authored-By: Assaf Muller <amuller@redhat.com>
Change-Id: I8475548947526d8ea736ed7aa754fd0ca475cae2

neutron/api/rpc/handlers/l3_rpc.py
neutron/common/constants.py
neutron/db/l3_hamode_db.py
neutron/tests/unit/db/test_l3_hamode_db.py

index 3cc50a5dbff1dd564fce229c261d0668df6400a0..fb190cf30451bbaa4f79df4f965cc0688bf010de 100644 (file)
@@ -104,33 +104,70 @@ class L3RpcCallback(object):
                                                   router.get('gw_port_host'),
                                                   p, router['id'])
             else:
-                self._ensure_host_set_on_port(context, host,
-                                              router.get('gw_port'),
-                                              router['id'])
+                self._ensure_host_set_on_port(
+                    context, host,
+                    router.get('gw_port'),
+                    router['id'],
+                    ha_router_port=router.get('ha'))
             for interface in router.get(constants.INTERFACE_KEY, []):
-                self._ensure_host_set_on_port(context, host,
-                                              interface, router['id'])
+                self._ensure_host_set_on_port(
+                    context,
+                    host,
+                    interface,
+                    router['id'],
+                    ha_router_port=router.get('ha'))
             interface = router.get(constants.HA_INTERFACE_KEY)
             if interface:
                 self._ensure_host_set_on_port(context, host, interface,
                                               router['id'])
 
-    def _ensure_host_set_on_port(self, context, host, port, router_id=None):
+    def _ensure_host_set_on_port(self, context, host, port, router_id=None,
+                                 ha_router_port=False):
         if (port and host is not None and
             (port.get('device_owner') !=
              constants.DEVICE_OWNER_DVR_INTERFACE and
              port.get(portbindings.HOST_ID) != host or
              port.get(portbindings.VIF_TYPE) ==
              portbindings.VIF_TYPE_BINDING_FAILED)):
-            # All ports, including ports created for SNAT'ing for
-            # DVR are handled here
-            try:
-                self.plugin.update_port(context, port['id'],
-                                        {'port': {portbindings.HOST_ID: host}})
-            except exceptions.PortNotFound:
-                LOG.debug("Port %(port)s not found while updating "
-                          "agent binding for router %(router)s.",
-                          {"port": port['id'], "router": router_id})
+
+            # Ports owned by non-HA routers are bound again if they're
+            # already bound but the router moved to another host.
+            if not ha_router_port:
+                # All ports, including ports created for SNAT'ing for
+                # DVR are handled here
+                try:
+                    self.plugin.update_port(
+                        context,
+                        port['id'],
+                        {'port': {portbindings.HOST_ID: host}})
+                except exceptions.PortNotFound:
+                    LOG.debug("Port %(port)s not found while updating "
+                              "agent binding for router %(router)s.",
+                              {"port": port['id'], "router": router_id})
+            # Ports owned by HA routers should only be bound once, if
+            # they are unbound. These ports are moved when an agent reports
+            # that one of its routers moved to the active state.
+            else:
+                if not port.get(portbindings.HOST_ID):
+                    active_host = (
+                        self.l3plugin.get_active_host_for_ha_router(
+                            context, router_id))
+                    if active_host:
+                        host = active_host
+                    # If there is currently no active router instance (For
+                    # example it's a new router), the host that requested
+                    # the routers (Essentially a random host) will do. The
+                    # port binding will be corrected when an active is
+                    # elected.
+                    try:
+                        self.plugin.update_port(
+                            context,
+                            port['id'],
+                            {'port': {portbindings.HOST_ID: host}})
+                    except exceptions.PortNotFound:
+                        LOG.debug("Port %(port)s not found while updating "
+                                  "agent binding for router %(router)s.",
+                                  {"port": port['id'], "router": router_id})
         elif (port and
               port.get('device_owner') ==
               constants.DEVICE_OWNER_DVR_INTERFACE):
index ced7d93161c9307fffba02dd5d28b9648f39e7e7..91195d88a7176ad5f8b65ecf1a684c81ff256bca 100644 (file)
@@ -64,6 +64,8 @@ HA_NETWORK_NAME = 'HA network tenant %s'
 HA_SUBNET_NAME = 'HA subnet tenant %s'
 HA_PORT_NAME = 'HA port tenant %s'
 MINIMUM_AGENTS_FOR_HA = 2
+HA_ROUTER_STATE_ACTIVE = 'active'
+HA_ROUTER_STATE_STANDBY = 'standby'
 
 IPv4 = 'IPv4'
 IPv6 = 'IPv6'
index ef8c0b2fbd3d4cfa3c80e180b3adacb2c28d6eda..4888b036dc41204bad4f1d7292fb1e83df764f5b 100644 (file)
@@ -29,6 +29,7 @@ from neutron.db import l3_dvr_db
 from neutron.db import model_base
 from neutron.db import models_v2
 from neutron.extensions import l3_ext_ha_mode as l3_ha
+from neutron.extensions import portbindings
 from neutron.i18n import _LI
 
 VR_ID_RANGE = set(range(1, 255))
@@ -80,9 +81,11 @@ class L3HARouterAgentPortBinding(model_base.BASEV2):
                                           ondelete='CASCADE'))
     agent = orm.relationship(agents_db.Agent)
 
-    state = sa.Column(sa.Enum('active', 'standby', name='l3_ha_states'),
-                      default='standby',
-                      server_default='standby')
+    state = sa.Column(sa.Enum(constants.HA_ROUTER_STATE_ACTIVE,
+                              constants.HA_ROUTER_STATE_STANDBY,
+                              name='l3_ha_states'),
+                      default=constants.HA_ROUTER_STATE_STANDBY,
+                      server_default=constants.HA_ROUTER_STATE_STANDBY)
 
 
 class L3HARouterNetwork(model_base.BASEV2):
@@ -452,6 +455,20 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin):
         bindings = self.get_ha_router_port_bindings(context, [router_id])
         return [(binding.agent, binding.state) for binding in bindings]
 
+    def get_active_host_for_ha_router(self, context, router_id):
+        bindings = self.get_l3_bindings_hosting_router_with_ha_states(
+            context, router_id)
+        # TODO(amuller): In case we have two or more actives, this method
+        # needs to return the last agent to become active. This requires
+        # timestamps for state changes. Otherwise, if a host goes down
+        # and another takes over, we'll have two actives. In this case,
+        # if an interface is added to a router, its binding might be wrong
+        # and l2pop would not work correctly.
+        return next(
+            (agent.host for agent, state in bindings
+             if state == constants.HA_ROUTER_STATE_ACTIVE),
+            None)
+
     def _process_sync_ha_data(self, context, routers, host):
         routers_dict = dict((router['id'], router) for router in routers)
 
@@ -503,3 +520,22 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin):
         bindings = self.get_ha_router_port_bindings(
             context, router_ids=states.keys(), host=host)
         self._set_router_states(context, bindings, states)
+        self._update_router_port_bindings(context, states, host)
+
+    def _update_router_port_bindings(self, context, states, host):
+        admin_ctx = context.elevated()
+        device_filter = {'device_id': states.keys(),
+                         'device_owner':
+                         [constants.DEVICE_OWNER_ROUTER_INTF]}
+        ports = self._core_plugin.get_ports(admin_ctx, filters=device_filter)
+        active_ports = (port for port in ports
+            if states[port['device_id']] == constants.HA_ROUTER_STATE_ACTIVE)
+
+        for port in active_ports:
+            port[portbindings.HOST_ID] = host
+            try:
+                self._core_plugin.update_port(admin_ctx, port['id'],
+                                              {attributes.PORT: port})
+            except (orm.exc.StaleDataError, orm.exc.ObjectDeletedError):
+                # Take concurrently deleted interfaces in to account
+                pass
index 851fc3ed17b516fea75c6e0ae98d569e03d85548..e988c400726620eaf9627b091c9eeb0c77c5a0c2 100644 (file)
@@ -18,6 +18,7 @@ from oslo_config import cfg
 from oslo_utils import timeutils
 from oslo_utils import uuidutils
 
+from neutron.api.rpc.handlers import l3_rpc
 from neutron.api.v2 import attributes
 from neutron.common import constants
 from neutron import context
@@ -27,6 +28,7 @@ from neutron.db import l3_agentschedulers_db
 from neutron.db import l3_hamode_db
 from neutron.extensions import l3
 from neutron.extensions import l3_ext_ha_mode
+from neutron.extensions import portbindings
 from neutron import manager
 from neutron.scheduler import l3_agent_scheduler
 from neutron.tests.common import helpers
@@ -417,6 +419,20 @@ class L3HATestCase(L3HATestFramework):
         routers_after = self.plugin.get_routers(self.admin_ctx)
         self.assertEqual(routers_before, routers_after)
 
+    def test_get_active_host_for_ha_router(self):
+        router = self._create_router()
+        self._bind_router(router['id'])
+        self.assertEqual(
+            None,
+            self.plugin.get_active_host_for_ha_router(
+                self.admin_ctx, router['id']))
+        self.plugin.update_routers_states(
+            self.admin_ctx, {router['id']: 'active'}, self.agent2['host'])
+        self.assertEqual(
+            self.agent2['host'],
+            self.plugin.get_active_host_for_ha_router(
+                self.admin_ctx, router['id']))
+
     def test_update_routers_states(self):
         router1 = self._create_router()
         self._bind_router(router1['id'])
@@ -518,6 +534,76 @@ class L3HAModeDbTestCase(L3HATestFramework):
             self.admin_ctx, [router['id']])
         self.assertEqual(2, len(bindings))
 
+    def test_update_router_port_bindings_no_ports(self):
+        self.plugin._update_router_port_bindings(
+            self.admin_ctx, {}, self.agent1['host'])
+
+    def _get_first_interface(self, router_id):
+        device_filter = {'device_id': [router_id],
+                         'device_owner':
+                         [constants.DEVICE_OWNER_ROUTER_INTF]}
+        return self.core_plugin.get_ports(
+            self.admin_ctx,
+            filters=device_filter)[0]
+
+    def test_update_router_port_bindings_updates_host(self):
+        network_id = self._create_network(self.core_plugin, self.admin_ctx)
+        subnet = self._create_subnet(self.core_plugin, self.admin_ctx,
+                                     network_id)
+        interface_info = {'subnet_id': subnet['id']}
+
+        router = self._create_router()
+        self._bind_router(router['id'])
+        self.plugin.add_router_interface(self.admin_ctx,
+                                         router['id'],
+                                         interface_info)
+        self.plugin._update_router_port_bindings(
+            self.admin_ctx, {router['id']: 'active'}, self.agent1['host'])
+
+        port = self._get_first_interface(router['id'])
+        self.assertEqual(self.agent1['host'], port[portbindings.HOST_ID])
+
+        self.plugin._update_router_port_bindings(
+            self.admin_ctx, {router['id']: 'active'}, self.agent2['host'])
+        port = self._get_first_interface(router['id'])
+        self.assertEqual(self.agent2['host'], port[portbindings.HOST_ID])
+
+    def test_ensure_host_set_on_ports_binds_correctly(self):
+        network_id = self._create_network(self.core_plugin, self.admin_ctx)
+        subnet = self._create_subnet(self.core_plugin, self.admin_ctx,
+                                     network_id)
+        interface_info = {'subnet_id': subnet['id']}
+
+        router = self._create_router()
+        self._bind_router(router['id'])
+        self.plugin.add_router_interface(self.admin_ctx,
+                                         router['id'],
+                                         interface_info)
+        port = self._get_first_interface(router['id'])
+        self.assertEqual('', port[portbindings.HOST_ID])
+
+        # Update the router object to include the first interface
+        router = (
+            self.plugin.list_active_sync_routers_on_active_l3_agent(
+                self.admin_ctx, self.agent1['host'], [router['id']]))[0]
+
+        # ensure_host_set_on_ports binds an unbound port
+        callback = l3_rpc.L3RpcCallback()
+        callback._l3plugin = self.plugin
+        callback._ensure_host_set_on_ports(
+            self.admin_ctx, self.agent1['host'], [router])
+        port = self._get_first_interface(router['id'])
+        self.assertEqual(self.agent1['host'], port[portbindings.HOST_ID])
+
+        # ensure_host_set_on_ports does not rebind a bound port
+        router = (
+            self.plugin.list_active_sync_routers_on_active_l3_agent(
+                self.admin_ctx, self.agent1['host'], [router['id']]))[0]
+        callback._ensure_host_set_on_ports(
+            self.admin_ctx, self.agent2['host'], [router])
+        port = self._get_first_interface(router['id'])
+        self.assertEqual(self.agent1['host'], port[portbindings.HOST_ID])
+
 
 class L3HAUserTestCase(L3HATestFramework):