From: Mike Kolesnik Date: Tue, 7 Apr 2015 23:54:09 +0000 (-0400) Subject: Update port bindings for master router X-Git-Url: https://review.fuel-infra.org/gitweb?a=commitdiff_plain;h=91d3a0219a43a2c06ea4043cc9eeb518815df391;p=openstack-build%2Fneutron-build.git Update port bindings for master router An HA port needs to point to the correct host (where the master router is running) in order for L2Population to work. Hence, this patch introduces two fixes: * When a port owned by an HA router is up we make sure it points to the right node where the master is running, or a random node if there is no master yet (This corner case is fixed by the 2nd bullet point). * When an L3 agent reports it's hosting a master, we need to update the port binding to the host the master is now running on. This fixes both routers with no elected master (Yet) and failovers. This patch also changes the L3 HA failover test to use l2pop. Note that the test does not pass when using l2pop without this patch. Closes-Bug: #1365476 Co-Authored-By: Assaf Muller Change-Id: I8475548947526d8ea736ed7aa754fd0ca475cae2 --- diff --git a/neutron/api/rpc/handlers/l3_rpc.py b/neutron/api/rpc/handlers/l3_rpc.py index 3cc50a5db..fb190cf30 100644 --- a/neutron/api/rpc/handlers/l3_rpc.py +++ b/neutron/api/rpc/handlers/l3_rpc.py @@ -104,33 +104,70 @@ class L3RpcCallback(object): router.get('gw_port_host'), p, router['id']) else: - self._ensure_host_set_on_port(context, host, - router.get('gw_port'), - router['id']) + self._ensure_host_set_on_port( + context, host, + router.get('gw_port'), + router['id'], + ha_router_port=router.get('ha')) for interface in router.get(constants.INTERFACE_KEY, []): - self._ensure_host_set_on_port(context, host, - interface, router['id']) + self._ensure_host_set_on_port( + context, + host, + interface, + router['id'], + ha_router_port=router.get('ha')) interface = router.get(constants.HA_INTERFACE_KEY) if interface: self._ensure_host_set_on_port(context, host, interface, router['id']) 
- def _ensure_host_set_on_port(self, context, host, port, router_id=None): + def _ensure_host_set_on_port(self, context, host, port, router_id=None, + ha_router_port=False): if (port and host is not None and (port.get('device_owner') != constants.DEVICE_OWNER_DVR_INTERFACE and port.get(portbindings.HOST_ID) != host or port.get(portbindings.VIF_TYPE) == portbindings.VIF_TYPE_BINDING_FAILED)): - # All ports, including ports created for SNAT'ing for - # DVR are handled here - try: - self.plugin.update_port(context, port['id'], - {'port': {portbindings.HOST_ID: host}}) - except exceptions.PortNotFound: - LOG.debug("Port %(port)s not found while updating " - "agent binding for router %(router)s.", - {"port": port['id'], "router": router_id}) + + # Ports owned by non-HA routers are bound again if they're + # already bound but the router moved to another host. + if not ha_router_port: + # All ports, including ports created for SNAT'ing for + # DVR are handled here + try: + self.plugin.update_port( + context, + port['id'], + {'port': {portbindings.HOST_ID: host}}) + except exceptions.PortNotFound: + LOG.debug("Port %(port)s not found while updating " + "agent binding for router %(router)s.", + {"port": port['id'], "router": router_id}) + # Ports owned by HA routers should only be bound once, if + # they are unbound. These ports are moved when an agent reports + # that one of its routers moved to the active state. + else: + if not port.get(portbindings.HOST_ID): + active_host = ( + self.l3plugin.get_active_host_for_ha_router( + context, router_id)) + if active_host: + host = active_host + # If there is currently no active router instance (For + # example it's a new router), the host that requested + # the routers (Essentially a random host) will do. The + # port binding will be corrected when an active is + # elected. 
+ try: + self.plugin.update_port( + context, + port['id'], + {'port': {portbindings.HOST_ID: host}}) + except exceptions.PortNotFound: + LOG.debug("Port %(port)s not found while updating " + "agent binding for router %(router)s.", + {"port": port['id'], "router": router_id}) elif (port and port.get('device_owner') == constants.DEVICE_OWNER_DVR_INTERFACE): diff --git a/neutron/common/constants.py b/neutron/common/constants.py index ced7d9316..91195d88a 100644 --- a/neutron/common/constants.py +++ b/neutron/common/constants.py @@ -64,6 +64,8 @@ HA_NETWORK_NAME = 'HA network tenant %s' HA_SUBNET_NAME = 'HA subnet tenant %s' HA_PORT_NAME = 'HA port tenant %s' MINIMUM_AGENTS_FOR_HA = 2 +HA_ROUTER_STATE_ACTIVE = 'active' +HA_ROUTER_STATE_STANDBY = 'standby' IPv4 = 'IPv4' IPv6 = 'IPv6' diff --git a/neutron/db/l3_hamode_db.py b/neutron/db/l3_hamode_db.py index ef8c0b2fb..4888b036d 100644 --- a/neutron/db/l3_hamode_db.py +++ b/neutron/db/l3_hamode_db.py @@ -29,6 +29,7 @@ from neutron.db import l3_dvr_db from neutron.db import model_base from neutron.db import models_v2 from neutron.extensions import l3_ext_ha_mode as l3_ha +from neutron.extensions import portbindings from neutron.i18n import _LI VR_ID_RANGE = set(range(1, 255)) @@ -80,9 +81,11 @@ class L3HARouterAgentPortBinding(model_base.BASEV2): ondelete='CASCADE')) agent = orm.relationship(agents_db.Agent) - state = sa.Column(sa.Enum('active', 'standby', name='l3_ha_states'), - default='standby', - server_default='standby') + state = sa.Column(sa.Enum(constants.HA_ROUTER_STATE_ACTIVE, + constants.HA_ROUTER_STATE_STANDBY, + name='l3_ha_states'), + default=constants.HA_ROUTER_STATE_STANDBY, + server_default=constants.HA_ROUTER_STATE_STANDBY) class L3HARouterNetwork(model_base.BASEV2): @@ -452,6 +455,20 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin): bindings = self.get_ha_router_port_bindings(context, [router_id]) return [(binding.agent, binding.state) for binding in bindings] + def 
get_active_host_for_ha_router(self, context, router_id): + bindings = self.get_l3_bindings_hosting_router_with_ha_states( + context, router_id) + # TODO(amuller): In case we have two or more actives, this method + # needs to return the last agent to become active. This requires + # timestamps for state changes. Otherwise, if a host goes down + # and another takes over, we'll have two actives. In this case, + # if an interface is added to a router, its binding might be wrong + # and l2pop would not work correctly. + return next( + (agent.host for agent, state in bindings + if state == constants.HA_ROUTER_STATE_ACTIVE), + None) + def _process_sync_ha_data(self, context, routers, host): routers_dict = dict((router['id'], router) for router in routers) @@ -503,3 +520,22 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin): bindings = self.get_ha_router_port_bindings( context, router_ids=states.keys(), host=host) self._set_router_states(context, bindings, states) + self._update_router_port_bindings(context, states, host) + + def _update_router_port_bindings(self, context, states, host): + admin_ctx = context.elevated() + device_filter = {'device_id': states.keys(), + 'device_owner': + [constants.DEVICE_OWNER_ROUTER_INTF]} + ports = self._core_plugin.get_ports(admin_ctx, filters=device_filter) + active_ports = (port for port in ports + if states[port['device_id']] == constants.HA_ROUTER_STATE_ACTIVE) + + for port in active_ports: + port[portbindings.HOST_ID] = host + try: + self._core_plugin.update_port(admin_ctx, port['id'], + {attributes.PORT: port}) + except (orm.exc.StaleDataError, orm.exc.ObjectDeletedError): + # Take concurrently deleted interfaces in to account + pass diff --git a/neutron/tests/unit/db/test_l3_hamode_db.py b/neutron/tests/unit/db/test_l3_hamode_db.py index 851fc3ed1..e988c4007 100644 --- a/neutron/tests/unit/db/test_l3_hamode_db.py +++ b/neutron/tests/unit/db/test_l3_hamode_db.py @@ -18,6 +18,7 @@ from oslo_config import cfg from 
oslo_utils import timeutils from oslo_utils import uuidutils +from neutron.api.rpc.handlers import l3_rpc from neutron.api.v2 import attributes from neutron.common import constants from neutron import context @@ -27,6 +28,7 @@ from neutron.db import l3_agentschedulers_db from neutron.db import l3_hamode_db from neutron.extensions import l3 from neutron.extensions import l3_ext_ha_mode +from neutron.extensions import portbindings from neutron import manager from neutron.scheduler import l3_agent_scheduler from neutron.tests.common import helpers @@ -417,6 +419,20 @@ class L3HATestCase(L3HATestFramework): routers_after = self.plugin.get_routers(self.admin_ctx) self.assertEqual(routers_before, routers_after) + def test_get_active_host_for_ha_router(self): + router = self._create_router() + self._bind_router(router['id']) + self.assertEqual( + None, + self.plugin.get_active_host_for_ha_router( + self.admin_ctx, router['id'])) + self.plugin.update_routers_states( + self.admin_ctx, {router['id']: 'active'}, self.agent2['host']) + self.assertEqual( + self.agent2['host'], + self.plugin.get_active_host_for_ha_router( + self.admin_ctx, router['id'])) + def test_update_routers_states(self): router1 = self._create_router() self._bind_router(router1['id']) @@ -518,6 +534,76 @@ class L3HAModeDbTestCase(L3HATestFramework): self.admin_ctx, [router['id']]) self.assertEqual(2, len(bindings)) + def test_update_router_port_bindings_no_ports(self): + self.plugin._update_router_port_bindings( + self.admin_ctx, {}, self.agent1['host']) + + def _get_first_interface(self, router_id): + device_filter = {'device_id': [router_id], + 'device_owner': + [constants.DEVICE_OWNER_ROUTER_INTF]} + return self.core_plugin.get_ports( + self.admin_ctx, + filters=device_filter)[0] + + def test_update_router_port_bindings_updates_host(self): + network_id = self._create_network(self.core_plugin, self.admin_ctx) + subnet = self._create_subnet(self.core_plugin, self.admin_ctx, + network_id) + interface_info 
= {'subnet_id': subnet['id']} + + router = self._create_router() + self._bind_router(router['id']) + self.plugin.add_router_interface(self.admin_ctx, + router['id'], + interface_info) + self.plugin._update_router_port_bindings( + self.admin_ctx, {router['id']: 'active'}, self.agent1['host']) + + port = self._get_first_interface(router['id']) + self.assertEqual(self.agent1['host'], port[portbindings.HOST_ID]) + + self.plugin._update_router_port_bindings( + self.admin_ctx, {router['id']: 'active'}, self.agent2['host']) + port = self._get_first_interface(router['id']) + self.assertEqual(self.agent2['host'], port[portbindings.HOST_ID]) + + def test_ensure_host_set_on_ports_binds_correctly(self): + network_id = self._create_network(self.core_plugin, self.admin_ctx) + subnet = self._create_subnet(self.core_plugin, self.admin_ctx, + network_id) + interface_info = {'subnet_id': subnet['id']} + + router = self._create_router() + self._bind_router(router['id']) + self.plugin.add_router_interface(self.admin_ctx, + router['id'], + interface_info) + port = self._get_first_interface(router['id']) + self.assertEqual('', port[portbindings.HOST_ID]) + + # Update the router object to include the first interface + router = ( + self.plugin.list_active_sync_routers_on_active_l3_agent( + self.admin_ctx, self.agent1['host'], [router['id']]))[0] + + # ensure_host_set_on_ports binds an unbound port + callback = l3_rpc.L3RpcCallback() + callback._l3plugin = self.plugin + callback._ensure_host_set_on_ports( + self.admin_ctx, self.agent1['host'], [router]) + port = self._get_first_interface(router['id']) + self.assertEqual(self.agent1['host'], port[portbindings.HOST_ID]) + + # ensure_host_set_on_ports does not rebind a bound port + router = ( + self.plugin.list_active_sync_routers_on_active_l3_agent( + self.admin_ctx, self.agent1['host'], [router['id']]))[0] + callback._ensure_host_set_on_ports( + self.admin_ctx, self.agent2['host'], [router]) + port = self._get_first_interface(router['id']) 
+ self.assertEqual(self.agent1['host'], port[portbindings.HOST_ID]) + class L3HAUserTestCase(L3HATestFramework):