From 7cfcbac066842e2d1dd3578e9eb5801bb4a1af34 Mon Sep 17 00:00:00 2001
From: Michael Smith
Date: Tue, 16 Sep 2014 17:00:05 -0700
Subject: [PATCH] manual add/remove router for dvr_snat agent

This patch is to address the failure of manual move of dvr_snat routers
from one service node to another.

The entry in the csnat_l3_agent_bindings table is now removed during
the router to agent unbind operation. Appropriate notification is now
sent to the agent to remove snat/qrouter namespace.

There were other places in the code that needed to examine the snat
binding table to check if updates were required -
validate_agent_router_combination() and
check_agent_router_scheduling_needed().

Additionally, schedule_routers() was made optional within the rpc
_notification path since it can override the manual move being
attempted.

Change-Id: Iac9598eb79f455c4ef3d3243a96bed524e3d2f7c
Closes-Bug: #1369721
Co-Authored-By: Ila Palanisamy
Co-Authored-By: Oleg Bondarev
---
 .../rpc/agentnotifiers/l3_rpc_agent_api.py         |   9 +-
 neutron/api/rpc/handlers/l3_rpc.py                 |   9 +-
 neutron/db/l3_agentschedulers_db.py                |  20 +--
 neutron/db/l3_dvrscheduler_db.py                   | 117 ++++++++++++++----
 .../openvswitch/agent/test_agent_scheduler.py      |  41 ++++++
 5 files changed, 157 insertions(+), 39 deletions(-)

diff --git a/neutron/api/rpc/agentnotifiers/l3_rpc_agent_api.py b/neutron/api/rpc/agentnotifiers/l3_rpc_agent_api.py
index c0a7160c0..fb5b3d046 100644
--- a/neutron/api/rpc/agentnotifiers/l3_rpc_agent_api.py
+++ b/neutron/api/rpc/agentnotifiers/l3_rpc_agent_api.py
@@ -100,7 +100,7 @@ class L3AgentNotifyAPI(object):
             cctxt.cast(context, method, payload=dvr_arptable)
 
     def _notification(self, context, method, router_ids, operation,
-                      shuffle_agents):
+                      shuffle_agents, schedule_routers=True):
         """Notify all the agents that are hosting the routers."""
         plugin = manager.NeutronManager.get_service_plugins().get(
             service_constants.L3_ROUTER_NAT)
@@ -112,7 +112,8 @@ class L3AgentNotifyAPI(object):
                 plugin, constants.L3_AGENT_SCHEDULER_EXT_ALIAS):
             adminContext = (context.is_admin and
                             context or context.elevated())
-            plugin.schedule_routers(adminContext, router_ids)
+            if schedule_routers:
+                plugin.schedule_routers(adminContext, router_ids)
             self._agent_notification(
                 context, method, router_ids, operation, shuffle_agents)
         else:
@@ -138,10 +139,10 @@ class L3AgentNotifyAPI(object):
         self._notification_fanout(context, 'router_deleted', router_id)
 
     def routers_updated(self, context, router_ids, operation=None, data=None,
-                        shuffle_agents=False):
+                        shuffle_agents=False, schedule_routers=True):
         if router_ids:
             self._notification(context, 'routers_updated', router_ids,
-                               operation, shuffle_agents)
+                               operation, shuffle_agents, schedule_routers)
 
     def add_arp_entry(self, context, router_id, arp_table, operation=None):
         self._agent_notification_arp(context, 'add_arp_entry', router_id,
diff --git a/neutron/api/rpc/handlers/l3_rpc.py b/neutron/api/rpc/handlers/l3_rpc.py
index b1129cc74..6c2ca53f2 100644
--- a/neutron/api/rpc/handlers/l3_rpc.py
+++ b/neutron/api/rpc/handlers/l3_rpc.py
@@ -98,13 +98,16 @@ class L3RpcCallback(object):
             LOG.debug("Checking router: %(id)s for host: %(host)s",
                       {'id': router['id'], 'host': host})
             if router.get('gw_port') and router.get('distributed'):
+                # '' is used to effectively clear binding of a gw port if not
+                # bound (snat is not hosted on any l3 agent)
+                gw_port_host = router.get('gw_port_host') or ''
                 self._ensure_host_set_on_port(context,
-                                              router.get('gw_port_host'),
+                                              gw_port_host,
                                               router.get('gw_port'),
                                               router['id'])
                 for p in router.get(constants.SNAT_ROUTER_INTF_KEY, []):
                     self._ensure_host_set_on_port(context,
-                                                  router.get('gw_port_host'),
+                                                  gw_port_host,
                                                   p, router['id'])
             else:
                 self._ensure_host_set_on_port(
@@ -143,6 +146,8 @@ class L3RpcCallback(object):
                     context,
                     port['id'],
                     {'port': {portbindings.HOST_ID: host}})
+                # updating port's host to pass actual info to l3 agent
+                port[portbindings.HOST_ID] = host
             except exceptions.PortNotFound:
                 LOG.debug("Port %(port)s not found while updating "
                           "agent binding for router %(router)s.",
diff --git a/neutron/db/l3_agentschedulers_db.py b/neutron/db/l3_agentschedulers_db.py
index e5980b41a..e5e19040e 100644
--- a/neutron/db/l3_agentschedulers_db.py
+++ b/neutron/db/l3_agentschedulers_db.py
@@ -148,6 +148,9 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
         :raises: DVRL3CannotAssignToDvrAgent if attempting to assign DVR
                  router from one DVR Agent to another.
         """
+        if agent['agent_type'] != constants.AGENT_TYPE_L3:
+            raise l3agentscheduler.InvalidL3Agent(id=agent['id'])
+
         is_distributed = router.get('distributed')
         agent_mode = self._get_agent_mode(agent)
         router_type = (
@@ -167,13 +170,14 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
                 router_type=router_type,
                 router_id=router['id'],
                 agent_id=agent['id'])
-        is_wrong_type_or_unsuitable_agent = (
-            agent['agent_type'] != constants.AGENT_TYPE_L3 or
-            not agentschedulers_db.services_available(agent['admin_state_up'])
-            or
-            not self.get_l3_agent_candidates(context, router, [agent],
-                                             ignore_admin_state=True))
-        if is_wrong_type_or_unsuitable_agent:
+        is_suitable_agent = (
+            agentschedulers_db.services_available(agent['admin_state_up']) and
+            (self.get_l3_agent_candidates(context, router,
+                                          [agent],
+                                          ignore_admin_state=True) or
+             self.get_snat_candidates(router, [agent]))
+        )
+        if not is_suitable_agent:
             raise l3agentscheduler.InvalidL3Agent(id=agent['id'])
 
     def check_agent_router_scheduling_needed(self, context, agent, router):
@@ -193,8 +197,6 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
             if binding.l3_agent_id == agent_id:
                 # router already bound to the agent we need
                 return False
-        if router.get('distributed'):
-            return False
         if router.get('ha'):
             return True
         # legacy router case: router is already bound to some agent
diff --git a/neutron/db/l3_dvrscheduler_db.py b/neutron/db/l3_dvrscheduler_db.py
index 5e937611a..6389155cd 100644
--- a/neutron/db/l3_dvrscheduler_db.py
+++ b/neutron/db/l3_dvrscheduler_db.py
@@ -250,46 +250,73 @@ class L3_DVRsch_db_mixin(l3agent_sch_db.L3AgentSchedulerDbMixin):
             self.bind_snat_router(context, router_id, chosen_snat_agent)
             return chosen_snat_agent
 
-    def unbind_snat_servicenode(self, context, router_id):
-        """Unbind the snat router to the chosen l3 service agent."""
-        vm_ports = []
+    def unbind_snat(self, context, router_id, agent_id=None):
+        """Unbind snat from the chosen l3 service agent.
+
+        Unbinds from any L3 agent hosting SNAT if passed agent_id is None
+        """
         with context.session.begin(subtransactions=True):
             query = (context.session.
                      query(CentralizedSnatL3AgentBinding).
                     filter_by(router_id=router_id))
+            if agent_id:
+                query = query.filter_by(l3_agent_id=agent_id)
             try:
                 binding = query.one()
             except exc.NoResultFound:
-                LOG.debug('no snat router binding found for %s', router_id)
+                LOG.debug('no snat router binding found for router: %('
+                          'router)s, agent: %(agent)s',
+                          {'router': router_id, 'agent': agent_id or 'any'})
                 return
 
+            agent_id = binding.l3_agent_id
+            LOG.debug('Delete binding of the SNAT router %(router_id)s '
+                      'from agent %(id)s', {'router_id': router_id,
+                                            'id': agent_id})
+            context.session.delete(binding)
+
+        return binding
+
+    def unbind_router_servicenode(self, context, router_id, binding):
+        """Unbind the router from the chosen l3 service agent."""
+        port_found = False
+        with context.session.begin(subtransactions=True):
             host = binding.l3_agent.host
             subnet_ids = self.get_subnet_ids_on_router(context, router_id)
             for subnet in subnet_ids:
-                vm_ports = (
+                ports = (
                     self._core_plugin.get_ports_on_host_by_subnet(
                         context, host, subnet))
-                if vm_ports:
-                    LOG.debug('One or more ports exist on the snat enabled '
-                              'l3_agent host %(host)s and router_id %(id)s',
-                              {'host': host, 'id': router_id})
-                    break
+                for port in ports:
+                    if (n_utils.is_dvr_serviced(port['device_owner'])):
+                        port_found = True
+                        LOG.debug('One or more ports exist on the snat '
+                                  'enabled l3_agent host %(host)s and '
+                                  'router_id %(id)s',
+                                  {'host': host, 'id': router_id})
+                        break
             agent_id = binding.l3_agent_id
-            LOG.debug('Delete binding of the SNAT router %(router_id)s '
-                      'from agent %(id)s', {'router_id': router_id,
-                                            'id': agent_id})
-            context.session.delete(binding)
-            if not vm_ports:
-                query = (context.session.
-                         query(l3agent_sch_db.RouterL3AgentBinding).
-                         filter_by(router_id=router_id,
-                                   l3_agent_id=agent_id).
-                         delete(synchronize_session=False))
-        self.l3_rpc_notifier.router_removed_from_agent(
-            context, router_id, host)
-        LOG.debug('Removed binding for router %(router_id)s and '
-                  'agent %(id)s', {'router_id': router_id, 'id': agent_id})
+            if not port_found:
+                context.session.query(
+                    l3agent_sch_db.RouterL3AgentBinding).filter_by(
+                        router_id=router_id, l3_agent_id=agent_id).delete(
+                            synchronize_session=False)
+
+        if not port_found:
+            self.l3_rpc_notifier.router_removed_from_agent(
+                context, router_id, host)
+            LOG.debug('Removed binding for router %(router_id)s and '
+                      'agent %(agent_id)s',
+                      {'router_id': router_id, 'agent_id': agent_id})
+        return port_found
+
+    def unbind_snat_servicenode(self, context, router_id):
+        """Unbind snat AND the router from the current agent."""
+        with context.session.begin(subtransactions=True):
+            binding = self.unbind_snat(context, router_id)
+            if binding:
+                self.unbind_router_servicenode(context, router_id, binding)
 
     def get_snat_bindings(self, context, router_ids):
         """Retrieves the dvr snat bindings for a router."""
@@ -403,6 +430,48 @@ class L3_DVRsch_db_mixin(l3agent_sch_db.L3AgentSchedulerDbMixin):
         return self._get_dvr_sync_data(context, host, agent,
                                        router_ids=router_ids, active=True)
 
+    def check_agent_router_scheduling_needed(self, context, agent, router):
+        if router.get('distributed'):
+            if router['external_gateway_info']:
+                return not self.get_snat_bindings(context, [router['id']])
+            return False
+        return super(L3_DVRsch_db_mixin,
+                     self).check_agent_router_scheduling_needed(
+                         context, agent, router)
+
+    def create_router_to_agent_binding(self, context, agent, router):
+        """Create router to agent binding."""
+        router_id = router['id']
+        agent_id = agent['id']
+        if router['external_gateway_info'] and self.router_scheduler and (
+                router.get('distributed')):
+            try:
+                self.bind_snat_router(context, router_id, agent)
+                self.bind_dvr_router_servicenode(context,
+                                                 router_id, agent)
+            except db_exc.DBError:
+                raise l3agentscheduler.RouterSchedulingFailed(
+                    router_id=router_id,
+                    agent_id=agent_id)
+        else:
+            super(L3_DVRsch_db_mixin, self).create_router_to_agent_binding(
+                context, agent, router)
+
+    def remove_router_from_l3_agent(self, context, agent_id, router_id):
+        router = self.get_router(context, router_id)
+        if router['external_gateway_info'] and router.get('distributed'):
+            binding = self.unbind_snat(context, router_id, agent_id=agent_id)
+            if binding:
+                notification_not_sent = self.unbind_router_servicenode(context,
+                                            router_id, binding)
+                if notification_not_sent:
+                    self.l3_rpc_notifier.routers_updated(
+                        context, [router_id], schedule_routers=False)
+        else:
+            super(L3_DVRsch_db_mixin,
+                  self).remove_router_from_l3_agent(
+                      context, agent_id, router_id)
+
 
 def _notify_l3_agent_new_port(resource, event, trigger, **kwargs):
     LOG.debug('Received %(resource)s %(event)s', {
diff --git a/neutron/tests/unit/plugins/ml2/drivers/openvswitch/agent/test_agent_scheduler.py b/neutron/tests/unit/plugins/ml2/drivers/openvswitch/agent/test_agent_scheduler.py
index a250f4028..3d5c487e4 100644
--- a/neutron/tests/unit/plugins/ml2/drivers/openvswitch/agent/test_agent_scheduler.py
+++ b/neutron/tests/unit/plugins/ml2/drivers/openvswitch/agent/test_agent_scheduler.py
@@ -1049,6 +1049,47 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
                 self.adminContext, [r['id']])[0]['l3_agent']['host']
             self.assertNotEqual(csnat_agent_host, new_csnat_agent_host)
 
+    def test_dvr_router_csnat_manual_rescheduling(self):
+        helpers.register_l3_agent(
+            host=L3_HOSTA, agent_mode=constants.L3_AGENT_MODE_DVR_SNAT)
+        helpers.register_l3_agent(
+            host=L3_HOSTB, agent_mode=constants.L3_AGENT_MODE_DVR_SNAT)
+        with self.subnet() as s:
+            net_id = s['subnet']['network_id']
+            self._set_net_external(net_id)
+
+            router = {'name': 'router1',
+                      'external_gateway_info': {'network_id': net_id},
+                      'admin_state_up': True,
+                      'distributed': True}
+            r = self.l3plugin.create_router(self.adminContext,
+                                            {'router': router})
+            self.l3plugin.schedule_router(
+                self.adminContext, r['id'])
+            l3agents = self.l3plugin.list_l3_agents_hosting_router(
+                self.adminContext, r['id'])
+            self.assertEqual(2, len(l3agents['agents']))
+            csnat_agent = self.l3plugin.get_snat_bindings(
+                self.adminContext, [r['id']])[0]['l3_agent']
+
+            self.l3plugin.remove_router_from_l3_agent(
+                self.adminContext, csnat_agent['id'], r['id'])
+
+            l3agents = self.l3plugin.list_l3_agents_hosting_router(
+                self.adminContext, r['id'])
+            self.assertEqual(1, len(l3agents['agents']))
+            self.assertFalse(self.l3plugin.get_snat_bindings(
+                self.adminContext, [r['id']]))
+
+            self.l3plugin.add_router_to_l3_agent(
+                self.adminContext, csnat_agent['id'], r['id'])
+
+            l3agents = self._list_l3_agents_hosting_router(r['id'])
+            self.assertEqual(2, len(l3agents['agents']))
+            new_csnat_agent = self.l3plugin.get_snat_bindings(
+                self.adminContext, [r['id']])[0]['l3_agent']
+            self.assertEqual(csnat_agent['id'], new_csnat_agent['id'])
+
     def test_router_sync_data(self):
         with self.subnet() as s1,\
                 self.subnet(cidr='10.0.2.0/24') as s2,\
-- 
2.45.2
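
Note: the unit test above exercises the manual csnat move through the plugin
directly. For reference, the same move can be driven against a running
deployment with python-neutronclient. The sketch below is illustrative only;
the endpoint, credentials and UUIDs are placeholders, not values taken from
this patch.

# Sketch: manually move the SNAT portion of a DVR router between two
# dvr_snat agents. All IDs, credentials and the endpoint are placeholders.
from neutronclient.v2_0 import client

neutron = client.Client(username='admin', password='secret',
                        tenant_name='admin',
                        auth_url='http://controller:5000/v2.0')

router_id = 'ROUTER_UUID'        # DVR router with an external gateway
old_agent_id = 'OLD_AGENT_UUID'  # dvr_snat agent currently hosting SNAT
new_agent_id = 'NEW_AGENT_UUID'  # target dvr_snat agent

# With this patch, the csnat_l3_agent_bindings entry is removed and the
# agent is notified to clean up the snat namespace, without the scheduler
# overriding the manual move.
neutron.remove_router_from_l3_agent(old_agent_id, router_id)

# Rebind SNAT (and the router) on the chosen dvr_snat agent.
neutron.add_router_to_l3_agent(new_agent_id, {'router_id': router_id})

# Confirm which agents now host the router.
print(neutron.list_l3_agent_hosting_routers(router_id))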