From 419ce6281dd1e44597617d44fe5496580536b0e5 Mon Sep 17 00:00:00 2001 From: Oleg Bondarev Date: Thu, 2 Jul 2015 15:47:34 +0300 Subject: [PATCH] DVR: do not reschedule router for down agents on compute nodes Scheduling/unscheduling of DVR routers with l3 agents in 'dvr' mode running on compute nodes is done according to DVR serviced ports created/deleted on those compute nodes. It doesn't make sense to reschedule a router from an l3 agent on a compute node even if the agent is down - no other l3 agent can handle VMs running on that compute node. Closes-Bug: #1470889 Change-Id: Ib998b9e459dd1a9ab740fafa5d84dc3211ca0097 --- neutron/db/l3_agentschedulers_db.py | 22 ++++++++++++----- .../openvswitch/agent/test_agent_scheduler.py | 24 +++++++++++++++++++ 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/neutron/db/l3_agentschedulers_db.py b/neutron/db/l3_agentschedulers_db.py index 9c6413054..e5980b41a 100644 --- a/neutron/db/l3_agentschedulers_db.py +++ b/neutron/db/l3_agentschedulers_db.py @@ -103,6 +103,15 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase, l3_attrs_db.RouterExtraAttributes.ha == sql.null()))) try: for binding in down_bindings: + agent_mode = self._get_agent_mode(binding.l3_agent) + if agent_mode == constants.L3_AGENT_MODE_DVR: + # rescheduling from l3 dvr agent on compute node doesn't + # make sense. Router will be removed from that agent once + # there are no dvr serviceable ports on that compute node + LOG.warn(_LW('L3 DVR agent on node %(host)s is down. 
' + 'Not rescheduling from agent in \'dvr\' ' + 'mode.'), {'host': binding.l3_agent.host}) + continue LOG.warn(_LW( "Rescheduling router %(router)s from agent %(agent)s " "because the agent did not report to the server in " @@ -124,6 +133,11 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase, LOG.exception(_LE("Exception encountered during router " "rescheduling.")) + def _get_agent_mode(self, agent_db): + agent_conf = self.get_configuration_dict(agent_db) + return agent_conf.get(constants.L3_AGENT_MODE, + constants.L3_AGENT_MODE_LEGACY) + def validate_agent_router_combination(self, context, agent, router): """Validate if the router can be correctly assigned to the agent. @@ -135,9 +149,7 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase, router from one DVR Agent to another. """ is_distributed = router.get('distributed') - agent_conf = self.get_configuration_dict(agent) - agent_mode = agent_conf.get(constants.L3_AGENT_MODE, - constants.L3_AGENT_MODE_LEGACY) + agent_mode = self._get_agent_mode(agent) router_type = ( 'distributed' if is_distributed else 'centralized') @@ -407,9 +419,7 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase, # This optimization is valid assuming that the L3 # DVR_SNAT node will be the one hosting the DHCP # Agent. 
- agent_conf = self.get_configuration_dict(l3_agent) - agent_mode = agent_conf.get(constants.L3_AGENT_MODE, - constants.L3_AGENT_MODE_LEGACY) + agent_mode = self._get_agent_mode(l3_agent) for subnet_id in subnet_ids: subnet_dict = core_plugin.get_subnet(context, subnet_id) diff --git a/neutron/tests/unit/plugins/ml2/drivers/openvswitch/agent/test_agent_scheduler.py b/neutron/tests/unit/plugins/ml2/drivers/openvswitch/agent/test_agent_scheduler.py index e512b102f..a48b24c60 100644 --- a/neutron/tests/unit/plugins/ml2/drivers/openvswitch/agent/test_agent_scheduler.py +++ b/neutron/tests/unit/plugins/ml2/drivers/openvswitch/agent/test_agent_scheduler.py @@ -749,6 +749,30 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase): ret_b = l3_rpc_cb.sync_routers(self.adminContext, host=L3_HOSTB) self.assertFalse(ret_b) + def test_router_is_not_rescheduled_from_dvr_agent(self): + router = {'name': 'router1', + 'admin_state_up': True, + 'distributed': True} + r = self.l3plugin.create_router( + self.adminContext, {'router': router}) + dvr_agent = self._register_dvr_agents()[1] + + with mock.patch.object( + self.l3plugin, + 'check_ports_exist_on_l3agent') as port_exists: + port_exists.return_value = True + self.l3plugin.schedule_router( + self.adminContext, r['id']) + agents = self._list_l3_agents_hosting_router(r['id']) + self.assertEqual(2, len(agents['agents'])) + self.assertIn(dvr_agent['host'], + [a['host'] for a in agents['agents']]) + self._take_down_agent_and_run_reschedule(dvr_agent['host']) + agents = self._list_l3_agents_hosting_router(r['id']) + self.assertEqual(2, len(agents['agents'])) + self.assertIn(dvr_agent['host'], + [a['host'] for a in agents['agents']]) + def test_router_auto_schedule_with_invalid_router(self): with self.router() as router: l3_rpc_cb = l3_rpc.L3RpcCallback() -- 2.45.2