review.fuel-infra Code Review - openstack-build/neutron-build.git/commitdiff
DVR: do not reschedule router for down agents on compute nodes
author Oleg Bondarev <obondarev@mirantis.com>
Thu, 2 Jul 2015 12:47:34 +0000 (15:47 +0300)
committer Oleg Bondarev <obondarev@mirantis.com>
Fri, 14 Aug 2015 13:36:15 +0000 (16:36 +0300)
Scheduling/unscheduling of DVR routers with l3 agents in 'dvr' mode
running on compute nodes is done according to the DVR serviced ports
created/deleted on those compute nodes. It doesn't make sense to reschedule
a router from an l3 agent on a compute node even if the agent is down - no
other l3 agent can handle VMs running on that compute node.

Closes-Bug: #1470889
Change-Id: Ib998b9e459dd1a9ab740fafa5d84dc3211ca0097

neutron/db/l3_agentschedulers_db.py
neutron/tests/unit/plugins/ml2/drivers/openvswitch/agent/test_agent_scheduler.py

index 9c6413054fc4c484aabdfd43e9f54ca4115c19b5..e5980b41a59f53ca7ebab7ba348b3e5e1f79588f 100644 (file)
@@ -103,6 +103,15 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
                           l3_attrs_db.RouterExtraAttributes.ha == sql.null())))
         try:
             for binding in down_bindings:
+                agent_mode = self._get_agent_mode(binding.l3_agent)
+                if agent_mode == constants.L3_AGENT_MODE_DVR:
+                    # rescheduling from l3 dvr agent on compute node doesn't
+                    # make sense. Router will be removed from that agent once
+                    # there are no dvr serviceable ports on that compute node
+                    LOG.warn(_LW('L3 DVR agent on node %(host)s is down. '
+                                 'Not rescheduling from agent in \'dvr\' '
+                                 'mode.'), {'host': binding.l3_agent.host})
+                    continue
                 LOG.warn(_LW(
                     "Rescheduling router %(router)s from agent %(agent)s "
                     "because the agent did not report to the server in "
@@ -124,6 +133,11 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
             LOG.exception(_LE("Exception encountered during router "
                               "rescheduling."))
 
+    def _get_agent_mode(self, agent_db):
+        agent_conf = self.get_configuration_dict(agent_db)
+        return agent_conf.get(constants.L3_AGENT_MODE,
+                              constants.L3_AGENT_MODE_LEGACY)
+
     def validate_agent_router_combination(self, context, agent, router):
         """Validate if the router can be correctly assigned to the agent.
 
@@ -135,9 +149,7 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
           router from one DVR Agent to another.
         """
         is_distributed = router.get('distributed')
-        agent_conf = self.get_configuration_dict(agent)
-        agent_mode = agent_conf.get(constants.L3_AGENT_MODE,
-                                    constants.L3_AGENT_MODE_LEGACY)
+        agent_mode = self._get_agent_mode(agent)
         router_type = (
             'distributed' if is_distributed else
             'centralized')
@@ -407,9 +419,7 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
         # This optimization is valid assuming that the L3
         # DVR_SNAT node will be the one hosting the DHCP
         # Agent.
-        agent_conf = self.get_configuration_dict(l3_agent)
-        agent_mode = agent_conf.get(constants.L3_AGENT_MODE,
-                                    constants.L3_AGENT_MODE_LEGACY)
+        agent_mode = self._get_agent_mode(l3_agent)
 
         for subnet_id in subnet_ids:
             subnet_dict = core_plugin.get_subnet(context, subnet_id)
index e512b102fb7e81357ac7e7093b3d14e0dcf84fb2..a48b24c600d3b54ac65e9f7988c7b46d3d22a907 100644 (file)
@@ -749,6 +749,30 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
             ret_b = l3_rpc_cb.sync_routers(self.adminContext, host=L3_HOSTB)
             self.assertFalse(ret_b)
 
+    def test_router_is_not_rescheduled_from_dvr_agent(self):
+        router = {'name': 'router1',
+                  'admin_state_up': True,
+                  'distributed': True}
+        r = self.l3plugin.create_router(
+            self.adminContext, {'router': router})
+        dvr_agent = self._register_dvr_agents()[1]
+
+        with mock.patch.object(
+                self.l3plugin,
+                'check_ports_exist_on_l3agent') as port_exists:
+            port_exists.return_value = True
+            self.l3plugin.schedule_router(
+                self.adminContext, r['id'])
+            agents = self._list_l3_agents_hosting_router(r['id'])
+            self.assertEqual(2, len(agents['agents']))
+            self.assertIn(dvr_agent['host'],
+                          [a['host'] for a in agents['agents']])
+            self._take_down_agent_and_run_reschedule(dvr_agent['host'])
+            agents = self._list_l3_agents_hosting_router(r['id'])
+            self.assertEqual(2, len(agents['agents']))
+            self.assertIn(dvr_agent['host'],
+                          [a['host'] for a in agents['agents']])
+
     def test_router_auto_schedule_with_invalid_router(self):
         with self.router() as router:
             l3_rpc_cb = l3_rpc.L3RpcCallback()