]> review.fuel-infra Code Review - openstack-build/neutron-build.git/commitdiff
Unify logic that determines liveness of DHCP agent
authorEugene Nikanorov <enikanorov@mirantis.com>
Wed, 4 Feb 2015 12:05:36 +0000 (15:05 +0300)
committerEugene Nikanorov <enikanorov@mirantis.com>
Thu, 19 Feb 2015 07:06:04 +0000 (11:06 +0400)
For DHCP agents, checking the agent's last heartbeat time is sometimes
not enough: during its start-up period the agent may fail to send state
reports because it is busy processing networks.
In the rescheduling logic, such a DHCP agent is given additional time after
start, proportional to the number of networks the agent is hosting.
The same logic needs to be applied to the DHCP agent scheduler to avoid a
case where a starting agent is considered dead and a network ends up with
more hosting agents than configured.

Change-Id: I0fe6244c7d2ed42e4744351be34f251318322c54
Closes-Bug: #1417708

neutron/db/agentschedulers_db.py
neutron/scheduler/dhcp_agent_scheduler.py
neutron/tests/unit/openvswitch/test_agent_scheduler.py
neutron/tests/unit/test_dhcp_scheduler.py

index dd766cc76744dd94e58ff5d763bf4314542610e6..b474d9965186284ff1b565d4190d1a17343722f7 100644 (file)
@@ -156,7 +156,12 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
 
         self.setup_agent_status_check(self.remove_networks_from_down_agents)
 
-    def _agent_starting_up(self, context, agent):
+    def is_eligible_agent(self, context, active, agent):
+        # eligible agent is active or starting up
+        return (AgentSchedulerDbMixin.is_eligible_agent(active, agent) or
+                self.agent_starting_up(context, agent))
+
+    def agent_starting_up(self, context, agent):
         """Check if agent was just started.
 
         Method returns True if agent is in its 'starting up' period.
@@ -217,7 +222,7 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
         for binding in bindings:
             agent_id = binding.dhcp_agent['id']
             if agent_id not in checked_agents:
-                if self._agent_starting_up(context, binding.dhcp_agent):
+                if self.agent_starting_up(context, binding.dhcp_agent):
                     # When agent starts and it has many networks to process
                     # it may fail to send state reports in defined interval.
                     # The server will consider it dead and try to remove
@@ -287,8 +292,8 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
 
         return [binding.dhcp_agent
                 for binding in query
-                if AgentSchedulerDbMixin.is_eligible_agent(active,
-                                                           binding.dhcp_agent)]
+                if self.is_eligible_agent(context, active,
+                                          binding.dhcp_agent)]
 
     def add_network_to_dhcp_agent(self, context, id, network_id):
         self._get_network(context, network_id)
index 00c25aa801237add817a2d401c3d8fb03b11cdec..67f0aca7136a8d0720a22b99d9d9ec2475fced2c 100644 (file)
@@ -81,9 +81,8 @@ class ChanceScheduler(object):
                 return
             active_dhcp_agents = [
                 agent for agent in set(enabled_dhcp_agents)
-                if not agents_db.AgentDbMixin.is_agent_down(
-                    agent['heartbeat_timestamp'])
-                and agent not in dhcp_agents
+                if agent not in dhcp_agents and plugin.is_eligible_agent(
+                    context, True, agent)
             ]
             if not active_dhcp_agents:
                 LOG.warn(_LW('No more DHCP agents'))
index 3d8266bdba0d98a4c54a101487392cadad21b806..15d93a8008317b968416557905e45e1d9e143ba4 100644 (file)
@@ -32,6 +32,7 @@ from neutron.api.v2 import attributes
 from neutron.common import constants
 from neutron import context
 from neutron.db import agents_db
+from neutron.db import agentschedulers_db
 from neutron.db import l3_agentschedulers_db
 from neutron.extensions import agent
 from neutron.extensions import dhcpagentscheduler
@@ -478,6 +479,27 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
         self._delete('ports', port2['port']['id'])
         self.assertEqual(0, len(dhcp_agents['agents']))
 
+    def test_is_eligible_agent(self):
+        agent_startup = ('neutron.db.agentschedulers_db.'
+                         'DhcpAgentSchedulerDbMixin.agent_starting_up')
+        is_eligible_agent = ('neutron.db.agentschedulers_db.'
+                             'AgentSchedulerDbMixin.is_eligible_agent')
+        dhcp_mixin = agentschedulers_db.DhcpAgentSchedulerDbMixin()
+        with contextlib.nested(
+            mock.patch(agent_startup),
+            mock.patch(is_eligible_agent)
+        ) as (startup, elig):
+            tests = [(True, True),
+                     (True, False),
+                     (False, True),
+                     (False, False)]
+            for rv1, rv2 in tests:
+                startup.return_value = rv1
+                elig.return_value = rv2
+                self.assertEqual(rv1 or rv2,
+                                 dhcp_mixin.is_eligible_agent(None,
+                                                              None, None))
+
     def test_network_scheduler_with_down_agent(self):
         dhcp_hosta = {
             'binary': 'neutron-dhcp-agent',
@@ -488,17 +510,19 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
                                },
             'agent_type': constants.AGENT_TYPE_DHCP}
         self._register_one_agent_state(dhcp_hosta)
-        is_agent_down_str = 'neutron.db.agents_db.AgentDbMixin.is_agent_down'
-        with mock.patch(is_agent_down_str) as mock_is_agent_down:
-            mock_is_agent_down.return_value = False
+        eligible_agent_str = ('neutron.db.agentschedulers_db.'
+                              'DhcpAgentSchedulerDbMixin.is_eligible_agent')
+        with mock.patch(eligible_agent_str) as eligible_agent:
+            eligible_agent.return_value = True
             with self.port() as port:
                 dhcp_agents = self._list_dhcp_agents_hosting_network(
                     port['port']['network_id'])
             self._delete('ports', port['port']['id'])
             self._delete('networks', port['port']['network_id'])
             self.assertEqual(1, len(dhcp_agents['agents']))
-        with mock.patch(is_agent_down_str) as mock_is_agent_down:
-            mock_is_agent_down.return_value = True
+
+        with mock.patch(eligible_agent_str) as eligible_agent:
+            eligible_agent.return_value = False
             with self.port() as port:
                 dhcp_agents = self._list_dhcp_agents_hosting_network(
                     port['port']['network_id'])
index 0ad47e2d5a285f922a3db232475bd186ed9e5ccd..e8c06ce81c8a36cbfe748cdb43335442d9d60d33 100644 (file)
@@ -243,7 +243,7 @@ class TestNetworksFailover(TestDhcpSchedulerBaseTestCase,
                                              dhcp_agent={'id': 'id2'}),
             sched_db.NetworkDhcpAgentBinding(network_id='foo4',
                                              dhcp_agent={'id': 'id2'})]
-        with mock.patch.object(self, '_agent_starting_up',
+        with mock.patch.object(self, 'agent_starting_up',
                                side_effect=[True, False]):
             res = [b for b in self._filter_bindings(None, bindings)]
             # once per each agent id1 and id2