From: Eugene Nikanorov
Date: Wed, 4 Feb 2015 12:05:36 +0000 (+0300)
Subject: Unify logic that determines liveliness of DHCP agent
X-Git-Url: https://review.fuel-infra.org/gitweb?a=commitdiff_plain;h=28fe7e85cc26d1a0254990ebee1bececae06f374;p=openstack-build%2Fneutron-build.git

Unify logic that determines liveliness of DHCP agent

For DHCP agents it is sometimes not enough to check the agent's last
heartbeat time: during its startup period an agent may fail to send
state reports because it is busy processing networks. The rescheduling
logic therefore gives such a DHCP agent additional time after start,
proportional to the number of networks the agent is hosting.

The same logic needs to be applied to the DHCP agent scheduler, to
avoid the case where a starting agent is considered dead and a network
gets more hosting agents than configured.

Change-Id: I0fe6244c7d2ed42e4744351be34f251318322c54
Closes-Bug: #1417708
---

diff --git a/neutron/db/agentschedulers_db.py b/neutron/db/agentschedulers_db.py
index dd766cc76..b474d9965 100644
--- a/neutron/db/agentschedulers_db.py
+++ b/neutron/db/agentschedulers_db.py
@@ -156,7 +156,12 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
 
         self.setup_agent_status_check(self.remove_networks_from_down_agents)
 
-    def _agent_starting_up(self, context, agent):
+    def is_eligible_agent(self, context, active, agent):
+        # eligible agent is active or starting up
+        return (AgentSchedulerDbMixin.is_eligible_agent(active, agent) or
+                self.agent_starting_up(context, agent))
+
+    def agent_starting_up(self, context, agent):
         """Check if agent was just started.
 
         Method returns True if agent is in its 'starting up' period.
@@ -217,7 +222,7 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
         for binding in bindings:
             agent_id = binding.dhcp_agent['id']
             if agent_id not in checked_agents:
-                if self._agent_starting_up(context, binding.dhcp_agent):
+                if self.agent_starting_up(context, binding.dhcp_agent):
                     # When agent starts and it has many networks to process
                     # it may fail to send state reports in defined interval.
                     # The server will consider it dead and try to remove
@@ -287,8 +292,8 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
 
         return [binding.dhcp_agent
                 for binding in query
-                if AgentSchedulerDbMixin.is_eligible_agent(active,
-                                                           binding.dhcp_agent)]
+                if self.is_eligible_agent(context, active,
+                                          binding.dhcp_agent)]
 
     def add_network_to_dhcp_agent(self, context, id, network_id):
         self._get_network(context, network_id)
diff --git a/neutron/scheduler/dhcp_agent_scheduler.py b/neutron/scheduler/dhcp_agent_scheduler.py
index 00c25aa80..67f0aca71 100644
--- a/neutron/scheduler/dhcp_agent_scheduler.py
+++ b/neutron/scheduler/dhcp_agent_scheduler.py
@@ -81,9 +81,8 @@ class ChanceScheduler(object):
             return
         active_dhcp_agents = [
             agent for agent in set(enabled_dhcp_agents)
-            if not agents_db.AgentDbMixin.is_agent_down(
-                agent['heartbeat_timestamp'])
-            and agent not in dhcp_agents
+            if agent not in dhcp_agents and plugin.is_eligible_agent(
+                context, True, agent)
         ]
         if not active_dhcp_agents:
             LOG.warn(_LW('No more DHCP agents'))
diff --git a/neutron/tests/unit/openvswitch/test_agent_scheduler.py b/neutron/tests/unit/openvswitch/test_agent_scheduler.py
index 3d8266bdb..15d93a800 100644
--- a/neutron/tests/unit/openvswitch/test_agent_scheduler.py
+++ b/neutron/tests/unit/openvswitch/test_agent_scheduler.py
@@ -32,6 +32,7 @@ from neutron.api.v2 import attributes
 from neutron.common import constants
 from neutron import context
 from neutron.db import agents_db
+from neutron.db import agentschedulers_db
 from neutron.db import l3_agentschedulers_db
 from neutron.extensions import agent
 from neutron.extensions import dhcpagentscheduler
@@ -478,6 +479,27 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
             self._delete('ports', port2['port']['id'])
         self.assertEqual(0, len(dhcp_agents['agents']))
 
+    def test_is_eligible_agent(self):
+        agent_startup = ('neutron.db.agentschedulers_db.'
+                         'DhcpAgentSchedulerDbMixin.agent_starting_up')
+        is_eligible_agent = ('neutron.db.agentschedulers_db.'
+                             'AgentSchedulerDbMixin.is_eligible_agent')
+        dhcp_mixin = agentschedulers_db.DhcpAgentSchedulerDbMixin()
+        with contextlib.nested(
+            mock.patch(agent_startup),
+            mock.patch(is_eligible_agent)
+        ) as (startup, elig):
+            tests = [(True, True),
+                     (True, False),
+                     (False, True),
+                     (False, False)]
+            for rv1, rv2 in tests:
+                startup.return_value = rv1
+                elig.return_value = rv2
+                self.assertEqual(rv1 or rv2,
+                                 dhcp_mixin.is_eligible_agent(None,
+                                                              None, None))
+
     def test_network_scheduler_with_down_agent(self):
         dhcp_hosta = {
             'binary': 'neutron-dhcp-agent',
@@ -488,17 +510,19 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
                                },
             'agent_type': constants.AGENT_TYPE_DHCP}
         self._register_one_agent_state(dhcp_hosta)
-        is_agent_down_str = 'neutron.db.agents_db.AgentDbMixin.is_agent_down'
-        with mock.patch(is_agent_down_str) as mock_is_agent_down:
-            mock_is_agent_down.return_value = False
+        eligible_agent_str = ('neutron.db.agentschedulers_db.'
+                              'DhcpAgentSchedulerDbMixin.is_eligible_agent')
+        with mock.patch(eligible_agent_str) as eligible_agent:
+            eligible_agent.return_value = True
             with self.port() as port:
                 dhcp_agents = self._list_dhcp_agents_hosting_network(
                     port['port']['network_id'])
             self._delete('ports', port['port']['id'])
             self._delete('networks', port['port']['network_id'])
             self.assertEqual(1, len(dhcp_agents['agents']))
-        with mock.patch(is_agent_down_str) as mock_is_agent_down:
-            mock_is_agent_down.return_value = True
+
+        with mock.patch(eligible_agent_str) as eligible_agent:
+            eligible_agent.return_value = False
             with self.port() as port:
                 dhcp_agents = self._list_dhcp_agents_hosting_network(
                     port['port']['network_id'])
diff --git a/neutron/tests/unit/test_dhcp_scheduler.py b/neutron/tests/unit/test_dhcp_scheduler.py
index 0ad47e2d5..e8c06ce81 100644
--- a/neutron/tests/unit/test_dhcp_scheduler.py
+++ b/neutron/tests/unit/test_dhcp_scheduler.py
@@ -243,7 +243,7 @@ class TestNetworksFailover(TestDhcpSchedulerBaseTestCase,
                                               dhcp_agent={'id': 'id2'}),
                    sched_db.NetworkDhcpAgentBinding(network_id='foo4',
                                                     dhcp_agent={'id': 'id2'})]
-        with mock.patch.object(self, '_agent_starting_up',
+        with mock.patch.object(self, 'agent_starting_up',
                                side_effect=[True, False]):
             res = [b for b in self._filter_bindings(None, bindings)]
             # once per each agent id1 and id2
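
To make the unified check concrete, the following is a minimal,
self-contained sketch of the eligibility logic the patch applies in both
the scheduler and the rescheduling path. The helper names, the 75-second
down-time default, and the two-seconds-per-network grace constant are
illustrative assumptions modelled on the behaviour described above, not
the exact Neutron implementation.

    import datetime

    AGENT_DOWN_TIME = 75       # assumed default, seconds
    SECONDS_PER_NETWORK = 2    # assumed per-network startup allowance

    def is_agent_down(heartbeat_timestamp):
        # An agent is "down" when its last heartbeat is older than the
        # configured down time.
        cutoff = (datetime.datetime.utcnow() -
                  datetime.timedelta(seconds=AGENT_DOWN_TIME))
        return heartbeat_timestamp < cutoff

    def agent_starting_up(started_at, network_count):
        # A freshly started agent gets extra time proportional to the
        # number of networks it hosts: while busy syncing them, it may
        # be unable to send state reports.
        grace = datetime.timedelta(
            seconds=AGENT_DOWN_TIME + SECONDS_PER_NETWORK * network_count)
        return datetime.datetime.utcnow() < started_at + grace

    def is_eligible_agent(agent):
        # Unified check: eligible if alive by heartbeat OR still inside
        # its startup grace period.
        return (not is_agent_down(agent['heartbeat_timestamp']) or
                agent_starting_up(agent['started_at'],
                                  agent['network_count']))

Under these rules, an agent that started 90 seconds ago hosting 500
networks, with a 120-second-old heartbeat, fails the heartbeat test
alone but is still eligible, since its grace period is
75 + 2 * 500 = 1075 seconds. This is exactly the situation the bug
describes: with the old heartbeat-only check, the scheduler would
consider such an agent dead and assign the network more hosting agents
than configured.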