From: Eugene Nikanorov Date: Tue, 3 Feb 2015 14:47:46 +0000 (+0300) Subject: Improve robustness of network failover X-Git-Url: https://review.fuel-infra.org/gitweb?a=commitdiff_plain;h=ef7e6a8553954aeb4444c90f65ac3fece8be201a;p=openstack-build%2Fneutron-build.git Improve robustness of network failover In some cases, when remove_network_from_dhcp_agent is called concurrently by more than one neutron server, it leads to an unexpected DB exception when the NetworkDhcpAgentBinding is removed. We need to avoid this case and make the remove_networks_from_down_agents method resistant to such issues by catching a broad exception. Change-Id: I653e200d89eb795ded742bb49420f09f66625587 Closes-Bug: #1417629 --- diff --git a/neutron/db/agentschedulers_db.py b/neutron/db/agentschedulers_db.py index 0f011306d..5b4af8df3 100644 --- a/neutron/db/agentschedulers_db.py +++ b/neutron/db/agentschedulers_db.py @@ -261,10 +261,16 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler except dhcpagentscheduler.NetworkNotHostedByDhcpAgent: # measures against concurrent operation LOG.debug("Network %(net)s already removed from DHCP agent " - "%s(agent)", + "%(agent)s", {'net': binding.network_id, 'agent': binding.dhcp_agent_id}) # still continue and allow concurrent scheduling attempt + except Exception: + LOG.exception(_LE("Unexpected exception occured while " + "removing network %(net)s from agent " + "%(agent)s"), + {'net': binding.network_id, + 'agent': binding.dhcp_agent_id}) if cfg.CONF.network_auto_schedule: self._schedule_network( @@ -317,9 +323,11 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler with context.session.begin(subtransactions=True): try: query = context.session.query(NetworkDhcpAgentBinding) - binding = query.filter( + query = query.filter( NetworkDhcpAgentBinding.network_id == network_id, - NetworkDhcpAgentBinding.dhcp_agent_id == id).one() + NetworkDhcpAgentBinding.dhcp_agent_id == id) + # just ensure the binding exists + query.one() except exc.NoResultFound: raise 
dhcpagentscheduler.NetworkNotHostedByDhcpAgent( network_id=network_id, agent_id=id) @@ -332,8 +340,8 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler for port in ports: port['device_id'] = constants.DEVICE_ID_RESERVED_DHCP_PORT self.update_port(context, port['id'], dict(port=port)) + query.delete() - context.session.delete(binding) dhcp_notifier = self.agent_notifiers.get(constants.AGENT_TYPE_DHCP) if dhcp_notifier: dhcp_notifier.network_removed_from_agent( diff --git a/neutron/tests/unit/test_dhcp_scheduler.py b/neutron/tests/unit/test_dhcp_scheduler.py index 4518f764c..b7d7cc786 100644 --- a/neutron/tests/unit/test_dhcp_scheduler.py +++ b/neutron/tests/unit/test_dhcp_scheduler.py @@ -252,3 +252,15 @@ class TestNetworksFailover(TestDhcpSchedulerBaseTestCase, res_ids = [b.network_id for b in res] self.assertIn('foo3', res_ids) self.assertIn('foo4', res_ids) + + def test_remove_networks_from_down_agents_catches_all(self): + with contextlib.nested( + mock.patch.object( + self, 'remove_network_from_dhcp_agent', + side_effect=Exception("Unexpected exception!")), + mock.patch.object( + self, '_filter_bindings', + return_value=[sched_db.NetworkDhcpAgentBinding( + network_id='foo', dhcp_agent_id='bar')]) + ): + self.remove_networks_from_down_agents()