review.fuel-infra Code Review - openstack-build/neutron-build.git/commitdiff
Catch broad exception in methods used in FixedIntervalLoopingCall
authorEugene Nikanorov <enikanorov@mirantis.com>
Tue, 26 May 2015 16:17:20 +0000 (20:17 +0400)
committerEugene Nikanorov <enikanorov@mirantis.com>
Tue, 2 Jun 2015 23:42:30 +0000 (03:42 +0400)
Unlike other places where it might make sense to catch specific
exceptions, methods that are used to check L3 and DHCP agent
liveness via FixedIntervalLoopingCall should never allow exceptions
to leak to the calling method and interrupt the loop.

Further improvement of FixedIntervalLoopingCall might be needed,
but for the sake of easy backporting it makes sense to fix the issue
in neutron before pushing refactoring to 3rd-party library.

Change-Id: I6a61e99a6f4e445e26ea4a9923b47e35559e5703
Closes-Bug: #1458119

neutron/db/agentschedulers_db.py
neutron/db/l3_agentschedulers_db.py
neutron/tests/unit/plugins/openvswitch/test_agent_scheduler.py
neutron/tests/unit/scheduler/test_dhcp_agent_scheduler.py

index bac33a78f6bd7bae33fb25ce05797f11d432c748..61eff9b07cbbe77ec25c9f2a1cc52070aa5f9ef5 100644 (file)
@@ -270,40 +270,47 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
                    agents_db.Agent.admin_state_up))
         dhcp_notifier = self.agent_notifiers.get(constants.AGENT_TYPE_DHCP)
 
-        for binding in self._filter_bindings(context, down_bindings):
-            LOG.warn(_LW("Removing network %(network)s from agent %(agent)s "
-                         "because the agent did not report to the server in "
-                         "the last %(dead_time)s seconds."),
-                     {'network': binding.network_id,
-                      'agent': binding.dhcp_agent_id,
-                      'dead_time': agent_dead_limit})
-            # save binding object to avoid ObjectDeletedError
-            # in case binding is concurrently deleted from the DB
-            saved_binding = {'net': binding.network_id,
-                             'agent': binding.dhcp_agent_id}
-            try:
-                # do not notify agent if it considered dead
-                # so when it is restarted it won't see network delete
-                # notifications on its queue
-                self.remove_network_from_dhcp_agent(context,
-                                                    binding.dhcp_agent_id,
-                                                    binding.network_id,
-                                                    notify=False)
-            except dhcpagentscheduler.NetworkNotHostedByDhcpAgent:
-                # measures against concurrent operation
-                LOG.debug("Network %(net)s already removed from DHCP agent "
-                          "%(agent)s",
-                          saved_binding)
-                # still continue and allow concurrent scheduling attempt
-            except Exception:
-                LOG.exception(_LE("Unexpected exception occurred while "
-                                  "removing network %(net)s from agent "
-                                  "%(agent)s"),
+        try:
+            for binding in self._filter_bindings(context, down_bindings):
+                LOG.warn(_LW("Removing network %(network)s from agent "
+                             "%(agent)s because the agent did not report "
+                             "to the server in the last %(dead_time)s "
+                             "seconds."),
+                         {'network': binding.network_id,
+                          'agent': binding.dhcp_agent_id,
+                          'dead_time': agent_dead_limit})
+                # save binding object to avoid ObjectDeletedError
+                # in case binding is concurrently deleted from the DB
+                saved_binding = {'net': binding.network_id,
+                                 'agent': binding.dhcp_agent_id}
+                try:
+                    # do not notify agent if it considered dead
+                    # so when it is restarted it won't see network delete
+                    # notifications on its queue
+                    self.remove_network_from_dhcp_agent(context,
+                                                        binding.dhcp_agent_id,
+                                                        binding.network_id,
+                                                        notify=False)
+                except dhcpagentscheduler.NetworkNotHostedByDhcpAgent:
+                    # measures against concurrent operation
+                    LOG.debug("Network %(net)s already removed from DHCP "
+                              "agent %(agent)s",
                               saved_binding)
-
-            if cfg.CONF.network_auto_schedule:
-                self._schedule_network(
-                    context, saved_binding['net'], dhcp_notifier)
+                    # still continue and allow concurrent scheduling attempt
+                except Exception:
+                    LOG.exception(_LE("Unexpected exception occurred while "
+                                      "removing network %(net)s from agent "
+                                      "%(agent)s"),
+                                  saved_binding)
+
+                if cfg.CONF.network_auto_schedule:
+                    self._schedule_network(
+                        context, saved_binding['net'], dhcp_notifier)
+        except Exception:
+            # we want to be thorough and catch whatever is raised
+            # to avoid loop abortion
+            LOG.exception(_LE("Exception encountered during network "
+                              "rescheduling"))
 
     def get_dhcp_agents_hosting_networks(
             self, context, network_ids, active=None, admin_state_up=None):
index f661dcc62217ae7e62c7741ea95d2c749fd7d391..59c397718525285cb85f31a40d2fd0b9e9a2f929 100644 (file)
@@ -116,9 +116,9 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
                     # so one broken one doesn't stop the iteration.
                     LOG.exception(_LE("Failed to reschedule router %s"),
                                   binding.router_id)
-        except db_exc.DBError:
-            # Catch DB errors here so a transient DB connectivity issue
-            # doesn't stop the loopingcall.
+        except Exception:
+            # we want to be thorough and catch whatever is raised
+            # to avoid loop abortion
             LOG.exception(_LE("Exception encountered during router "
                               "rescheduling."))
 
index 2682014d0af23c1fefdd80b3582b779f6154a165..5f14037fd0e0ee6f1e908cd5931759f8a7096ef6 100644 (file)
@@ -674,17 +674,14 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
                     db_exc.DBError(), oslo_messaging.RemoteError(),
                     l3agentscheduler.RouterReschedulingFailed(router_id='f',
                                                               agent_id='f'),
-                    ValueError('this raises')
+                    ValueError('this raises'),
+                    Exception()
                 ]).start()
-            # these first three should not raise any errors
             self._take_down_agent_and_run_reschedule(L3_HOSTA)  # DBError
             self._take_down_agent_and_run_reschedule(L3_HOSTA)  # RemoteError
             self._take_down_agent_and_run_reschedule(L3_HOSTA)  # schedule err
-
-            # ValueError is not caught so it should raise
-            self.assertRaises(ValueError,
-                              self._take_down_agent_and_run_reschedule,
-                              L3_HOSTA)
+            self._take_down_agent_and_run_reschedule(L3_HOSTA)  # Value error
+            self._take_down_agent_and_run_reschedule(L3_HOSTA)  # Exception
 
     def test_router_rescheduler_iterates_after_reschedule_failure(self):
         plugin = manager.NeutronManager.get_service_plugins().get(
index 9faa8ab5bfb27ef4a3aa6371f92fec1e66af8a0e..5ee1adb16cd4a2338544bd379f826a3d00133624 100644 (file)
@@ -248,6 +248,15 @@ class TestNetworksFailover(TestDhcpSchedulerBaseTestCase,
             self.assertIn('foo3', res_ids)
             self.assertIn('foo4', res_ids)
 
+    def test_reschedule_network_from_down_agent_failed_on_unexpected(self):
+        agents = self._create_and_set_agents_down(['host-a'], 1)
+        self._test_schedule_bind_network([agents[0]], self.network_id)
+        with mock.patch.object(
+            self, '_filter_bindings',
+            side_effect=Exception()):
+            # just make sure that no exception is raised
+            self.remove_networks_from_down_agents()
+
 
 class DHCPAgentWeightSchedulerTestCase(TestDhcpSchedulerBaseTestCase):
     """Unit test scenarios for WeightScheduler.schedule."""