From: Derek Higgins Date: Fri, 12 Sep 2014 15:31:44 +0000 (+0100) Subject: Retry getting the list of service plugins X-Git-Url: https://review.fuel-infra.org/gitweb?a=commitdiff_plain;h=e7f0b56d74fbfbb08a3b7a0d2da4cefb6fe2aa67;p=openstack-build%2Fneutron-build.git Retry getting the list of service plugins On systems that start both neutron-server and neutron-l3-agent together, there is a chance that the first call to neutron will time out. Retry up to 4 more times to avoid the l3 agent exiting on startup. This should make the l3 agent a little more robust on startup, but it is still not ideal; ideally it wouldn't exit at all and would instead retry periodically. Change-Id: I2171a164f3f77bccd89895d73c1c8d67f7190488 Closes-Bug: #1353953 Closes-Bug: #1368152 Closes-Bug: #1368795 --- diff --git a/neutron/agent/l3_agent.py b/neutron/agent/l3_agent.py index 6531a6bc6..f8b3f5694 100644 --- a/neutron/agent/l3_agent.py +++ b/neutron/agent/l3_agent.py @@ -22,6 +22,7 @@ eventlet.monkey_patch() import netaddr import os from oslo.config import cfg +from oslo import messaging import Queue from neutron.agent.common import config @@ -40,6 +41,7 @@ from neutron.common import utils as common_utils from neutron import context from neutron import manager from neutron.openstack.common import excutils +from neutron.openstack.common.gettextutils import _LW from neutron.openstack.common import importutils from neutron.openstack.common import log as logging from neutron.openstack.common import loopingcall @@ -521,17 +523,35 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, manager.Manager): self.sync_progress = False # Get the list of service plugins from Neutron Server - try: - self.neutron_service_plugins = ( - self.plugin_rpc.get_service_plugin_list(self.context)) - except n_rpc.RemoteError as e: - LOG.warning(_('l3-agent cannot check service plugins ' - 'enabled at the neutron server when startup ' - 'due to RPC error. It happens when the server ' - 'does not support this RPC API. 
If the error ' - 'is UnsupportedVersion you can ignore ' - 'this warning. Detail message: %s'), e) - self.neutron_service_plugins = None + # This is the first place where we contact neutron-server on startup + # so retry in case its not ready to respond. + retry_count = 5 + while True: + retry_count = retry_count - 1 + try: + self.neutron_service_plugins = ( + self.plugin_rpc.get_service_plugin_list(self.context)) + except n_rpc.RemoteError as e: + with excutils.save_and_reraise_exception() as ctx: + ctx.reraise = False + LOG.warning(_LW('l3-agent cannot check service plugins ' + 'enabled at the neutron server when ' + 'startup due to RPC error. It happens ' + 'when the server does not support this ' + 'RPC API. If the error is ' + 'UnsupportedVersion you can ignore this ' + 'warning. Detail message: %s'), e) + self.neutron_service_plugins = None + except messaging.MessagingTimeout as e: + with excutils.save_and_reraise_exception() as ctx: + if retry_count > 0: + ctx.reraise = False + LOG.warning(_LW('l3-agent cannot check service ' + 'plugins enabled on the neutron ' + 'server. Retrying. 
' + 'Detail message: %s'), e) + continue + break self._clean_stale_namespaces = self.conf.use_namespaces diff --git a/neutron/tests/unit/test_l3_agent.py b/neutron/tests/unit/test_l3_agent.py index 42d0d09ef..93aa9e76f 100644 --- a/neutron/tests/unit/test_l3_agent.py +++ b/neutron/tests/unit/test_l3_agent.py @@ -20,6 +20,7 @@ import datetime import mock import netaddr from oslo.config import cfg +from oslo import messaging from testtools import matchers from neutron.agent.common import config as agent_config @@ -2140,6 +2141,26 @@ class TestBasicRouterOperations(base.BaseTestCase): self.assertIsNone(agent.neutron_service_plugins) self.assertTrue(self.plugin_api.get_service_plugin_list.called) + def test_get_service_plugin_list_retried(self): + raise_timeout = messaging.MessagingTimeout() + # Raise a timeout the first 2 times it calls + # get_service_plugin_list then return a empty tuple + self.plugin_api.get_service_plugin_list.side_effect = ( + raise_timeout, raise_timeout, tuple() + ) + agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) + + self.assertEqual(agent.neutron_service_plugins, tuple()) + + def test_get_service_plugin_list_retried_max(self): + raise_timeout = messaging.MessagingTimeout() + # Raise a timeout 5 times + self.plugin_api.get_service_plugin_list.side_effect = ( + (raise_timeout, ) * 5 + ) + self.assertRaises(messaging.MessagingTimeout, l3_agent.L3NATAgent, + HOSTNAME, self.conf) + class TestL3AgentEventHandler(base.BaseTestCase):