From 6e79981b7caadbbbb2119461034dfe7b4d1c1a64 Mon Sep 17 00:00:00 2001 From: Derek Higgins Date: Fri, 12 Sep 2014 16:31:44 +0100 Subject: [PATCH] Retry getting the list of service plugins On systems that start both neutron-server and neutron-l3-agent together, there is a chance that the first call to neutron will time out. Retry up to 4 more times to avoid the l3 agent exiting on startup. This should make the l3 agent a little more robust on startup, but it is still not ideal; ideally it would not exit at all and would instead retry periodically. Change-Id: I2171a164f3f77bccd89895d73c1c8d67f7190488 Closes-Bug: #1353953 Closes-Bug: #1368152 Closes-Bug: #1368795 (cherry picked from commit e7f0b56d74fbfbb08a3b7a0d2da4cefb6fe2aa67) --- neutron/agent/l3_agent.py | 42 +++++++++++++++++++++-------- neutron/tests/unit/test_l3_agent.py | 21 +++++++++++++++ 2 files changed, 52 insertions(+), 11 deletions(-) diff --git a/neutron/agent/l3_agent.py b/neutron/agent/l3_agent.py index 308ab57ed..b3c63eefb 100644 --- a/neutron/agent/l3_agent.py +++ b/neutron/agent/l3_agent.py @@ -22,6 +22,7 @@ eventlet.monkey_patch() import netaddr import os from oslo.config import cfg +from oslo import messaging import Queue from neutron.agent.common import config @@ -41,6 +42,7 @@ from neutron.common import utils as common_utils from neutron import context from neutron import manager from neutron.openstack.common import excutils +from neutron.openstack.common.gettextutils import _LW from neutron.openstack.common import importutils from neutron.openstack.common import log as logging from neutron.openstack.common import loopingcall @@ -526,17 +528,35 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, self.sync_progress = False # Get the list of service plugins from Neutron Server - try: - self.neutron_service_plugins = ( - self.plugin_rpc.get_service_plugin_list(self.context)) - except n_rpc.RemoteError as e: - LOG.warning(_('l3-agent cannot check service plugins ' - 'enabled at the neutron server when startup ' - 
'due to RPC error. It happens when the server ' - 'does not support this RPC API. If the error ' - 'is UnsupportedVersion you can ignore ' - 'this warning. Detail message: %s'), e) - self.neutron_service_plugins = None + # This is the first place where we contact neutron-server on startup + # so retry in case its not ready to respond. + retry_count = 5 + while True: + retry_count = retry_count - 1 + try: + self.neutron_service_plugins = ( + self.plugin_rpc.get_service_plugin_list(self.context)) + except n_rpc.RemoteError as e: + with excutils.save_and_reraise_exception() as ctx: + ctx.reraise = False + LOG.warning(_LW('l3-agent cannot check service plugins ' + 'enabled at the neutron server when ' + 'startup due to RPC error. It happens ' + 'when the server does not support this ' + 'RPC API. If the error is ' + 'UnsupportedVersion you can ignore this ' + 'warning. Detail message: %s'), e) + self.neutron_service_plugins = None + except messaging.MessagingTimeout as e: + with excutils.save_and_reraise_exception() as ctx: + if retry_count > 0: + ctx.reraise = False + LOG.warning(_LW('l3-agent cannot check service ' + 'plugins enabled on the neutron ' + 'server. Retrying. 
' + 'Detail message: %s'), e) + continue + break self._clean_stale_namespaces = self.conf.use_namespaces diff --git a/neutron/tests/unit/test_l3_agent.py b/neutron/tests/unit/test_l3_agent.py index 3ccdd597e..3d2d33c52 100644 --- a/neutron/tests/unit/test_l3_agent.py +++ b/neutron/tests/unit/test_l3_agent.py @@ -20,6 +20,7 @@ import datetime import mock import netaddr from oslo.config import cfg +from oslo import messaging from testtools import matchers from neutron.agent.common import config as agent_config @@ -2260,6 +2261,26 @@ vrrp_instance VR_1 { self.assertIsNone(agent.neutron_service_plugins) self.assertTrue(self.plugin_api.get_service_plugin_list.called) + def test_get_service_plugin_list_retried(self): + raise_timeout = messaging.MessagingTimeout() + # Raise a timeout the first 2 times it calls + # get_service_plugin_list then return a empty tuple + self.plugin_api.get_service_plugin_list.side_effect = ( + raise_timeout, raise_timeout, tuple() + ) + agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) + + self.assertEqual(agent.neutron_service_plugins, tuple()) + + def test_get_service_plugin_list_retried_max(self): + raise_timeout = messaging.MessagingTimeout() + # Raise a timeout 5 times + self.plugin_api.get_service_plugin_list.side_effect = ( + (raise_timeout, ) * 5 + ) + self.assertRaises(messaging.MessagingTimeout, l3_agent.L3NATAgent, + HOSTNAME, self.conf) + class TestL3AgentEventHandler(base.BaseTestCase): -- 2.45.2