From e801eb29d79a8d1f689394756c7870b11ae75c1b Mon Sep 17 00:00:00 2001 From: Stephen Ma Date: Thu, 4 Jun 2015 20:09:23 +0000 Subject: [PATCH] L3 agent should do report state before full sync at start Sometimes the AgentNotFoundByTypeHost exception is reported during L3-agent startup. The exception is generated when the first get_routers RPC call is made. When the neutron server gets this RPC call, it might not have handled the report state RPC call yet. So the L3-agent hasn't been registered in the API server. The result is an RPC Error exception. By the time the next get_routers RPC call is made, the report state RPC call has already been done and the agent registered. This patch modifies the L3 agent startup behavior to have the report state done before the agent does the sync routers RPC call. Closes-bug: 1456822 Change-Id: Id40cfd8466f45e20fea0e9df6fd57bf9c9e59da7 (cherry picked from commit b649b9c871d0734745da5a201eca83a6b407a1c5) --- neutron/agent/l3/agent.py | 16 +++++++++++++++- neutron/tests/unit/agent/l3/test_agent.py | 20 ++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/neutron/agent/l3/agent.py b/neutron/agent/l3/agent.py index b74d07ed2..6e052a18d 100644 --- a/neutron/agent/l3/agent.py +++ b/neutron/agent/l3/agent.py @@ -542,6 +542,11 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, self._queue.add(update) def after_start(self): + # Note: the FWaaS' vArmourL3NATAgent is a subclass of L3NATAgent. It + # calls this method here. So Removing this after_start() would break + # vArmourL3NATAgent. We need to find out whether vArmourL3NATAgent + # can have L3NATAgentWithStateReport as its base class instead of + # L3NATAgent. 
eventlet.spawn_n(self._process_routers_loop) LOG.info(_LI("L3 agent started")) # When L3 agent is ready, we immediately do a full sync @@ -551,6 +556,7 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, class L3NATAgentWithStateReport(L3NATAgent): def __init__(self, host, conf=None): + self.use_call = True super(L3NATAgentWithStateReport, self).__init__(host=host, conf=conf) self.state_rpc = agent_rpc.PluginReportStateAPI(topics.PLUGIN) self.agent_state = { @@ -570,7 +576,6 @@ class L3NATAgentWithStateReport(L3NATAgent): 'start_flag': True, 'agent_type': l3_constants.AGENT_TYPE_L3} report_interval = self.conf.AGENT.report_interval - self.use_call = True if report_interval: self.heartbeat = loopingcall.FixedIntervalLoopingCall( self._report_state) @@ -611,6 +616,15 @@ class L3NATAgentWithStateReport(L3NATAgent): except Exception: LOG.exception(_LE("Failed reporting state!")) + def after_start(self): + eventlet.spawn_n(self._process_routers_loop) + LOG.info(_LI("L3 agent started")) + # Do the report state before we do the first full sync. 
+ self._report_state() + + # When L3 agent is ready, we immediately do a full sync + self.periodic_sync_routers_task(self.context) + def agent_updated(self, context, payload): """Handle the agent_updated notification event.""" self.fullsync = True diff --git a/neutron/tests/unit/agent/l3/test_agent.py b/neutron/tests/unit/agent/l3/test_agent.py index ecbf58f34..6f74c6d22 100644 --- a/neutron/tests/unit/agent/l3/test_agent.py +++ b/neutron/tests/unit/agent/l3/test_agent.py @@ -39,6 +39,7 @@ from neutron.agent.linux import external_process from neutron.agent.linux import interface from neutron.agent.linux import ra from neutron.agent.metadata import driver as metadata_driver +from neutron.agent import rpc as agent_rpc from neutron.callbacks import manager from neutron.callbacks import registry from neutron.common import config as base_config @@ -282,6 +283,7 @@ class BasicRouterOperationsFramework(base.BaseTestCase): self.conf = agent_config.setup_conf() self.conf.register_opts(base_config.core_opts) log.register_options(self.conf) + self.conf.register_opts(agent_config.AGENT_STATE_OPTS, 'AGENT') self.conf.register_opts(l3_config.OPTS) self.conf.register_opts(ha.OPTS) agent_config.register_interface_driver_opts_helper(self.conf) @@ -419,6 +421,24 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework): agent.after_start() router_sync.assert_called_once_with(agent.context) + def test_l3_initial_report_state_done(self): + with mock.patch.object(l3_agent.L3NATAgentWithStateReport, + 'periodic_sync_routers_task'),\ + mock.patch.object(agent_rpc.PluginReportStateAPI, + 'report_state') as report_state,\ + mock.patch.object(eventlet, 'spawn_n'): + + agent = l3_agent.L3NATAgentWithStateReport(host=HOSTNAME, + conf=self.conf) + + self.assertEqual(agent.agent_state['start_flag'], True) + use_call_arg = agent.use_call + agent.after_start() + report_state.assert_called_once_with(agent.context, + agent.agent_state, + use_call_arg) + 
self.assertTrue(agent.agent_state.get('start_flag') is None) + def test_periodic_sync_routers_task_call_clean_stale_namespaces(self): agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) self.plugin_api.get_routers.return_value = [] -- 2.45.2