From 2797efc39faca97039714d3ffb6520634bf65b74 Mon Sep 17 00:00:00 2001 From: Sudipta Biswas Date: Wed, 18 Mar 2015 23:35:57 +0530 Subject: [PATCH] Add clock sync error detection on agent registration For the server to determine if an agent is alive or not, it depends on the agent's clock being mostly in sync with the server clock. The neutron-server may reject and return the request if there's a timestamp difference between the two nodes. Currently there's no good way to detect this condition from the agent code. This fix will improve the error handling logic by writing an appropriate log in the neutron server's log file for an early detection of the problem. Change-Id: If884f90c4b1786cfc63d3e2ff2d66f92122258c2 Closes-Bug: #1432582 --- neutron/db/agents_db.py | 37 +++++++++++++++++-- .../openvswitch/test_agent_scheduler.py | 35 ++++++++++++++++++ 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/neutron/db/agents_db.py b/neutron/db/agents_db.py index c9862d131..26a6212d5 100644 --- a/neutron/db/agents_db.py +++ b/neutron/db/agents_db.py @@ -29,7 +29,7 @@ from neutron.common import constants from neutron.db import model_base from neutron.db import models_v2 from neutron.extensions import agent as ext_agent -from neutron.i18n import _LW +from neutron.i18n import _LE, _LW from neutron import manager LOG = logging.getLogger(__name__) @@ -272,10 +272,41 @@ class AgentExtRpcCallback(object): """Report state from agent to server.""" time = kwargs['time'] time = timeutils.parse_strtime(time) + agent_state = kwargs['agent_state']['agent_state'] + self._check_clock_sync_on_agent_start(agent_state, time) if self.START_TIME > time: - LOG.debug("Message with invalid timestamp received") + time_agent = timeutils.isotime(time) + time_server = timeutils.isotime(self.START_TIME) + log_dict = {'agent_time': time_agent, 'server_time': time_server} + LOG.debug("Stale message received with timestamp: %(agent_time)s. " + "Skipping processing because it's older than the " + "server start timestamp: %(server_time)s", log_dict) return - agent_state = kwargs['agent_state']['agent_state'] if not self.plugin: self.plugin = manager.NeutronManager.get_plugin() self.plugin.create_or_update_agent(context, agent_state) + + def _check_clock_sync_on_agent_start(self, agent_state, agent_time): + """Checks if the server and the agent times are in sync. + + Method checks if the agent time is in sync with the server time + on start up. Ignores it, on subsequent re-connects. + """ + if agent_state.get('start_flag'): + time_server_now = timeutils.utcnow() + diff = abs((time_server_now - agent_time).seconds) + if diff > cfg.CONF.agent_down_time: + agent_name = agent_state['agent_type'] + time_agent = timeutils.isotime(agent_time) + host = agent_state['host'] + log_dict = {'host': host, + 'agent_name': agent_name, + 'agent_time': time_agent, + 'threshold': cfg.CONF.agent_down_time, + 'serv_time': timeutils.isotime(time_server_now)} + LOG.error(_LE("Message received from the host: %(host)s " + "during the registration of %(agent_name)s has " + "a timestamp: %(agent_time)s. This differs from " + "the current server timestamp: %(serv_time)s by " + "more than the threshold agent down" + "time: %(threshold)s."), log_dict) diff --git a/neutron/tests/unit/plugins/openvswitch/test_agent_scheduler.py b/neutron/tests/unit/plugins/openvswitch/test_agent_scheduler.py index 10eb7b209..6a21e3b2d 100644 --- a/neutron/tests/unit/plugins/openvswitch/test_agent_scheduler.py +++ b/neutron/tests/unit/plugins/openvswitch/test_agent_scheduler.py @@ -186,6 +186,41 @@ class AgentSchedulerTestMixIn(object): agent_state={'agent_state': agent_state}, time=timeutils.strtime()) + def test_agent_registration_bad_timestamp(self): + dhcp_hosta = { + 'binary': 'neutron-dhcp-agent', + 'host': DHCP_HOSTA, + 'start_flag': True, + 'topic': 'DHCP_AGENT', + 'configurations': {'dhcp_driver': 'dhcp_driver', + 'use_namespaces': True, + }, + 'agent_type': constants.AGENT_TYPE_DHCP} + callback = agents_db.AgentExtRpcCallback() + delta_time = datetime.datetime.now() - datetime.timedelta(days=1) + str_time = delta_time.strftime('%Y-%m-%dT%H:%M:%S.%f') + callback.report_state(self.adminContext, + agent_state={'agent_state': dhcp_hosta}, + time=str_time) + + def test_agent_registration_invalid_timestamp_allowed(self): + dhcp_hosta = { + 'binary': 'neutron-dhcp-agent', + 'host': DHCP_HOSTA, + 'start_flag': True, + 'topic': 'DHCP_AGENT', + 'configurations': {'dhcp_driver': 'dhcp_driver', + 'use_namespaces': True, + }, + 'agent_type': constants.AGENT_TYPE_DHCP} + callback = agents_db.AgentExtRpcCallback() + utc_time = datetime.datetime.utcnow() + delta_time = utc_time - datetime.timedelta(seconds=10) + str_time = delta_time.strftime('%Y-%m-%dT%H:%M:%S.%f') + callback.report_state(self.adminContext, + agent_state={'agent_state': dhcp_hosta}, + time=str_time) + def _disable_agent(self, agent_id, admin_state_up=False): new_agent = {} new_agent['agent'] = {} -- 2.45.2