]> review.fuel-infra Code Review - openstack-build/neutron-build.git/commitdiff
Add clock sync error detection on agent registration
authorSudipta Biswas <sbiswas7@in.ibm.com>
Wed, 18 Mar 2015 18:05:57 +0000 (23:35 +0530)
committerSudipta Biswas <sbiswas7@in.ibm.com>
Tue, 14 Apr 2015 04:12:23 +0000 (09:42 +0530)
For the server to determine if an agent is alive or not,
it depends on the agent's clock being mostly in sync with the server
clock. The neutron-server may reject and return the request if
there's a timestamp difference between the two nodes. Currently
there's no good way to detect this condition from the agent code.

This fix will improve the error handling
logic by writing an appropriate log in the neutron server's log
file for an early detection of the problem.

Change-Id: If884f90c4b1786cfc63d3e2ff2d66f92122258c2
Closes-Bug: #1432582

neutron/db/agents_db.py
neutron/tests/unit/plugins/openvswitch/test_agent_scheduler.py

index c9862d131f4cec66fdfca6f94e5b5b56374f228b..26a6212d5731e71f161aa336e47f61daa89108be 100644 (file)
@@ -29,7 +29,7 @@ from neutron.common import constants
 from neutron.db import model_base
 from neutron.db import models_v2
 from neutron.extensions import agent as ext_agent
-from neutron.i18n import _LW
+from neutron.i18n import _LE, _LW
 from neutron import manager
 
 LOG = logging.getLogger(__name__)
@@ -272,10 +272,41 @@ class AgentExtRpcCallback(object):
         """Report state from agent to server."""
         time = kwargs['time']
         time = timeutils.parse_strtime(time)
+        agent_state = kwargs['agent_state']['agent_state']
+        self._check_clock_sync_on_agent_start(agent_state, time)
         if self.START_TIME > time:
-            LOG.debug("Message with invalid timestamp received")
+            time_agent = timeutils.isotime(time)
+            time_server = timeutils.isotime(self.START_TIME)
+            log_dict = {'agent_time': time_agent, 'server_time': time_server}
+            LOG.debug("Stale message received with timestamp: %(agent_time)s. "
+                      "Skipping processing because it's older than the "
+                      "server start timestamp: %(server_time)s", log_dict)
             return
-        agent_state = kwargs['agent_state']['agent_state']
         if not self.plugin:
             self.plugin = manager.NeutronManager.get_plugin()
         self.plugin.create_or_update_agent(context, agent_state)
+
+    def _check_clock_sync_on_agent_start(self, agent_state, agent_time):
+        """Checks if the server and the agent times are in sync.
+
+        Method checks if the agent time is in sync with the server time
+        on start up. Ignores it, on subsequent re-connects.
+        """
+        if agent_state.get('start_flag'):
+            time_server_now = timeutils.utcnow()
+            diff = abs((time_server_now - agent_time).seconds)
+            if diff > cfg.CONF.agent_down_time:
+                agent_name = agent_state['agent_type']
+                time_agent = timeutils.isotime(agent_time)
+                host = agent_state['host']
+                log_dict = {'host': host,
+                            'agent_name': agent_name,
+                            'agent_time': time_agent,
+                            'threshold': cfg.CONF.agent_down_time,
+                            'serv_time': timeutils.isotime(time_server_now)}
+                LOG.error(_LE("Message received from the host: %(host)s "
+                              "during the registration of %(agent_name)s has "
+                              "a timestamp: %(agent_time)s. This differs from "
+                              "the current server timestamp: %(serv_time)s by "
+                              "more than the threshold agent down"
+                              "time: %(threshold)s."), log_dict)
index 10eb7b20973be2cb0c1dd70c306b180a1eb30ced..6a21e3b2d71814dcf79bbdf5f61f029df9af3dba 100644 (file)
@@ -186,6 +186,41 @@ class AgentSchedulerTestMixIn(object):
                               agent_state={'agent_state': agent_state},
                               time=timeutils.strtime())
 
+    def test_agent_registration_bad_timestamp(self):
+        dhcp_hosta = {
+            'binary': 'neutron-dhcp-agent',
+            'host': DHCP_HOSTA,
+            'start_flag': True,
+            'topic': 'DHCP_AGENT',
+            'configurations': {'dhcp_driver': 'dhcp_driver',
+                               'use_namespaces': True,
+                               },
+            'agent_type': constants.AGENT_TYPE_DHCP}
+        callback = agents_db.AgentExtRpcCallback()
+        delta_time = datetime.datetime.now() - datetime.timedelta(days=1)
+        str_time = delta_time.strftime('%Y-%m-%dT%H:%M:%S.%f')
+        callback.report_state(self.adminContext,
+                              agent_state={'agent_state': dhcp_hosta},
+                              time=str_time)
+
+    def test_agent_registration_invalid_timestamp_allowed(self):
+        dhcp_hosta = {
+            'binary': 'neutron-dhcp-agent',
+            'host': DHCP_HOSTA,
+            'start_flag': True,
+            'topic': 'DHCP_AGENT',
+            'configurations': {'dhcp_driver': 'dhcp_driver',
+                               'use_namespaces': True,
+                               },
+            'agent_type': constants.AGENT_TYPE_DHCP}
+        callback = agents_db.AgentExtRpcCallback()
+        utc_time = datetime.datetime.utcnow()
+        delta_time = utc_time - datetime.timedelta(seconds=10)
+        str_time = delta_time.strftime('%Y-%m-%dT%H:%M:%S.%f')
+        callback.report_state(self.adminContext,
+                              agent_state={'agent_state': dhcp_hosta},
+                              time=str_time)
+
     def _disable_agent(self, agent_id, admin_state_up=False):
         new_agent = {}
         new_agent['agent'] = {}