review.fuel-infra Code Review - openstack-build/neutron-build.git/commitdiff
Big Switch: Retry on 503 errors from backend
author Kevin Benton <blak111@gmail.com>
Wed, 13 Aug 2014 11:31:45 +0000 (04:31 -0700)
committer Kevin Benton <blak111@gmail.com>
Tue, 2 Sep 2014 12:04:13 +0000 (05:04 -0700)
Retries requests to the backend controller up to 3
additional times, with 3 seconds between requests,
if a 503 Service Unavailable response is returned.
The scenarios that return 503 responses from Floodlight
are normally short-lived locks for things like OpenStack
synchronization or upgrade blocks, so retrying should work
in the majority of cases.

Closes-Bug: #1357105
Change-Id: Ifacd3a384cfc797ba6d6af5f3c8649c333473259

neutron/plugins/bigswitch/servermanager.py
neutron/tests/unit/bigswitch/test_servermanager.py

index bc070c2e75a451cb7fc48ef000f9de38e9e6226c..db4c377882a8d11677d1a4fcc11c29331239efeb 100644 (file)
@@ -35,6 +35,7 @@ import httplib
 import os
 import socket
 import ssl
+import time
 import weakref
 
 import eventlet
@@ -72,6 +73,8 @@ ORCHESTRATION_SERVICE_ID = 'Neutron v2.0'
 HASH_MATCH_HEADER = 'X-BSN-BVS-HASH-MATCH'
 # error messages
 NXNETWORK = 'NXVNS'
+HTTP_SERVICE_UNAVAILABLE_RETRY_COUNT = 3
+HTTP_SERVICE_UNAVAILABLE_RETRY_INTERVAL = 3
 
 
 class RemoteRestError(exceptions.NeutronException):
@@ -417,10 +420,15 @@ class ServerPool(object):
         good_first = sorted(self.servers, key=lambda x: x.failed)
         first_response = None
         for active_server in good_first:
-            ret = active_server.rest_call(action, resource, data, headers,
-                                          timeout,
-                                          reconnect=self.always_reconnect,
-                                          hash_handler=hash_handler)
+            for x in range(HTTP_SERVICE_UNAVAILABLE_RETRY_COUNT + 1):
+                ret = active_server.rest_call(action, resource, data, headers,
+                                              timeout,
+                                              reconnect=self.always_reconnect,
+                                              hash_handler=hash_handler)
+                if ret[0] != httplib.SERVICE_UNAVAILABLE:
+                    break
+                time.sleep(HTTP_SERVICE_UNAVAILABLE_RETRY_INTERVAL)
+
             # If inconsistent, do a full synchronization
             if ret[0] == httplib.CONFLICT:
                 if not self.get_topo_function:
index ef9e4af23950d85ebfe5e777282ca08bab11a16c..5255cd9c0979b62351a820a68a841979dce375ca 100644 (file)
@@ -373,6 +373,26 @@ class ServerManagerTests(test_rp.BigSwitchProxyPluginV2TestCase):
         self.assertFalse(pl.servers.server_failure((404,),
                                                    ignore_codes=[404]))
 
+    def test_retry_on_unavailable(self):
+        pl = manager.NeutronManager.get_plugin()
+        with contextlib.nested(
+            mock.patch(SERVERMANAGER + '.ServerProxy.rest_call',
+                       return_value=(httplib.SERVICE_UNAVAILABLE, 0, 0, 0)),
+            mock.patch(SERVERMANAGER + '.time.sleep')
+        ) as (srestmock, tmock):
+            # making a call should trigger retries with sleeps in between
+            pl.servers.rest_call('GET', '/', '', None, [])
+            rest_call = [mock.call('GET', '/', '', None, False, reconnect=True,
+                                   hash_handler=mock.ANY)]
+            rest_call_count = (
+                servermanager.HTTP_SERVICE_UNAVAILABLE_RETRY_COUNT + 1)
+            srestmock.assert_has_calls(rest_call * rest_call_count)
+            sleep_call = [mock.call(
+                servermanager.HTTP_SERVICE_UNAVAILABLE_RETRY_INTERVAL)]
+            # should sleep 1 less time than the number of calls
+            sleep_call_count = rest_call_count - 1
+            tmock.assert_has_calls(sleep_call * sleep_call_count)
+
     def test_conflict_triggers_sync(self):
         pl = manager.NeutronManager.get_plugin()
         with mock.patch(