The current retry functionality in Cinder always uses fixed
intervals to define the time to wait before performing the next
attempt. This behaviour can cause the retries of multiple
requests to collide on every attempt, since they all wait the
same amount of time and then try to use the same resource at
the same moment again.

This patch changes cinder.utils.retry() to accept a parameter
that makes the function use randomly generated wait intervals.
Change-Id: If746434e4a19895d649529e9e9f267410a1a1c48
Closes-Bug: #1486142
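For illustration, a minimal sketch of how a caller would opt in to the
randomized intervals once this lands; the decorated function and the
work it does are hypothetical, not part of this patch:

    from cinder import exception
    from cinder import utils

    @utils.retry(exception.VolumeBackendAPIException,
                 interval=2, retries=3, backoff_rate=2,
                 wait_random=True)
    def refresh_backend_stats():
        # On each failed attempt the decorator now sleeps for a random
        # duration between `interval` seconds and the exponential
        # backoff ceiling (interval * backoff_rate ** attempt), so
        # concurrent callers drift apart instead of retrying in
        # lockstep.
        ...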
            self.assertEqual('success', ret)
            self.assertEqual(1, self.counter)

+    def test_no_retry_required_random(self):
+        self.counter = 0
+
+        with mock.patch.object(time, 'sleep') as mock_sleep:
+            @utils.retry(exception.VolumeBackendAPIException,
+                         interval=2,
+                         retries=3,
+                         backoff_rate=2,
+                         wait_random=True)
+            def succeeds():
+                self.counter += 1
+                return 'success'
+
+            ret = succeeds()
+            self.assertFalse(mock_sleep.called)
+            self.assertEqual('success', ret)
+            self.assertEqual(1, self.counter)
+
    def test_retries_once(self):
        self.counter = 0
        interval = 2
            self.assertEqual(1, mock_sleep.call_count)
            mock_sleep.assert_called_with(interval * backoff_rate)

+    def test_retries_once_random(self):
+        self.counter = 0
+        interval = 2
+        backoff_rate = 2
+        retries = 3
+
+        with mock.patch.object(time, 'sleep') as mock_sleep:
+            @utils.retry(exception.VolumeBackendAPIException,
+                         interval,
+                         retries,
+                         backoff_rate,
+                         wait_random=True)
+            def fails_once():
+                self.counter += 1
+                if self.counter < 2:
+                    raise exception.VolumeBackendAPIException(data='fake')
+                else:
+                    return 'success'
+
+            ret = fails_once()
+            self.assertEqual('success', ret)
+            self.assertEqual(2, self.counter)
+            self.assertEqual(1, mock_sleep.call_count)
+            self.assertTrue(mock_sleep.called)
+
    def test_limit_is_reached(self):
        self.counter = 0
        retries = 3
import logging as py_logging
import os
import pyclbr
+import random
import re
import shutil
import stat
    return False

-def retry(exceptions, interval=1, retries=3, backoff_rate=2):
+def retry(exceptions, interval=1, retries=3, backoff_rate=2,
+          wait_random=False):

    def _retry_on_exception(e):
        return isinstance(e, exceptions)
    def _backoff_sleep(previous_attempt_number, delay_since_first_attempt_ms):
        exp = backoff_rate ** previous_attempt_number
-        wait_for = max(0, interval * exp)
-        LOG.debug("Sleeping for %s seconds", wait_for)
-        return wait_for * 1000.0
+        wait_for = interval * exp
+
+        if wait_random:
+            # Draw the wait time at random from [interval, wait_for)
+            # seconds so concurrent retries do not fire in lockstep.
+            random.seed()
+            wait_val = random.randrange(interval * 1000.0, wait_for * 1000.0)
+        else:
+            wait_val = wait_for * 1000.0
+
+        LOG.debug("Sleeping for %s seconds", (wait_val / 1000.0))
+
+        # The retrying library expects the wait value in milliseconds.
+        return wait_val

    def _print_stop(previous_attempt_number, delay_since_first_attempt_ms):
        delay_since_first_attempt = delay_since_first_attempt_ms / 1000.0
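To make the effect concrete, here is a small standalone sketch of the
bounds each attempt draws from under the scheme above; the values and
the loop are illustrative only:

    import random

    interval = 2       # base interval, in seconds
    backoff_rate = 2

    for attempt in range(1, 4):
        ceiling = interval * backoff_rate ** attempt
        # Mirrors the patched _backoff_sleep: a value in
        # [interval * 1000, ceiling * 1000) milliseconds.
        wait_ms = random.randrange(interval * 1000, ceiling * 1000)
        print(attempt, wait_ms / 1000.0)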
        self.drv_configs = drv_configs
        self.sshpool = None

-    @utils.retry(exceptions=exception.HNASConnError, retries=HNAS_SSC_RETRIES)
+    @utils.retry(exceptions=exception.HNASConnError, retries=HNAS_SSC_RETRIES,
+                 wait_random=True)
    def run_cmd(self, cmd, ip0, user, pw, *args, **kwargs):
        """Run a command on SMU or using SSH
        LOG.debug('extend_vol: %s.', out)
        return out

-    @utils.retry(putils.ProcessExecutionError, retries=HNAS_SSC_RETRIES)
+    @utils.retry(putils.ProcessExecutionError, retries=HNAS_SSC_RETRIES,
+                 wait_random=True)
    def add_iscsi_conn(self, cmd, ip0, user, pw, lun_name, hdp,
                       port, tgtalias, initiator):
        """Set up the LUN on the specified target port