From af03fdd41608c13afe314e4626c74ce1c7304b5f Mon Sep 17 00:00:00 2001 From: Adriano Rosso Date: Mon, 10 Aug 2015 16:19:31 -0300 Subject: [PATCH] Adds the random option to cinder retry function The current retry functionality in Cinder, always uses fixed numbers to define the time to wait before performing the next attempts. This behaviour can make the retries of multiple requisitions to collide in each attempt as they will wait the same amount of time and try to use the same resource together again. This patch changes the behaviour of cinder.utils.retry() to receive a parameter that allows the function to implement randomly generated wait intervals. Change-Id: If746434e4a19895d649529e9e9f267410a1a1c48 Closes-bug: 1486142 --- cinder/tests/unit/test_utils.py | 43 +++++++++++++++++++ cinder/utils.py | 18 ++++++-- cinder/volume/drivers/hitachi/hnas_backend.py | 6 ++- 3 files changed, 61 insertions(+), 6 deletions(-) diff --git a/cinder/tests/unit/test_utils.py b/cinder/tests/unit/test_utils.py index 7d985f817..5f389c8cf 100644 --- a/cinder/tests/unit/test_utils.py +++ b/cinder/tests/unit/test_utils.py @@ -1391,6 +1391,24 @@ class TestRetryDecorator(test.TestCase): self.assertEqual('success', ret) self.assertEqual(1, self.counter) + def test_no_retry_required_random(self): + self.counter = 0 + + with mock.patch.object(time, 'sleep') as mock_sleep: + @utils.retry(exception.VolumeBackendAPIException, + interval=2, + retries=3, + backoff_rate=2, + wait_random=True) + def succeeds(): + self.counter += 1 + return 'success' + + ret = succeeds() + self.assertFalse(mock_sleep.called) + self.assertEqual('success', ret) + self.assertEqual(1, self.counter) + def test_retries_once(self): self.counter = 0 interval = 2 @@ -1415,6 +1433,31 @@ class TestRetryDecorator(test.TestCase): self.assertEqual(1, mock_sleep.call_count) mock_sleep.assert_called_with(interval * backoff_rate) + def test_retries_once_random(self): + self.counter = 0 + interval = 2 + backoff_rate = 2 + retries = 3 + + with mock.patch.object(time, 'sleep') as mock_sleep: + @utils.retry(exception.VolumeBackendAPIException, + interval, + retries, + backoff_rate, + wait_random=True) + def fails_once(): + self.counter += 1 + if self.counter < 2: + raise exception.VolumeBackendAPIException(data='fake') + else: + return 'success' + + ret = fails_once() + self.assertEqual('success', ret) + self.assertEqual(2, self.counter) + self.assertEqual(1, mock_sleep.call_count) + self.assertTrue(mock_sleep.called) + def test_limit_is_reached(self): self.counter = 0 retries = 3 diff --git a/cinder/utils.py b/cinder/utils.py index fd617f022..4bfdcfec0 100644 --- a/cinder/utils.py +++ b/cinder/utils.py @@ -27,6 +27,7 @@ import inspect import logging as py_logging import os import pyclbr +import random import re import shutil import stat @@ -768,16 +769,25 @@ def is_blk_device(dev): return False -def retry(exceptions, interval=1, retries=3, backoff_rate=2): +def retry(exceptions, interval=1, retries=3, backoff_rate=2, + wait_random=False): def _retry_on_exception(e): return isinstance(e, exceptions) def _backoff_sleep(previous_attempt_number, delay_since_first_attempt_ms): exp = backoff_rate ** previous_attempt_number - wait_for = max(0, interval * exp) - LOG.debug("Sleeping for %s seconds", wait_for) - return wait_for * 1000.0 + wait_for = interval * exp + + if wait_random: + random.seed() + wait_val = random.randrange(interval * 1000.0, wait_for * 1000.0) + else: + wait_val = wait_for * 1000.0 + + LOG.debug("Sleeping for %s seconds", (wait_val / 1000.0)) + + return wait_val def _print_stop(previous_attempt_number, delay_since_first_attempt_ms): delay_since_first_attempt = delay_since_first_attempt_ms / 1000.0 diff --git a/cinder/volume/drivers/hitachi/hnas_backend.py b/cinder/volume/drivers/hitachi/hnas_backend.py index 6b2e434af..f13dde7c3 100644 --- a/cinder/volume/drivers/hitachi/hnas_backend.py +++ b/cinder/volume/drivers/hitachi/hnas_backend.py @@ -40,7 +40,8 @@ class HnasBackend(object): self.drv_configs = drv_configs self.sshpool = None - @utils.retry(exceptions=exception.HNASConnError, retries=HNAS_SSC_RETRIES) + @utils.retry(exceptions=exception.HNASConnError, retries=HNAS_SSC_RETRIES, + wait_random=True) def run_cmd(self, cmd, ip0, user, pw, *args, **kwargs): """Run a command on SMU or using SSH @@ -560,7 +561,8 @@ class HnasBackend(object): LOG.debug('extend_vol: %s.', out) return out - @utils.retry(putils.ProcessExecutionError, retries=HNAS_SSC_RETRIES) + @utils.retry(putils.ProcessExecutionError, retries=HNAS_SSC_RETRIES, + wait_random=True) def add_iscsi_conn(self, cmd, ip0, user, pw, lun_name, hdp, port, tgtalias, initiator): """Setup the lun on on the specified target port -- 2.45.2