From 5dd6350b3f80be9239ef9b9f9ff753e3b9b0c474 Mon Sep 17 00:00:00 2001 From: Ivan Kolodyazhny Date: Thu, 11 Jun 2015 13:55:28 +0300 Subject: [PATCH] Ceph driver support retries on rados_connect_timeout Added a retry mechanism for failed connections to the Ceph cluster. This patch introduces the new config options rados_connection_retries and rados_connection_interval, which control the number of retries and the interval (in seconds) between them when a connection attempt fails, e.g. because rados_connect_timeout expired. DocImpact Change-Id: Ice65fc40e9bd94805700f64397caf856982fa320 Closes-Bug: #1462970 --- cinder/tests/unit/test_rbd.py | 6 ++++-- cinder/volume/drivers/rbd.py | 13 +++++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/cinder/tests/unit/test_rbd.py b/cinder/tests/unit/test_rbd.py index 6c52cd4c7..30a59cee4 100644 --- a/cinder/tests/unit/test_rbd.py +++ b/cinder/tests/unit/test_rbd.py @@ -840,7 +840,8 @@ class RBDTestCase(test.TestCase): self.assertEqual(1, mock_driver._disconnect_from_rados.call_count) @common_mocks - def test_connect_to_rados(self): + @mock.patch('time.sleep') + def test_connect_to_rados(self, sleep_mock): # Default self.cfg.rados_connect_timeout = -1 @@ -880,7 +881,8 @@ class RBDTestCase(test.TestCase): self.assertRaises(exception.VolumeBackendAPIException, self.driver._connect_to_rados) self.assertTrue(self.mock_rados.Rados.return_value.open_ioctx.called) - self.mock_rados.Rados.return_value.shutdown.assert_called_once_with() + self.assertEqual( + 3, self.mock_rados.Rados.return_value.shutdown.call_count) class RBDImageIOWrapperTestCase(test.TestCase): diff --git a/cinder/volume/drivers/rbd.py b/cinder/volume/drivers/rbd.py index 8461607f6..1b9815b1a 100644 --- a/cinder/volume/drivers/rbd.py +++ b/cinder/volume/drivers/rbd.py @@ -32,6 +32,7 @@ from cinder import exception from cinder.i18n import _, _LE, _LI, _LW from cinder.image import image_utils from cinder.openstack.common import fileutils +from cinder import utils from cinder.volume import driver try: @@ -83,7 +84,13 @@ rbd_opts = [ 
cfg.IntOpt('rados_connect_timeout', default=-1, help=_('Timeout value (in seconds) used when connecting to ' 'ceph cluster. If value < 0, no timeout is set and ' - 'default librados value is used.')) + 'default librados value is used.')), + cfg.IntOpt('rados_connection_retries', default=3, + help=_('Number of retries if connection to ceph cluster ' + 'failed.')), + cfg.IntOpt('rados_connection_interval', default=5, + help=_('Interval value (in seconds) between connection ' + 'retries to ceph cluster.')) ] CONF = cfg.CONF @@ -307,6 +314,9 @@ class RBDDriver(driver.RetypeVD, driver.TransferVD, driver.ExtendVD, args.extend(['--cluster', self.configuration.rbd_cluster_name]) return args + @utils.retry(exception.VolumeBackendAPIException, + CONF.rados_connection_interval, + CONF.rados_connection_retries) def _connect_to_rados(self, pool=None): LOG.debug("opening connection to ceph cluster (timeout=%s).", self.configuration.rados_connect_timeout) @@ -335,7 +345,6 @@ class RBDDriver(driver.RetypeVD, driver.TransferVD, driver.ExtendVD, except self.rados.Error: msg = _("Error connecting to ceph cluster.") LOG.exception(msg) - # shutdown cannot raise an exception client.shutdown() raise exception.VolumeBackendAPIException(data=msg) -- 2.45.2