From 66bd2b39d7490facc55dee3cab9523e8608fe491 Mon Sep 17 00:00:00 2001 From: Michal Jura Date: Mon, 7 Mar 2016 11:29:32 +0100 Subject: [PATCH] Fix failure with rbd on slow ceph clusters Make rados connection interval and retries configurable for the _try_remove_volume() function Otherwise on slow ceph clusters, we can get the following problem: "ImageBusy error raised while deleting rbd volume. This may have been caused by a connection from a client that has crashed and, if so, may be resolved by retrying the delete after 30 seconds has elapsed." Change-Id: I1230715663ea00c3eb4241154e6f194dee0e23d4 Co-Authored-By: Dirk Mueller Closes-Bug: #1554045 --- cinder/tests/unit/test_rbd.py | 2 ++ cinder/volume/drivers/rbd.py | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/cinder/tests/unit/test_rbd.py b/cinder/tests/unit/test_rbd.py index c1f4a4904..bdfb38e7b 100644 --- a/cinder/tests/unit/test_rbd.py +++ b/cinder/tests/unit/test_rbd.py @@ -144,6 +144,8 @@ class RBDTestCase(test.TestCase): self.cfg.rbd_user = None self.cfg.volume_dd_blocksize = '1M' self.cfg.rbd_store_chunk_size = 4 + self.cfg.rados_connection_retries = 3 + self.cfg.rados_connection_interval = 5 mock_exec = mock.Mock() mock_exec.return_value = ('', '') diff --git a/cinder/volume/drivers/rbd.py b/cinder/volume/drivers/rbd.py index 91a02133e..5f4f1bcac 100644 --- a/cinder/volume/drivers/rbd.py +++ b/cinder/volume/drivers/rbd.py @@ -702,7 +702,9 @@ class RBDDriver(driver.TransferVD, driver.ExtendVD, finally: rbd_image.close() - @utils.retry(self.rbd.ImageBusy, retries=3) + @utils.retry(self.rbd.ImageBusy, + self.configuration.rados_connection_interval, + self.configuration.rados_connection_retries) def _try_remove_volume(client, volume_name): self.RBDProxy().remove(client.ioctx, volume_name) -- 2.45.2