From 9f83602a82153e0be8c127c3d936b512327336f7 Mon Sep 17 00:00:00 2001 From: Ivan Kolodyazhny Date: Mon, 20 Apr 2015 22:53:14 +0300 Subject: [PATCH] Move RBD calls to separate threads RBD is a python binding for librados which isn't patched by eventlet. Running long tasks such as removing big (~100GB, ~1TB) volumes blocks the eventlet loop, and the whole cinder-volume service hangs until the task has finished when rados_connect_timeout is disabled. This makes cinder-volume services unavailable for a while. This patch moves all rados calls to a separate python thread which doesn't block the eventlet loop. Change-Id: Ibaf43858d60e1320c339f2523b5c09c7f7c7f91e Closes-Bug: #1401335 --- cinder/volume/drivers/rbd.py | 58 ++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/cinder/volume/drivers/rbd.py b/cinder/volume/drivers/rbd.py index a78921d3a..368de9f18 100644 --- a/cinder/volume/drivers/rbd.py +++ b/cinder/volume/drivers/rbd.py @@ -21,6 +21,7 @@ import os import tempfile import urllib +from eventlet import tpool from oslo_config import cfg from oslo_log import log as logging from oslo_utils import encodeutils @@ -291,6 +292,9 @@ class RBDDriver(driver.VolumeDriver): with RADOSClient(self): pass + def RBDProxy(self): + return tpool.Proxy(self.rbd.RBD()) + def _ceph_args(self): args = [] if self.configuration.rbd_user: @@ -303,10 +307,14 @@ class RBDDriver(driver.VolumeDriver): LOG.debug("opening connection to ceph cluster (timeout=%s)." % (self.configuration.rados_connect_timeout)) - client = self.rados.Rados( - rados_id=self.configuration.rbd_user, - clustername=self.configuration.rbd_cluster_name, - conffile=self.configuration.rbd_ceph_conf) + # NOTE (e0ne): rados is a binding to the C library librados. + # It blocks the eventlet loop so we need to run it in a native + # python thread. 
+ client = tpool.Proxy( + self.rados.Rados( + rados_id=self.configuration.rbd_user, + clustername=self.configuration.rbd_cluster_name, + conffile=self.configuration.rbd_ceph_conf)) if pool is not None: pool = encodeutils.safe_encode(pool) else: @@ -493,9 +501,9 @@ class RBDDriver(driver.VolumeDriver): "'%(dest)s'" % {'src_vol': src_name, 'src_snap': clone_snap, 'dest': dest_name}) - self.rbd.RBD().clone(client.ioctx, src_name, clone_snap, - client.ioctx, dest_name, - features=client.features) + self.RBDProxy().clone(client.ioctx, src_name, clone_snap, + client.ioctx, dest_name, + features=client.features) except Exception as exc: src_volume.unprotect_snap(clone_snap) src_volume.remove_snap(clone_snap) @@ -522,12 +530,12 @@ class RBDDriver(driver.VolumeDriver): order = int(math.log(chunk_size, 2)) with RADOSClient(self) as client: - self.rbd.RBD().create(client.ioctx, - encodeutils.safe_encode(volume['name']), - size, - order, - old_format=False, - features=client.features) + self.RBDProxy().create(client.ioctx, + encodeutils.safe_encode(volume['name']), + size, + order, + old_format=False, + features=client.features) def _flatten(self, pool, volume_name): LOG.debug('flattening %(pool)s/%(img)s' % @@ -541,12 +549,12 @@ class RBDDriver(driver.VolumeDriver): dst=volume['name'])) with RADOSClient(self, src_pool) as src_client: with RADOSClient(self) as dest_client: - self.rbd.RBD().clone(src_client.ioctx, - encodeutils.safe_encode(src_image), - encodeutils.safe_encode(src_snap), - dest_client.ioctx, - encodeutils.safe_encode(volume['name']), - features=src_client.features) + self.RBDProxy().clone(src_client.ioctx, + encodeutils.safe_encode(src_image), + encodeutils.safe_encode(src_snap), + dest_client.ioctx, + encodeutils.safe_encode(volume['name']), + features=src_client.features) def _resize(self, volume, **kwargs): size = kwargs.get('size', None) @@ -622,7 +630,7 @@ class RBDDriver(driver.VolumeDriver): # keep walking up the chain if it is itself a clone. 
if (not parent_has_snaps) and parent_name.endswith('.deleted'): LOG.debug("deleting parent %s" % (parent_name)) - self.rbd.RBD().remove(client.ioctx, parent_name) + self.RBDProxy().remove(client.ioctx, parent_name) # Now move up to grandparent if there is one if g_parent: @@ -671,7 +679,7 @@ class RBDDriver(driver.VolumeDriver): if clone_snap is None: LOG.debug("deleting rbd volume %s" % (volume_name)) try: - self.rbd.RBD().remove(client.ioctx, volume_name) + self.RBDProxy().remove(client.ioctx, volume_name) except self.rbd.ImageBusy: msg = (_("ImageBusy error raised while deleting rbd " "volume. This may have been caused by a " @@ -698,7 +706,7 @@ class RBDDriver(driver.VolumeDriver): # delete it. Instead we will keep it as a silent volume which # will be deleted when it's snapshot and clones are deleted. new_name = "%s.deleted" % (volume_name) - self.rbd.RBD().rename(client.ioctx, volume_name, new_name) + self.RBDProxy().rename(client.ioctx, volume_name, new_name) def create_snapshot(self, snapshot): """Creates an rbd snapshot.""" @@ -949,9 +957,9 @@ class RBDDriver(driver.VolumeDriver): # Raise an exception if we didn't find a suitable rbd image. with RADOSClient(self) as client: rbd_name = existing_ref['source-name'] - self.rbd.RBD().rename(client.ioctx, - encodeutils.safe_encode(rbd_name), - encodeutils.safe_encode(volume['name'])) + self.RBDProxy().rename(client.ioctx, + encodeutils.safe_encode(rbd_name), + encodeutils.safe_encode(volume['name'])) def manage_existing_get_size(self, volume, existing_ref): """Return size of an existing image for manage_existing. -- 2.45.2