review.fuel-infra Code Review - openstack-build/cinder-build.git/commitdiff
long flashcopy operation may block volume service
author Alan Jiang <ajiang@us.ibm.com>
Thu, 3 Oct 2013 22:03:09 +0000 (17:03 -0500)
committer Avishay Traeger <avishay@il.ibm.com>
Wed, 9 Oct 2013 17:50:28 +0000 (20:50 +0300)
Storwize family uses flashcopy for snapshot or volume clone. The
volume delete has to wait until flashcopy finishes or errors out.
The _delete_vdisk() will poll volume FlashCopy status in a loop.
This may block the volume service heartbeat since it runs in the same
thread. The solution is to use the openstack FixedIntervalLoopingCall
to run the FlashCopy status poll in a timer thread.
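
As a reference, here is a minimal sketch of that polling pattern (the
_wait_for_fc_cleanup and _check names are illustrative, not part of the
change; the real implementation is _ensure_vdisk_no_fc_mappings below):

    from cinder.openstack.common import loopingcall

    def _wait_for_fc_cleanup(driver, vdisk_name, interval=300):
        # Poll in a timer greenthread instead of a while/sleep loop so
        # the volume service heartbeat, sent from the same thread, is
        # not starved while FlashCopy runs for hours.
        def _check():
            if not driver._get_vdisk_fc_mappings(vdisk_name):
                # Raising LoopingCallDone stops the timer; retvalue
                # becomes the result of wait().
                raise loopingcall.LoopingCallDone(retvalue=True)

        timer = loopingcall.FixedIntervalLoopingCall(_check)
        return timer.start(interval=interval).wait()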

The cinder volume manager will resume delete operations for those
volumes that are in the deleting state during volume service startup.
Since a Storwize volume delete may wait for a long time, this can cause
the volume service to have a long delay before it becomes available.
A greenpool is used to offload those volume delete operations.
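
A minimal sketch of the offload, assuming an eventlet GreenPool (the
resume_deletes wrapper is illustrative; the actual change adds an
_add_to_threadpool helper to the manager):

    from eventlet.greenpool import GreenPool

    pool = GreenPool()

    def resume_deletes(manager, ctxt, volumes):
        for volume in volumes:
            if volume['status'] == 'deleting':
                # Pass the callable and its arguments to spawn_n so the
                # delete runs in a worker greenthread instead of
                # blocking init_host.
                pool.spawn_n(manager.delete_volume, ctxt, volume['id'])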

Change-Id: Ie01a441a327e1e318fa8da0040ae130731b7a686
Closes-Bug: #1203152

cinder/tests/test_storwize_svc.py
cinder/volume/drivers/storwize_svc.py
cinder/volume/manager.py
etc/cinder/cinder.conf.sample

index 2ae4298cfe84e98ce1de5c9444c48ab9297e84a2..76edce2076d474d15cb9e473bb1c24443bad9e18 100644 (file)
@@ -39,6 +39,7 @@ from cinder.volume import configuration as conf
 from cinder.volume.drivers import storwize_svc
 from cinder.volume import volume_types
 
+from eventlet import greenthread
 
 LOG = logging.getLogger(__name__)
 
@@ -1165,7 +1166,7 @@ port_speed!N/A
                                 'no'])
 
         for d in to_delete:
-            del self._fcmappings_list[k]
+            del self._fcmappings_list[d]
 
         return self._print_info_cmd(rows=rows, **kwargs)
 
@@ -1485,6 +1486,8 @@ class StorwizeSVCDriverTestCase(test.TestCase):
         self.driver.do_setup(None)
         self.driver.check_for_setup_error()
         self.stubs.Set(storwize_svc.time, 'sleep', lambda s: None)
+        self.stubs.Set(greenthread, 'sleep', lambda *x, **y: None)
+        self.stubs.Set(storwize_svc, 'CHECK_FCMAPPING_INTERVAL', 0)
 
     def _set_flag(self, flag, value):
         group = self.driver.configuration.config_group
index ee5fc6ddee20dfb2f848ca420d3f5c472bb7625c..51984bcc16bfb70681fd064f79aa3ded1acf1d37 100644 (file)
@@ -51,6 +51,7 @@ from cinder import context
 from cinder import exception
 from cinder.openstack.common import excutils
 from cinder.openstack.common import log as logging
+from cinder.openstack.common import loopingcall
 from cinder.openstack.common import processutils
 from cinder.openstack.common import strutils
 from cinder import utils
@@ -112,6 +113,8 @@ storwize_svc_opts = [
 CONF = cfg.CONF
 CONF.register_opts(storwize_svc_opts)
 
+CHECK_FCMAPPING_INTERVAL = 300
+
 
 class StorwizeSVCDriver(san.SanDriver):
     """IBM Storwize V7000 and SVC iSCSI/FC volume driver.
@@ -1243,55 +1246,73 @@ class StorwizeSVCDriver(san.SanDriver):
             return True
 
     def _ensure_vdisk_no_fc_mappings(self, name, allow_snaps=True):
-        # Ensure vdisk has no FlashCopy mappings
+        """Ensure vdisk has no flashcopy mappings."""
+        timer = loopingcall.FixedIntervalLoopingCall(
+            self._check_vdisk_fc_mappings, name, allow_snaps)
+        # Create a timer greenthread. The default volume service
+        # heartbeat is every 10 seconds. The flashcopy usually takes
+        # hours before it finishes. Don't set the sleep interval
+        # shorter than the heartbeat; otherwise the volume service
+        # heartbeat will not be serviced.
+        LOG.debug(_('Calling _ensure_vdisk_no_fc_mappings: vdisk %s')
+                  % name)
+        ret = timer.start(interval=CHECK_FCMAPPING_INTERVAL).wait()
+        timer.stop()
+        return ret
+
+    def _check_vdisk_fc_mappings(self, name, allow_snaps=True):
+        """FlashCopy mapping check helper."""
+
+        LOG.debug(_('Loopcall: _check_vdisk_fc_mappings(), vdisk %s') % name)
         mapping_ids = self._get_vdisk_fc_mappings(name)
-        while len(mapping_ids):
-            wait_for_copy = False
-            for map_id in mapping_ids:
-                attrs = self._get_flashcopy_mapping_attributes(map_id)
-                if not attrs:
-                    continue
-                source = attrs['source_vdisk_name']
-                target = attrs['target_vdisk_name']
-                copy_rate = attrs['copy_rate']
-                status = attrs['status']
-
-                if copy_rate == '0':
-                    # Case #2: A vdisk that has snapshots
-                    if source == name:
-                        if not allow_snaps:
-                            return False
-                        ssh_cmd = ['svctask', 'chfcmap', '-copyrate', '50',
-                                   '-autodelete', 'on', map_id]
-                        out, err = self._run_ssh(ssh_cmd)
-                        wait_for_copy = True
-                    # Case #3: A snapshot
-                    else:
-                        msg = (_('Vdisk %(name)s not involved in '
-                                 'mapping %(src)s -> %(tgt)s') %
-                               {'name': name, 'src': source, 'tgt': target})
-                        self._driver_assert(target == name, msg)
-                        if status in ['copying', 'prepared']:
-                            self._run_ssh(['svctask', 'stopfcmap', map_id])
-                        elif status in ['stopping', 'preparing']:
-                            wait_for_copy = True
-                        else:
-                            self._run_ssh(['svctask', 'rmfcmap', '-force',
-                                           map_id])
-                # Case 4: Copy in progress - wait and will autodelete
+        wait_for_copy = False
+        for map_id in mapping_ids:
+            attrs = self._get_flashcopy_mapping_attributes(map_id)
+            if not attrs:
+                continue
+            source = attrs['source_vdisk_name']
+            target = attrs['target_vdisk_name']
+            copy_rate = attrs['copy_rate']
+            status = attrs['status']
+
+            if copy_rate == '0':
+                # Case #2: A vdisk that has snapshots. Return
+                #          False if snapshot is not allowed.
+                if source == name:
+                    if not allow_snaps:
+                        raise loopingcall.LoopingCallDone(retvalue=False)
+                    ssh_cmd = ['svctask', 'chfcmap', '-copyrate', '50',
+                               '-autodelete', 'on', map_id]
+                    out, err = self._run_ssh(ssh_cmd)
+                    wait_for_copy = True
+                # Case #3: A snapshot
                 else:
-                    if status == 'prepared':
+                    msg = (_('Vdisk %(name)s not involved in '
+                             'mapping %(src)s -> %(tgt)s') %
+                           {'name': name, 'src': source, 'tgt': target})
+                    self._driver_assert(target == name, msg)
+                    if status in ['copying', 'prepared']:
                         self._run_ssh(['svctask', 'stopfcmap', map_id])
-                        self._run_ssh(['svctask', 'rmfcmap', '-force', map_id])
-                    elif status == 'idle_or_copied':
-                        # Prepare failed
-                        self._run_ssh(['svctask', 'rmfcmap', '-force', map_id])
-                    else:
+                        # Need to wait for the fcmap to change to the
+                        # stopped state before removing the fcmap
+                        wait_for_copy = True
+                    elif status in ['stopping', 'preparing']:
                         wait_for_copy = True
-            if wait_for_copy:
-                time.sleep(5)
-            mapping_ids = self._get_vdisk_fc_mappings(name)
-        return True
+                    else:
+                        self._run_ssh(['svctask', 'rmfcmap', '-force',
+                                       map_id])
+            # Case 4: Copy in progress - wait and will autodelete
+            else:
+                if status == 'prepared':
+                    self._run_ssh(['svctask', 'stopfcmap', map_id])
+                    self._run_ssh(['svctask', 'rmfcmap', '-force', map_id])
+                elif status == 'idle_or_copied':
+                    # Prepare failed
+                    self._run_ssh(['svctask', 'rmfcmap', '-force', map_id])
+                else:
+                    wait_for_copy = True
+        if not wait_for_copy or not len(mapping_ids):
+            raise loopingcall.LoopingCallDone(retvalue=True)
 
     def _delete_vdisk(self, name, force):
         """Deletes existing vdisks.
index be3d11a555dd40e0b1889efbac9efd90d00d03c6..dda762858838a1551cf11c70c92ff724a111ad53 100644 (file)
@@ -64,6 +64,8 @@ from cinder.volume import volume_types
 
 from cinder.taskflow import states
 
+from eventlet.greenpool import GreenPool
+
 LOG = logging.getLogger(__name__)
 
 QUOTAS = quota.QUOTAS
@@ -76,6 +78,10 @@ volume_manager_opts = [
                default=300,
                help='Timeout for creating the volume to migrate to '
                     'when performing volume migration (seconds)'),
+    cfg.BoolOpt('volume_service_inithost_offload',
+                default=False,
+                help='Offload pending volume delete during '
+                     'volume service startup'),
 ]
 
 CONF = cfg.CONF
@@ -144,6 +150,8 @@ class VolumeManager(manager.SchedulerDependentManager):
                                             *args, **kwargs)
         self.configuration = Configuration(volume_manager_opts,
                                            config_group=service_name)
+        self._tp = GreenPool()
+
         if not volume_driver:
             # Get from configuration, which will get the default
             # if its not using the multi backend
@@ -165,6 +173,9 @@ class VolumeManager(manager.SchedulerDependentManager):
             configuration=self.configuration,
             db=self.db)
 
+    def _add_to_threadpool(self, func, *args, **kwargs):
+        self._tp.spawn_n(func, *args, **kwargs)
+
     def init_host(self):
         """Do any initialization that needs to be run if this is a
            standalone service.
@@ -208,7 +219,15 @@ class VolumeManager(manager.SchedulerDependentManager):
         for volume in volumes:
             if volume['status'] == 'deleting':
                 LOG.info(_('Resuming delete on volume: %s') % volume['id'])
-                self.delete_volume(ctxt, volume['id'])
+                if CONF.volume_service_inithost_offload:
+                    # Offload all the pending volume delete operations to the
+                    # threadpool to prevent the main volume service thread
+                    # from being blocked.
+                    self._add_to_threadpool(self.delete_volume, ctxt,
+                                            volume['id'])
+                else:
+                    # By default, delete volumes sequentially
+                    self.delete_volume(ctxt, volume['id'])
 
         # collect and publish service capabilities
         self.publish_service_capabilities(ctxt)
index df1d31f109d3b834551c588ac6ecb950140e81b7..af0cf84ae63db551466225291fae010cf1c7dd3e 100644 (file)
 # performing volume migration (seconds) (integer value)
 #migration_create_volume_timeout_secs=300
 
+# Offload pending volume delete during volume service startup
+# (boolean value)
+#volume_service_inithost_offload=false
+
 
 #
 # Options defined in cinder.volume.utils
 #volume_dd_blocksize=1M
 
 
-# Total option count: 381
+# Total option count: 382