review.fuel-infra Code Review - openstack-build/cinder-build.git/commitdiff
Delete the temporary volume if migration fails
author    Vincent Hou <sbhou@cn.ibm.com>
Tue, 3 Mar 2015 08:04:41 +0000 (16:04 +0800)
committer Vincent Hou <sbhou@cn.ibm.com>
Wed, 8 Apr 2015 07:33:40 +0000 (15:33 +0800)
This patch resolves the following issues:
* A temporary volume is created on the destination host before the data is
migrated from the source. However, if the creation of this volume fails,
its record is left behind in the database as a redundant entry. This
patch removes the stale database record in that case.
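
A minimal, standalone sketch of this cleanup decision (illustrative only:
FakeDB, clean_temporary_volume and the 'src'/'tmp' ids are placeholders for
this example, not Cinder APIs; the real change is
VolumeManager._clean_temporary_volume() in the manager.py hunk below):

class FakeDB(object):
    """Minimal in-memory stand-in for cinder.db (illustrative only)."""
    def __init__(self, volumes):
        self.volumes = {v['id']: v for v in volumes}

    def volume_get(self, vol_id):
        return self.volumes[vol_id]

    def volume_destroy(self, vol_id):
        self.volumes.pop(vol_id, None)

    def volume_update(self, vol_id, updates):
        self.volumes[vol_id].update(updates)


def clean_temporary_volume(db, volume_id, new_volume_id, clean_db_only=False):
    source = db.volume_get(volume_id)
    if source['migration_status'] == 'migrating':
        if clean_db_only:
            # The temporary volume was never created on the backend, so only
            # the orphaned database row has to go.
            db.volume_destroy(new_volume_id)
        else:
            # The backend volume exists; in the real patch this branch calls
            # rpcapi.delete_volume() instead of touching the DB directly.
            db.volume_destroy(new_volume_id)
    else:
        # Completing phase: the source may already be gone, so only clear
        # the destination's migration status.
        db.volume_update(new_volume_id, {'migration_status': None})


if __name__ == '__main__':
    db = FakeDB([{'id': 'src', 'migration_status': 'migrating'},
                 {'id': 'tmp', 'migration_status': 'target:src'}])
    clean_temporary_volume(db, 'src', 'tmp', clean_db_only=True)
    assert 'tmp' not in db.volumes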

* If attaching the remote dest volume fails in initialize_connection due to
a timeout, we need to terminate the connection. Otherwise, the dest volume
will not be released and cannot be deleted successfully.
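
A minimal, standalone sketch of the same "terminate on failed attach"
pattern (illustrative only: FakeRPCAPI, attach_remote_volume and 'vol-1' are
placeholders for this example; the real code wraps
rpcapi.initialize_connection()/terminate_connection() in
excutils.save_and_reraise_exception(), as in the driver.py hunk below):

class FakeRPCAPI(object):
    """Stand-in for cinder.volume.rpcapi.VolumeAPI (illustrative only)."""
    def initialize_connection(self, volume, properties):
        # Simulate the RPC timeout described above.
        raise TimeoutError('initialize_connection timed out')

    def terminate_connection(self, volume, properties, force=False):
        print('released connection for %s (force=%s)' % (volume['id'], force))


def attach_remote_volume(rpcapi, volume, properties):
    try:
        return rpcapi.initialize_connection(volume, properties)
    except Exception:
        # The attach may have succeeded on the remote host even though the
        # RPC call timed out, so try to release it before re-raising.
        try:
            rpcapi.terminate_connection(volume, properties, force=True)
        except Exception:
            pass  # Best effort; a failed terminate is acceptable here.
        raise


if __name__ == '__main__':
    try:
        attach_remote_volume(FakeRPCAPI(), {'id': 'vol-1'}, {})
    except TimeoutError:
        pass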

Closes-bug: #1398177
Change-Id: I5bc75bd7652841fddd66481ee001fb682168215c

cinder/tests/test_volume.py
cinder/volume/driver.py
cinder/volume/manager.py

index 8bf72e358898a5dd82ea01c73a18aeda712c9ed1..671bce85f8df8666c148ebec05df6eb09d7ae118 100644 (file)
@@ -3828,6 +3828,108 @@ class VolumeTestCase(BaseVolumeTestCase):
             self.assertIsNone(volume['migration_status'])
             self.assertEqual('available', volume['status'])
 
+    def test_clean_temporary_volume(self):
+        def fake_delete_volume(ctxt, volume):
+            db.volume_destroy(ctxt, volume['id'])
+
+        fake_volume = tests_utils.create_volume(self.context, size=1,
+                                                host=CONF.host)
+        fake_new_volume = tests_utils.create_volume(self.context, size=1,
+                                                    host=CONF.host)
+        # Check when the migrated volume is in migration
+        db.volume_update(self.context, fake_volume['id'],
+                         {'migration_status': 'migrating'})
+        # 1. Only clean the db
+        self.volume._clean_temporary_volume(self.context, fake_volume['id'],
+                                            fake_new_volume['id'],
+                                            clean_db_only=True)
+        self.assertRaises(exception.VolumeNotFound,
+                          db.volume_get, self.context,
+                          fake_new_volume['id'])
+
+        # 2. Delete the backend storage
+        fake_new_volume = tests_utils.create_volume(self.context, size=1,
+                                                    host=CONF.host)
+        with mock.patch.object(volume_rpcapi.VolumeAPI, 'delete_volume') as \
+                mock_delete_volume:
+            mock_delete_volume.side_effect = fake_delete_volume
+            self.volume._clean_temporary_volume(self.context,
+                                                fake_volume['id'],
+                                                fake_new_volume['id'],
+                                                clean_db_only=False)
+            self.assertRaises(exception.VolumeNotFound,
+                              db.volume_get, self.context,
+                              fake_new_volume['id'])
+
+        # Check when the migrated volume is not in migration
+        fake_new_volume = tests_utils.create_volume(self.context, size=1,
+                                                    host=CONF.host)
+        db.volume_update(self.context, fake_volume['id'],
+                         {'migration_status': 'non-migrating'})
+        self.volume._clean_temporary_volume(self.context, fake_volume['id'],
+                                            fake_new_volume['id'])
+        volume = db.volume_get(context.get_admin_context(),
+                               fake_new_volume['id'])
+        self.assertIsNone(volume['migration_status'])
+
+    def test_migrate_volume_generic_create_volume_error(self):
+        def fake_create_volume(ctxt, volume, host, req_spec, filters,
+                               allow_reschedule=True):
+            db.volume_update(ctxt, volume['id'],
+                             {'status': 'error'})
+
+        with mock.patch.object(self.volume.driver, 'migrate_volume'), \
+                mock.patch.object(volume_rpcapi.VolumeAPI, 'create_volume') as \
+                mock_create_volume, \
+                mock.patch.object(self.volume, '_clean_temporary_volume') as \
+                clean_temporary_volume:
+
+            # Exception case at the creation of the new temporary volume
+            mock_create_volume.side_effect = fake_create_volume
+            volume = tests_utils.create_volume(self.context, size=0,
+                                               host=CONF.host)
+            host_obj = {'host': 'newhost', 'capabilities': {}}
+            self.assertRaises(exception.VolumeMigrationFailed,
+                              self.volume.migrate_volume,
+                              self.context,
+                              volume['id'],
+                              host_obj,
+                              True)
+            volume = db.volume_get(context.get_admin_context(), volume['id'])
+            self.assertIsNone(volume['migration_status'])
+            self.assertEqual('available', volume['status'])
+            self.assertTrue(clean_temporary_volume.called)
+
+    def test_migrate_volume_generic_timeout_error(self):
+        CONF.set_override("migration_create_volume_timeout_secs", 2)
+
+        def fake_create_volume(ctxt, volume, host, req_spec, filters,
+                               allow_reschedule=True):
+            db.volume_update(ctxt, volume['id'],
+                             {'status': 'creating'})
+
+        with mock.patch.object(self.volume.driver, 'migrate_volume'), \
+                mock.patch.object(volume_rpcapi.VolumeAPI, 'create_volume') as \
+                mock_create_volume, \
+                mock.patch.object(self.volume, '_clean_temporary_volume') as \
+                clean_temporary_volume:
+
+            # Exception case at the timeout of the volume creation
+            mock_create_volume.side_effect = fake_create_volume
+            volume = tests_utils.create_volume(self.context, size=0,
+                                               host=CONF.host)
+            host_obj = {'host': 'newhost', 'capabilities': {}}
+            self.assertRaises(exception.VolumeMigrationFailed,
+                              self.volume.migrate_volume,
+                              self.context,
+                              volume['id'],
+                              host_obj,
+                              True)
+            volume = db.volume_get(context.get_admin_context(), volume['id'])
+            self.assertIsNone(volume['migration_status'])
+            self.assertEqual('available', volume['status'])
+            self.assertTrue(clean_temporary_volume.called)
+
     def test_migrate_volume_generic_create_export_error(self):
         def fake_create_volume(ctxt, volume, host, req_spec, filters,
                                allow_reschedule=True):
index 5e4a70ce1c87cfcae800b2c99ec426396f2f7b14..b9dfea00f024f2ef22fc13e8be7d97845bfc60f9 100644 (file)
@@ -640,7 +640,23 @@ class BaseVD(object):
             # Call remote manager's initialize_connection which includes
             # driver's create_export and initialize_connection
             rpcapi = volume_rpcapi.VolumeAPI()
-            conn = rpcapi.initialize_connection(context, volume, properties)
+            try:
+                conn = rpcapi.initialize_connection(context, volume,
+                                                    properties)
+            except Exception:
+                with excutils.save_and_reraise_exception():
+                    # It is possible that initialize_connection fails due to
+                    # a timeout. The volume may already be attached on the
+                    # remote host even though the error is raised, so it is
+                    # worth trying to terminate the connection.
+                    try:
+                        rpcapi.terminate_connection(context, volume,
+                                                    properties, force=True)
+                    except Exception:
+                        LOG.warning(_LW("Failed terminating the connection "
+                                        "of volume %(volume_id)s, but it is "
+                                        "acceptable."),
+                                    {'volume_id': volume['id']})
         else:
             # Call local driver's create_export and initialize_connection.
             # NOTE(avishay) This is copied from the manager's code - need to
index 2bb52257bc1d012fa460c783fd7dbd60e9517db7..3adfa8c83a500f250c2b8075690659714ec8c2d0 100644 (file)
@@ -528,6 +528,10 @@ class VolumeManager(manager.SchedulerDependentManager):
             raise exception.InvalidVolume(
                 reason=_("volume is not local to this node"))
 
+        is_migrating = volume_ref['migration_status'] is not None
+        is_migrating_dest = (is_migrating and
+                             volume_ref['migration_status'].startswith(
+                                 'target:'))
         self._notify_about_volume_usage(context, volume_ref, "delete.start")
         try:
             # NOTE(flaper87): Verify the driver is enabled
@@ -545,19 +549,17 @@ class VolumeManager(manager.SchedulerDependentManager):
         except exception.VolumeIsBusy:
             LOG.error(_LE("Cannot delete volume %s: volume is busy"),
                       volume_ref['id'])
-            self.db.volume_update(context, volume_ref['id'],
-                                  {'status': 'available'})
+            # If this is a destination volume, we have to clear the database
+            # record to avoid user confusion.
+            self._clear_db(context, is_migrating_dest, volume_ref,
+                           'available')
             return True
         except Exception:
             with excutils.save_and_reraise_exception():
-                self.db.volume_update(context,
-                                      volume_ref['id'],
-                                      {'status': 'error_deleting'})
-
-        is_migrating = volume_ref['migration_status'] is not None
-        is_migrating_dest = (is_migrating and
-                             volume_ref['migration_status'].startswith(
-                                 'target:'))
+                # If this is a destination volume, we have to clear the
+                # database record to avoid user confusion.
+                self._clear_db(context, is_migrating_dest, volume_ref,
+                               'error_deleting')
 
         # If deleting source/destination volume in a migration, we should
         # skip quotas.
@@ -613,6 +615,21 @@ class VolumeManager(manager.SchedulerDependentManager):
 
         return True
 
+    def _clear_db(self, context, is_migrating_dest, volume_ref, status):
+        # This method is called when driver.unmanage() or
+        # driver.delete_volume() fails in delete_volume(), so it is already
+        # in the exception handling part.
+        if is_migrating_dest:
+            self.db.volume_destroy(context, volume_ref['id'])
+            LOG.error(_LE("Unable to delete the destination volume %s "
+                          "during volume migration, but the database "
+                          "record needs to be deleted."),
+                      volume_ref['id'])
+        else:
+            self.db.volume_update(context,
+                                  volume_ref['id'],
+                                  {'status': status})
+
     def create_snapshot(self, context, volume_id, snapshot):
         """Creates and exports the snapshot."""
         context = context.elevated()
@@ -1224,13 +1241,19 @@ class VolumeManager(manager.SchedulerDependentManager):
         new_volume = self.db.volume_get(ctxt, new_volume['id'])
         tries = 0
         while new_volume['status'] != 'available':
-            tries = tries + 1
+            tries += 1
             now = time.time()
             if new_volume['status'] == 'error':
                 msg = _("failed to create new_volume on destination host")
+                self._clean_temporary_volume(ctxt, volume['id'],
+                                             new_volume['id'],
+                                             clean_db_only=True)
                 raise exception.VolumeMigrationFailed(reason=msg)
             elif now > deadline:
                 msg = _("timeout creating new_volume on destination host")
+                self._clean_temporary_volume(ctxt, volume['id'],
+                                             new_volume['id'],
+                                             clean_db_only=True)
                 raise exception.VolumeMigrationFailed(reason=msg)
             else:
                 time.sleep(tries ** 2)
@@ -1257,34 +1280,11 @@ class VolumeManager(manager.SchedulerDependentManager):
                                                   new_volume['id'])
         except Exception:
             with excutils.save_and_reraise_exception():
-                msg = _("Failed to copy volume %(vol1)s to %(vol2)s")
-                LOG.error(msg % {'vol1': volume['id'],
-                                 'vol2': new_volume['id']})
-                volume = self.db.volume_get(ctxt, volume['id'])
-
-                # If we're in the migrating phase, we need to cleanup
-                # destination volume because source volume is remaining
-                if volume['migration_status'] == 'migrating':
-                    rpcapi.delete_volume(ctxt, new_volume)
-                else:
-                    # If we're in the completing phase don't delete the
-                    # destination because we may have already deleted the
-                    # source! But the migration_status in database should
-                    # be cleared to handle volume after migration failure
-                    try:
-                        updates = {'migration_status': None}
-                        self.db.volume_update(ctxt, new_volume['id'], updates)
-                    except exception.VolumeNotFound:
-                        LOG.info(_LI("Couldn't find destination volume "
-                                     "%(vol)s in database. The entry might be "
-                                     "successfully deleted during migration "
-                                     "completion phase."),
-                                 {'vol': new_volume['id']})
-
-                    LOG.warn(_LW("Failed to migrate volume. The destination "
-                                 "volume %(vol)s is not deleted since the "
-                                 "source volume may have already deleted."),
-                             {'vol': new_volume['id']})
+                msg = _LE("Failed to copy volume %(vol1)s to %(vol2)s")
+                LOG.error(msg, {'vol1': volume['id'],
+                                'vol2': new_volume['id']})
+                self._clean_temporary_volume(ctxt, volume['id'],
+                                             new_volume['id'])
 
     def _get_original_status(self, volume):
         attachments = volume['volume_attachment']
@@ -1293,6 +1293,47 @@ class VolumeManager(manager.SchedulerDependentManager):
         else:
             return 'in-use'
 
+    def _clean_temporary_volume(self, ctxt, volume_id, new_volume_id,
+                                clean_db_only=False):
+        volume = self.db.volume_get(ctxt, volume_id)
+        # If we're in the migrating phase, we need to clean up the
+        # destination volume because the source volume remains in place.
+        if volume['migration_status'] == 'migrating':
+            try:
+                if clean_db_only:
+                    # The temporary volume is not created, only DB data
+                    # is created
+                    self.db.volume_destroy(ctxt, new_volume_id)
+                else:
+                    # The temporary volume is already created
+                    rpcapi = volume_rpcapi.VolumeAPI()
+                    volume = self.db.volume_get(ctxt, new_volume_id)
+                    rpcapi.delete_volume(ctxt, volume)
+            except exception.VolumeNotFound:
+                LOG.info(_LI("Couldn't find the temporary volume "
+                             "%(vol)s in the database. There is no need "
+                             "to clean up this volume."),
+                         {'vol': new_volume_id})
+        else:
+            # If we're in the completing phase, don't delete the
+            # destination because we may have already deleted the
+            # source! But the migration_status in the database should
+            # be cleared so the volume can be handled after the failure.
+            try:
+                updates = {'migration_status': None}
+                self.db.volume_update(ctxt, new_volume_id, updates)
+            except exception.VolumeNotFound:
+                LOG.info(_LI("Couldn't find destination volume "
+                             "%(vol)s in the database. The entry might be "
+                             "successfully deleted during migration "
+                             "completion phase."),
+                         {'vol': new_volume_id})
+
+                LOG.warning(_LW("Failed to migrate volume. The destination "
+                                "volume %(vol)s is not deleted since the "
+                                "source volume may have been deleted."),
+                            {'vol': new_volume_id})
+
     def migrate_volume_completion(self, ctxt, volume_id, new_volume_id,
                                   error=False):
         try: