review.fuel-infra Code Review - openstack-build/cinder-build.git/commitdiff
Delete the temporary volume if migration fails
author    Vincent Hou <sbhou@cn.ibm.com>
Tue, 3 Mar 2015 08:04:41 +0000 (16:04 +0800)
committer Vincent Hou <sbhou@cn.ibm.com>
Wed, 8 Apr 2015 07:33:40 +0000 (15:33 +0800)
This patch resolves the following issues:
* A temporary volume is created on the destination host before the data is
migrated from the source. However, if the creation of this volume fails,
its record is left behind in the database as a redundant entry. This
patch removes the stale database record in that case.
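
A minimal, standalone sketch of this cleanup decision (illustrative only:
FakeDB, clean_temporary_volume and the 'src'/'tmp' ids are placeholders for
this example, not Cinder APIs; the real change is
VolumeManager._clean_temporary_volume() in the manager.py hunk below):

class FakeDB(object):
    """Minimal in-memory stand-in for cinder.db (illustrative only)."""
    def __init__(self, volumes):
        self.volumes = {v['id']: v for v in volumes}

    def volume_get(self, vol_id):
        return self.volumes[vol_id]

    def volume_destroy(self, vol_id):
        self.volumes.pop(vol_id, None)

    def volume_update(self, vol_id, updates):
        self.volumes[vol_id].update(updates)


def clean_temporary_volume(db, volume_id, new_volume_id, clean_db_only=False):
    source = db.volume_get(volume_id)
    if source['migration_status'] == 'migrating':
        if clean_db_only:
            # The temporary volume was never created on the backend, so only
            # the orphaned database row has to go.
            db.volume_destroy(new_volume_id)
        else:
            # The backend volume exists; in the real patch this branch calls
            # rpcapi.delete_volume() instead of touching the DB directly.
            db.volume_destroy(new_volume_id)
    else:
        # Completing phase: the source may already be gone, so only clear
        # the destination's migration status.
        db.volume_update(new_volume_id, {'migration_status': None})


if __name__ == '__main__':
    db = FakeDB([{'id': 'src', 'migration_status': 'migrating'},
                 {'id': 'tmp', 'migration_status': 'target:src'}])
    clean_temporary_volume(db, 'src', 'tmp', clean_db_only=True)
    assert 'tmp' not in db.volumes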

* If attaching the remote dest volume fails in initialize_connection due to
a timeout, we need to terminate the connection. Otherwise, the dest volume
will not be released and cannot be deleted successfully.
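
A minimal, standalone sketch of the same "terminate on failed attach"
pattern (illustrative only: FakeRPCAPI, attach_remote_volume and 'vol-1' are
placeholders for this example; the real code wraps
rpcapi.initialize_connection()/terminate_connection() in
excutils.save_and_reraise_exception(), as in the driver.py hunk below):

class FakeRPCAPI(object):
    """Stand-in for cinder.volume.rpcapi.VolumeAPI (illustrative only)."""
    def initialize_connection(self, volume, properties):
        # Simulate the RPC timeout described above.
        raise TimeoutError('initialize_connection timed out')

    def terminate_connection(self, volume, properties, force=False):
        print('released connection for %s (force=%s)' % (volume['id'], force))


def attach_remote_volume(rpcapi, volume, properties):
    try:
        return rpcapi.initialize_connection(volume, properties)
    except Exception:
        # The attach may have succeeded on the remote host even though the
        # RPC call timed out, so try to release it before re-raising.
        try:
            rpcapi.terminate_connection(volume, properties, force=True)
        except Exception:
            pass  # Best effort; a failed terminate is acceptable here.
        raise


if __name__ == '__main__':
    try:
        attach_remote_volume(FakeRPCAPI(), {'id': 'vol-1'}, {})
    except TimeoutError:
        pass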

Closes-bug: #1398177
Change-Id: I5bc75bd7652841fddd66481ee001fb682168215c

cinder/tests/test_volume.py
cinder/volume/driver.py
cinder/volume/manager.py

index 8bf72e358898a5dd82ea01c73a18aeda712c9ed1..671bce85f8df8666c148ebec05df6eb09d7ae118 100644 (file)
@@ -3828,6 +3828,108 @@ class VolumeTestCase(BaseVolumeTestCase):
             self.assertIsNone(volume['migration_status'])
             self.assertEqual('available', volume['status'])
 
+    def test_clean_temporary_volume(self):
+        def fake_delete_volume(ctxt, volume):
+            db.volume_destroy(ctxt, volume['id'])
+
+        fake_volume = tests_utils.create_volume(self.context, size=1,
+                                                host=CONF.host)
+        fake_new_volume = tests_utils.create_volume(self.context, size=1,
+                                                    host=CONF.host)
+        # Check when the migrated volume is in migration
+        db.volume_update(self.context, fake_volume['id'],
+                         {'migration_status': 'migrating'})
+        # 1. Only clean the db
+        self.volume._clean_temporary_volume(self.context, fake_volume['id'],
+                                            fake_new_volume['id'],
+                                            clean_db_only=True)
+        self.assertRaises(exception.VolumeNotFound,
+                          db.volume_get, self.context,
+                          fake_new_volume['id'])
+
+        # 2. Delete the backend storage
+        fake_new_volume = tests_utils.create_volume(self.context, size=1,
+                                                    host=CONF.host)
+        with mock.patch.object(volume_rpcapi.VolumeAPI, 'delete_volume') as \
+                mock_delete_volume:
+            mock_delete_volume.side_effect = fake_delete_volume
+            self.volume._clean_temporary_volume(self.context,
+                                                fake_volume['id'],
+                                                fake_new_volume['id'],
+                                                clean_db_only=False)
+            self.assertRaises(exception.VolumeNotFound,
+                              db.volume_get, self.context,
+                              fake_new_volume['id'])
+
+        # Check when the migrated volume is not in migration
+        fake_new_volume = tests_utils.create_volume(self.context, size=1,
+                                                    host=CONF.host)
+        db.volume_update(self.context, fake_volume['id'],
+                         {'migration_status': 'non-migrating'})
+        self.volume._clean_temporary_volume(self.context, fake_volume['id'],
+                                            fake_new_volume['id'])
+        volume = db.volume_get(context.get_admin_context(),
+                               fake_new_volume['id'])
+        self.assertIsNone(volume['migration_status'])
+
+    def test_migrate_volume_generic_create_volume_error(self):
+        def fake_create_volume(ctxt, volume, host, req_spec, filters,
+                               allow_reschedule=True):
+            db.volume_update(ctxt, volume['id'],
+                             {'status': 'error'})
+
+        with mock.patch.object(self.volume.driver, 'migrate_volume'), \
+                mock.patch.object(volume_rpcapi.VolumeAPI, 'create_volume') as \
+                mock_create_volume, \
+                mock.patch.object(self.volume, '_clean_temporary_volume') as \
+                clean_temporary_volume:
+
+            # Exception case at the creation of the new temporary volume
+            mock_create_volume.side_effect = fake_create_volume
+            volume = tests_utils.create_volume(self.context, size=0,
+                                               host=CONF.host)
+            host_obj = {'host': 'newhost', 'capabilities': {}}
+            self.assertRaises(exception.VolumeMigrationFailed,
+                              self.volume.migrate_volume,
+                              self.context,
+                              volume['id'],
+                              host_obj,
+                              True)
+            volume = db.volume_get(context.get_admin_context(), volume['id'])
+            self.assertIsNone(volume['migration_status'])
+            self.assertEqual('available', volume['status'])
+            self.assertTrue(clean_temporary_volume.called)
+
+    def test_migrate_volume_generic_timeout_error(self):
+        CONF.set_override("migration_create_volume_timeout_secs", 2)
+
+        def fake_create_volume(ctxt, volume, host, req_spec, filters,
+                               allow_reschedule=True):
+            db.volume_update(ctxt, volume['id'],
+                             {'status': 'creating'})
+
+        with mock.patch.object(self.volume.driver, 'migrate_volume'), \
+                mock.patch.object(volume_rpcapi.VolumeAPI, 'create_volume') as \
+                mock_create_volume, \
+                mock.patch.object(self.volume, '_clean_temporary_volume') as \
+                clean_temporary_volume:
+
+            # Exception case at the timeout of the volume creation
+            mock_create_volume.side_effect = fake_create_volume
+            volume = tests_utils.create_volume(self.context, size=0,
+                                               host=CONF.host)
+            host_obj = {'host': 'newhost', 'capabilities': {}}
+            self.assertRaises(exception.VolumeMigrationFailed,
+                              self.volume.migrate_volume,
+                              self.context,
+                              volume['id'],
+                              host_obj,
+                              True)
+            volume = db.volume_get(context.get_admin_context(), volume['id'])
+            self.assertIsNone(volume['migration_status'])
+            self.assertEqual('available', volume['status'])
+            self.assertTrue(clean_temporary_volume.called)
+
     def test_migrate_volume_generic_create_export_error(self):
         def fake_create_volume(ctxt, volume, host, req_spec, filters,
                                allow_reschedule=True):
index 5e4a70ce1c87cfcae800b2c99ec426396f2f7b14..b9dfea00f024f2ef22fc13e8be7d97845bfc60f9 100644 (file)
@@ -640,7 +640,23 @@ class BaseVD(object):
             # Call remote manager's initialize_connection which includes
             # driver's create_export and initialize_connection
             rpcapi = volume_rpcapi.VolumeAPI()
-            conn = rpcapi.initialize_connection(context, volume, properties)
+            try:
+                conn = rpcapi.initialize_connection(context, volume,
+                                                    properties)
+            except Exception:
+                with excutils.save_and_reraise_exception():
+                    # It is possible that initialize_connection fails due to
+                    # a timeout. The volume may already be attached on the
+                    # remote host even though the error is raised, so it is
+                    # worth trying to terminate the connection.
+                    try:
+                        rpcapi.terminate_connection(context, volume,
+                                                    properties, force=True)
+                    except Exception:
+                        LOG.warning(_LW("Failed terminating the connection "
+                                        "of volume %(volume_id)s, but it is "
+                                        "acceptable."),
+                                    {'volume_id': volume['id']})
         else:
             # Call local driver's create_export and initialize_connection.
             # NOTE(avishay) This is copied from the manager's code - need to
index 2bb52257bc1d012fa460c783fd7dbd60e9517db7..3adfa8c83a500f250c2b8075690659714ec8c2d0 100644 (file)
@@ -528,6 +528,10 @@ class VolumeManager(manager.SchedulerDependentManager):
             raise exception.InvalidVolume(
                 reason=_("volume is not local to this node"))
 
+        is_migrating = volume_ref['migration_status'] is not None
+        is_migrating_dest = (is_migrating and
+                             volume_ref['migration_status'].startswith(
+                                 'target:'))
         self._notify_about_volume_usage(context, volume_ref, "delete.start")
         try:
             # NOTE(flaper87): Verify the driver is enabled
@@ -545,19 +549,17 @@ class VolumeManager(manager.SchedulerDependentManager):
         except exception.VolumeIsBusy:
             LOG.error(_LE("Cannot delete volume %s: volume is busy"),
                       volume_ref['id'])
-            self.db.volume_update(context, volume_ref['id'],
-                                  {'status': 'available'})
+            # If this is a destination volume, we have to clear the database
+            # record to avoid user confusion.
+            self._clear_db(context, is_migrating_dest, volume_ref,
+                           'available')
             return True
         except Exception:
             with excutils.save_and_reraise_exception():
-                self.db.volume_update(context,
-                                      volume_ref['id'],
-                                      {'status': 'error_deleting'})
-
-        is_migrating = volume_ref['migration_status'] is not None
-        is_migrating_dest = (is_migrating and
-                             volume_ref['migration_status'].startswith(
-                                 'target:'))
+                # If this is a destination volume, we have to clear the
+                # database record to avoid user confusion.
+                self._clear_db(context, is_migrating_dest, volume_ref,
+                               'error_deleting')
 
         # If deleting source/destination volume in a migration, we should
         # skip quotas.
@@ -613,6 +615,21 @@ class VolumeManager(manager.SchedulerDependentManager):
 
         return True
 
+    def _clear_db(self, context, is_migrating_dest, volume_ref, status):
+        # This method is called when driver.unmanage() or
+        # driver.delete_volume() fails in delete_volume(), so it is already
+        # in the exception handling part.
+        if is_migrating_dest:
+            self.db.volume_destroy(context, volume_ref['id'])
+            LOG.error(_LE("Unable to delete the destination volume %s "
+                          "during volume migration, but the database "
+                          "record needs to be deleted."),
+                      volume_ref['id'])
+        else:
+            self.db.volume_update(context,
+                                  volume_ref['id'],
+                                  {'status': status})
+
     def create_snapshot(self, context, volume_id, snapshot):
         """Creates and exports the snapshot."""
         context = context.elevated()
@@ -1224,13 +1241,19 @@ class VolumeManager(manager.SchedulerDependentManager):
         new_volume = self.db.volume_get(ctxt, new_volume['id'])
         tries = 0
         while new_volume['status'] != 'available':
-            tries = tries + 1
+            tries += 1
             now = time.time()
             if new_volume['status'] == 'error':
                 msg = _("failed to create new_volume on destination host")
+                self._clean_temporary_volume(ctxt, volume['id'],
+                                             new_volume['id'],
+                                             clean_db_only=True)
                 raise exception.VolumeMigrationFailed(reason=msg)
             elif now > deadline:
                 msg = _("timeout creating new_volume on destination host")
+                self._clean_temporary_volume(ctxt, volume['id'],
+                                             new_volume['id'],
+                                             clean_db_only=True)
                 raise exception.VolumeMigrationFailed(reason=msg)
             else:
                 time.sleep(tries ** 2)
@@ -1257,34 +1280,11 @@ class VolumeManager(manager.SchedulerDependentManager):
                                                   new_volume['id'])
         except Exception:
             with excutils.save_and_reraise_exception():
-                msg = _("Failed to copy volume %(vol1)s to %(vol2)s")
-                LOG.error(msg % {'vol1': volume['id'],
-                                 'vol2': new_volume['id']})
-                volume = self.db.volume_get(ctxt, volume['id'])
-
-                # If we're in the migrating phase, we need to cleanup
-                # destination volume because source volume is remaining
-                if volume['migration_status'] == 'migrating':
-                    rpcapi.delete_volume(ctxt, new_volume)
-                else:
-                    # If we're in the completing phase don't delete the
-                    # destination because we may have already deleted the
-                    # source! But the migration_status in database should
-                    # be cleared to handle volume after migration failure
-                    try:
-                        updates = {'migration_status': None}
-                        self.db.volume_update(ctxt, new_volume['id'], updates)
-                    except exception.VolumeNotFound:
-                        LOG.info(_LI("Couldn't find destination volume "
-                                     "%(vol)s in database. The entry might be "
-                                     "successfully deleted during migration "
-                                     "completion phase."),
-                                 {'vol': new_volume['id']})
-
-                    LOG.warn(_LW("Failed to migrate volume. The destination "
-                                 "volume %(vol)s is not deleted since the "
-                                 "source volume may have already deleted."),
-                             {'vol': new_volume['id']})
+                msg = _LE("Failed to copy volume %(vol1)s to %(vol2)s")
+                LOG.error(msg, {'vol1': volume['id'],
+                                'vol2': new_volume['id']})
+                self._clean_temporary_volume(ctxt, volume['id'],
+                                             new_volume['id'])
 
     def _get_original_status(self, volume):
         attachments = volume['volume_attachment']
@@ -1293,6 +1293,47 @@ class VolumeManager(manager.SchedulerDependentManager):
         else:
             return 'in-use'
 
+    def _clean_temporary_volume(self, ctxt, volume_id, new_volume_id,
+                                clean_db_only=False):
+        volume = self.db.volume_get(ctxt, volume_id)
+        # If we're in the migrating phase, we need to clean up the
+        # destination volume because the source volume remains in place.
+        if volume['migration_status'] == 'migrating':
+            try:
+                if clean_db_only:
+                    # The temporary volume is not created, only DB data
+                    # is created
+                    self.db.volume_destroy(ctxt, new_volume_id)
+                else:
+                    # The temporary volume is already created
+                    rpcapi = volume_rpcapi.VolumeAPI()
+                    volume = self.db.volume_get(ctxt, new_volume_id)
+                    rpcapi.delete_volume(ctxt, volume)
+            except exception.VolumeNotFound:
+                LOG.info(_LI("Couldn't find the temporary volume "
+                             "%(vol)s in the database. There is no need "
+                             "to clean up this volume."),
+                         {'vol': new_volume_id})
+        else:
+            # If we're in the completing phase, don't delete the
+            # destination because we may have already deleted the
+            # source! But the migration_status in the database should
+            # be cleared so the volume can be handled after the failure.
+            try:
+                updates = {'migration_status': None}
+                self.db.volume_update(ctxt, new_volume_id, updates)
+            except exception.VolumeNotFound:
+                LOG.info(_LI("Couldn't find destination volume "
+                             "%(vol)s in the database. The entry might be "
+                             "successfully deleted during migration "
+                             "completion phase."),
+                         {'vol': new_volume_id})
+
+                LOG.warning(_LW("Failed to migrate volume. The destination "
+                                "volume %(vol)s is not deleted since the "
+                                "source volume may have been deleted."),
+                            {'vol': new_volume_id})
+
     def migrate_volume_completion(self, ctxt, volume_id, new_volume_id,
                                   error=False):
         try: