Allow replicated volumes to be recoverable
author    Mudassir Latif <mudassir.latif@purestorage.com>
Wed, 2 Sep 2015 22:07:04 +0000 (15:07 -0700)
committer Mudassir Latif <mudassir.latif@purestorage.com>
Tue, 22 Dec 2015 15:36:11 +0000 (07:36 -0800)
Currently, once replicated volumes are in 'error' or 'failed-over' states,
you can't do anything with them. We need to allow enable_replication after
the admin has resolved any issues that may have prevented the operation.

The same is true after a fail-over: you cannot enable replication on the volume.

This change allows more states to be recoverable via enable_replication.
It also sets the state to 'error' more uniformly when an operation fails.

'enabled' --> replication is on.
'disabled' --> replication is off by design.
'failed-over' --> we have just failed over. Replication is off.
'error' --> an error occurred during the last operation.

The allowed flow is (see the sketch after this list):
'enabled' --> 'failed-over'/'disabled'/'error'
'error' --> 'enabled'/'error'
'disabled' --> 'enabled'/'error'
'failed-over' --> 'enabled'/'error'
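
For illustration only (this snippet is not part of the change, and the names
are hypothetical rather than actual Cinder code), the allowed transitions
above could be expressed as a small Python mapping:

    # Hypothetical sketch of the replication_status state machine described
    # above; not part of this commit.
    ALLOWED_TRANSITIONS = {
        'enabled': {'failed-over', 'disabled', 'error'},
        'error': {'enabled', 'error'},
        'disabled': {'enabled', 'error'},
        'failed-over': {'enabled', 'error'},
    }

    def is_valid_transition(current, new):
        # True if replication_status may move from `current` to `new`.
        return new in ALLOWED_TRANSITIONS.get(current, set())

    # e.g. is_valid_transition('failed-over', 'enabled') -> True
    #      is_valid_transition('error', 'disabled')      -> False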

Change-Id: Icbc22b8501b048b1755019305430177ba0a9385f
Partial-Bug: #1491631

cinder/tests/unit/test_replication.py
cinder/volume/api.py
cinder/volume/driver.py
cinder/volume/manager.py
doc/source/devref/replication.rst

cinder/tests/unit/test_replication.py
index 830465d75a552633e3f3c734fd481dac6ebf271f..1f0bc33c390bddb865ec192726aa0bc73bc347c9 100644 (file)
@@ -30,9 +30,9 @@ from cinder.volume import driver
 CONF = cfg.CONF
 
 
-class VolumeReplicationTestCase(test.TestCase):
+class VolumeReplicationTestCaseBase(test.TestCase):
     def setUp(self):
-        super(VolumeReplicationTestCase, self).setUp()
+        super(VolumeReplicationTestCaseBase, self).setUp()
         self.ctxt = context.RequestContext('user', 'fake', False)
         self.adm_ctxt = context.RequestContext('admin', 'fake', True)
         self.manager = importutils.import_object(CONF.volume_manager)
@@ -42,6 +42,8 @@ class VolumeReplicationTestCase(test.TestCase):
                                                 spec=driver.VolumeDriver)
         self.driver = self.driver_patcher.start()
 
+
+class VolumeReplicationTestCase(VolumeReplicationTestCaseBase):
     @mock.patch('cinder.utils.require_driver_initialized')
     def test_promote_replica_uninit_driver(self, _init):
         """Test promote replication when driver is not initialized."""
@@ -111,3 +113,183 @@ class VolumeReplicationTestCase(test.TestCase):
                           self.manager.reenable_replication,
                           self.adm_ctxt,
                           vol['id'])
+
+
+class VolumeManagerReplicationV2Tests(VolumeReplicationTestCaseBase):
+    mock_driver = None
+    mock_db = None
+    vol = None
+
+    def setUp(self):
+        super(VolumeManagerReplicationV2Tests, self).setUp()
+        self.mock_db = mock.Mock()
+        self.mock_driver = mock.Mock()
+        self.vol = test_utils.create_volume(self.ctxt,
+                                            status='available',
+                                            replication_status='enabling')
+        self.manager.driver = self.mock_driver
+        self.mock_driver.replication_enable.return_value = \
+            {'replication_status': 'enabled'}
+        self.mock_driver.replication_disable.return_value = \
+            {'replication_status': 'disabled'}
+        self.manager.db = self.mock_db
+        self.mock_db.volume_get.return_value = self.vol
+        self.mock_db.volume_update.return_value = self.vol
+
+    # enable_replication tests
+    @mock.patch('cinder.utils.require_driver_initialized')
+    def test_enable_replication_uninitialized_driver(self,
+                                                     mock_require_driver_init):
+        mock_require_driver_init.side_effect = exception.DriverNotInitialized
+        self.assertRaises(exception.DriverNotInitialized,
+                          self.manager.enable_replication,
+                          self.ctxt,
+                          self.vol)
+
+    def test_enable_replication_error_state(self):
+        self.vol['replication_status'] = 'error'
+        self.assertRaises(exception.InvalidVolume,
+                          self.manager.enable_replication,
+                          self.ctxt,
+                          self.vol)
+
+    def test_enable_replication_driver_raises_cinder_exception(self):
+        self.mock_driver.replication_enable.side_effect = \
+            exception.CinderException
+        self.assertRaises(exception.VolumeBackendAPIException,
+                          self.manager.enable_replication,
+                          self.ctxt,
+                          self.vol)
+        self.mock_db.volume_update.assert_called_with(
+            self.ctxt,
+            self.vol['id'],
+            {'replication_status': 'error'})
+
+    def test_enable_replication_driver_raises_exception(self):
+        self.mock_driver.replication_enable.side_effect = Exception
+        self.assertRaises(exception.VolumeBackendAPIException,
+                          self.manager.enable_replication,
+                          self.ctxt,
+                          self.vol)
+        self.mock_db.volume_update.assert_called_with(
+            self.ctxt,
+            self.vol['id'],
+            {'replication_status': 'error'})
+
+    def test_enable_replication_success(self):
+        self.manager.enable_replication(self.ctxt, self.vol)
+        # volume_update is called multiple times
+        self.mock_db.volume_update.side_effect = [self.vol, self.vol]
+        self.mock_db.volume_update.assert_called_with(
+            self.ctxt,
+            self.vol['id'],
+            {'replication_status': 'enabled'})
+
+    # disable_replication tests
+    @mock.patch('cinder.utils.require_driver_initialized')
+    def test_disable_replication_uninitialized_driver(self,
+                                                      mock_req_driver_init):
+        mock_req_driver_init.side_effect = exception.DriverNotInitialized
+        self.assertRaises(exception.DriverNotInitialized,
+                          self.manager.disable_replication,
+                          self.ctxt,
+                          self.vol)
+
+    def test_disable_replication_error_state(self):
+        self.vol['replication_status'] = 'error'
+        self.assertRaises(exception.InvalidVolume,
+                          self.manager.disable_replication,
+                          self.ctxt,
+                          self.vol)
+
+    def test_disable_replication_driver_raises_cinder_exception(self):
+        self.vol['replication_status'] = 'disabling'
+        self.mock_driver.replication_disable.side_effect = \
+            exception.CinderException
+        self.assertRaises(exception.VolumeBackendAPIException,
+                          self.manager.disable_replication,
+                          self.ctxt,
+                          self.vol)
+        self.mock_db.volume_update.assert_called_with(
+            self.ctxt,
+            self.vol['id'],
+            {'replication_status': 'error'})
+
+    def test_disable_replication_driver_raises_exception(self):
+        self.vol['replication_status'] = 'disabling'
+        self.mock_driver.replication_disable.side_effect = Exception
+        self.assertRaises(exception.VolumeBackendAPIException,
+                          self.manager.disable_replication,
+                          self.ctxt,
+                          self.vol)
+        self.mock_db.volume_update.assert_called_with(
+            self.ctxt,
+            self.vol['id'],
+            {'replication_status': 'error'})
+
+    def test_disable_replication_success(self):
+        self.vol['replication_status'] = 'disabling'
+        self.manager.disable_replication(self.ctxt, self.vol)
+        # volume_update is called multiple times
+        self.mock_db.volume_update.side_effect = [self.vol, self.vol]
+        self.mock_db.volume_update.assert_called_with(
+            self.ctxt,
+            self.vol['id'],
+            {'replication_status': 'disabled'})
+
+    # failover_replication tests
+    @mock.patch('cinder.utils.require_driver_initialized')
+    def test_failover_replication_uninitialized_driver(self,
+                                                       mock_driver_init):
+        self.vol['replication_status'] = 'enabling_secondary'
+        # validate that driver is called even if uninitialized
+        mock_driver_init.side_effect = exception.DriverNotInitialized
+        self.manager.failover_replication(self.ctxt, self.vol)
+        # volume_update is called multiple times
+        self.mock_db.volume_update.side_effect = [self.vol, self.vol]
+        self.mock_db.volume_update.assert_called_with(
+            self.ctxt,
+            self.vol['id'],
+            {'replication_status': 'failed-over'})
+
+    def test_failover_replication_error_state(self):
+        self.vol['replication_status'] = 'error'
+        self.assertRaises(exception.InvalidVolume,
+                          self.manager.failover_replication,
+                          self.ctxt,
+                          self.vol)
+
+    def test_failover_replication_driver_raises_cinder_exception(self):
+        self.vol['replication_status'] = 'enabling_secondary'
+        self.mock_driver.replication_failover.side_effect = \
+            exception.CinderException
+        self.assertRaises(exception.VolumeBackendAPIException,
+                          self.manager.failover_replication,
+                          self.ctxt,
+                          self.vol)
+        self.mock_db.volume_update.assert_called_with(
+            self.ctxt,
+            self.vol['id'],
+            {'replication_status': 'error'})
+
+    def test_failover_replication_driver_raises_exception(self):
+        self.vol['replication_status'] = 'enabling_secondary'
+        self.mock_driver.replication_failover.side_effect = Exception
+        self.assertRaises(exception.VolumeBackendAPIException,
+                          self.manager.failover_replication,
+                          self.ctxt,
+                          self.vol)
+        self.mock_db.volume_update.assert_called_with(
+            self.ctxt,
+            self.vol['id'],
+            {'replication_status': 'error'})
+
+    def test_failover_replication_success(self):
+        self.vol['replication_status'] = 'enabling_secondary'
+        self.manager.failover_replication(self.ctxt, self.vol)
+        # volume_update is called multiple times
+        self.mock_db.volume_update.side_effect = [self.vol, self.vol]
+        self.mock_db.volume_update.assert_called_with(
+            self.ctxt,
+            self.vol['id'],
+            {'replication_status': 'failed-over'})
cinder/volume/api.py
index 830febb3f9b66fed6a49b4663ee9ae28ea2fe7a1..fc161a9f406d31a1b17db6aa65e1b846c3bc43b6 100644 (file)
@@ -1611,7 +1611,7 @@ class API(base.Base):
 
     #  Replication V2 methods ##
 
-    # NOTE(jdg): It might be kinda silly to propogate the named
+    # NOTE(jdg): It might be kinda silly to propagate the named
     # args with defaults all the way down through rpc into manager
     # but for now the consistency is useful, and there may be
     # some usefulness in the future (direct calls in manager?)
@@ -1641,7 +1641,7 @@ class API(base.Base):
         # call to driver regardless of replication_status, most likely
         # this indicates an issue with the driver, but might be useful
         # cases to  consider modifying this for in the future.
-        valid_rep_status = ['disabled']
+        valid_rep_status = ['disabled', 'failed-over', 'error']
         rep_status = volume.get('replication_status', valid_rep_status[0])
 
         if rep_status not in valid_rep_status:
cinder/volume/driver.py
index b5ecd979cac2ad5b4b44461e781a8c9324dabad8..210d978e48868c38b3653b487beb1de8934d6b61 100644 (file)
@@ -1672,7 +1672,7 @@ class ReplicaV2VD(object):
 
         The replication_driver_data response is vendor unique,
         data returned/used by the driver.  It is expected that
-        the reponse from the driver is in the appropriate db update
+        the response from the driver is in the appropriate db update
         format, in the form of a dict, where the vendor data is
         stored under the key 'replication_driver_data'
 
@@ -1689,10 +1689,10 @@ class ReplicaV2VD(object):
         """Disable replication on the specified volume.
 
         If the specified volume is currently replication enabled,
-        this method can be used to disable the replciation process
+        this method can be used to disable the replication process
         on the backend.
 
-        Note that we still send this call to a driver whos volume
+        Note that we still send this call to a driver whose volume
         may report replication-disabled already.  We do this as a
         safety mechanism to allow a driver to cleanup any mismatch
         in state between Cinder and itself.
@@ -1708,7 +1708,7 @@ class ReplicaV2VD(object):
 
         The replication_driver_data response is vendor unique,
         data returned/used by the driver.  It is expected that
-        the reponse from the driver is in the appropriate db update
+        the response from the driver is in the appropriate db update
         format, in the form of a dict, where the vendor data is
         stored under the key 'replication_driver_data'
 
@@ -1734,7 +1734,7 @@ class ReplicaV2VD(object):
         the replication target is a configured cinder backend, we'll
         just update the host column for the volume.
 
-        Very important point here is that in the case of a succesful
+        Very important point here is that in the case of a successful
         failover, we want to update the replication_status of the
         volume to "failed-over".  This way there's an indication that
         things worked as expected, and that it's evident that the volume
@@ -1744,7 +1744,7 @@ class ReplicaV2VD(object):
         :param context: security context
         :param volume: volume object returned by DB
         :param secondary: Specifies rep target to fail over to
-        :response: dict of udpates
+        :response: dict of updates
 
         So the response would take the form:
             {host: <properly formatted host string for db update>,
@@ -1780,7 +1780,7 @@ class ReplicaV2VD(object):
 
         Example response for replicating to an unmanaged backend:
         {'volume_id': volume['id'],
-         'targets':[{'type': 'managed',
+         'targets':[{'type': 'unmanaged',
                      'vendor-key-1': 'value-1'}...]
 
         NOTE: It's the responsibility of the driver to mask out any
cinder/volume/manager.py
index b9fd6b9804d32e7d25d5c2caf94cc8bd6b568af3..7fb57d4be5988c6e61b1fe3d8d457a9db632868c 100644 (file)
@@ -3197,8 +3197,19 @@ class VolumeManager(manager.SchedulerDependentManager):
         except exception.CinderException:
             err_msg = (_("Enable replication for volume failed."))
             LOG.exception(err_msg, resource=volume)
+            self.db.volume_update(context,
+                                  volume['id'],
+                                  {'replication_status': 'error'})
             raise exception.VolumeBackendAPIException(data=err_msg)
 
+        except Exception:
+            msg = _('enable_replication caused exception in driver.')
+            LOG.exception(msg, resource=volume)
+            self.db.volume_update(context,
+                                  volume['id'],
+                                  {'replication_status': 'error'})
+            raise exception.VolumeBackendAPIException(data=msg)
+
         try:
             if rep_driver_data:
                 volume = self.db.volume_update(context,
@@ -3246,7 +3257,19 @@ class VolumeManager(manager.SchedulerDependentManager):
         except exception.CinderException:
             err_msg = (_("Disable replication for volume failed."))
             LOG.exception(err_msg, resource=volume)
+            self.db.volume_update(context,
+                                  volume['id'],
+                                  {'replication_status': 'error'})
             raise exception.VolumeBackendAPIException(data=err_msg)
+
+        except Exception:
+            msg = _('disable_replication caused exception in driver.')
+            LOG.exception(msg, resource=volume)
+            self.db.volume_update(context,
+                                  volume['id'],
+                                  {'replication_status': 'error'})
+            raise exception.VolumeBackendAPIException(msg)
+
         try:
             if rep_driver_data:
                 volume = self.db.volume_update(context,
@@ -3255,6 +3278,9 @@ class VolumeManager(manager.SchedulerDependentManager):
         except exception.CinderException as ex:
             LOG.exception(_LE("Driver replication data update failed."),
                           resource=volume)
+            self.db.volume_update(context,
+                                  volume['id'],
+                                  {'replication_status': 'error'})
             raise exception.VolumeBackendAPIException(reason=ex)
         self.db.volume_update(context,
                               volume['id'],
@@ -3288,6 +3314,13 @@ class VolumeManager(manager.SchedulerDependentManager):
         # not being able to talk to the primary array that it's configured
         # to manage.
 
+        if volume['replication_status'] != 'enabling_secondary':
+            msg = (_("Unable to failover replication due to invalid "
+                     "replication status: %(status)s.") %
+                   {'status': volume['replication_status']})
+            LOG.error(msg, resource=volume)
+            raise exception.InvalidVolume(reason=msg)
+
         try:
             volume = self.db.volume_get(context, volume['id'])
             model_update = self.driver.replication_failover(context,
@@ -3320,6 +3353,14 @@ class VolumeManager(manager.SchedulerDependentManager):
                                   {'replication_status': 'error'})
             raise exception.VolumeBackendAPIException(data=err_msg)
 
+        except Exception:
+            msg = _('replication_failover caused exception in driver.')
+            LOG.exception(msg, resource=volume)
+            self.db.volume_update(context,
+                                  volume['id'],
+                                  {'replication_status': 'error'})
+            raise exception.VolumeBackendAPIException(msg)
+
         if model_update:
             try:
                 volume = self.db.volume_update(
@@ -3330,12 +3371,15 @@ class VolumeManager(manager.SchedulerDependentManager):
             except exception.CinderException as ex:
                 LOG.exception(_LE("Driver replication data update failed."),
                               resource=volume)
+                self.db.volume_update(context,
+                                      volume['id'],
+                                      {'replication_status': 'error'})
                 raise exception.VolumeBackendAPIException(reason=ex)
 
         # NOTE(jdg): We're setting replication status to failed-over
-        # which indicates the volume is ok, things went as epected but
+        # which indicates the volume is ok, things went as expected but
         # we're likely not replicating any longer because... well we
-        # did a fail-over.  In the case of admin brining primary
+        # did a fail-over.  In the case of admin bringing primary
         # back online he/she can use enable_replication to get this
         # state set back to enabled.
 
doc/source/devref/replication.rst
index bd1371e675b5263704a27452cc1ae2375233a72a..14475891b571ba3a962bbd7e58e4fad0bfca3f65 100644 (file)
@@ -208,7 +208,7 @@ This may be used for triggering a fail-over manually or for testing purposes.
 
 Note that ideally drivers will know how to update the volume reference properly so that Cinder is now
 pointing to the secondary.  Also, while it's not required, at this time; ideally the command would
-act as a toggle, allowing to switch back and forth betweeen primary and secondary and back to primary.
+act as a toggle, allowing to switch back and forth between primary and secondary and back to primary.
 
 **list_replication_targets**