Cinder replication V2
authorJohn Griffith <john.griffith8@gmail.com>
Thu, 9 Jul 2015 21:11:54 +0000 (21:11 +0000)
committerJohn Griffith <john.griffith8@gmail.com>
Mon, 24 Aug 2015 21:37:15 +0000 (15:37 -0600)
This adds a scaled-back replication implementation
that leaves the bulk of the work up to the driver.
We just provide basic admin API methods to do things
like enable/disable and fail-over.  Setup and
specification of replication targets for a specific
back end are now intended to be part of the cinder.conf
in the driver section itself.

Replication targets are configured via the cinder.conf
file in their associated driver section.  See the devref
doc included in this commit for details on the format.

The next step in configuration is to create a volume-type
with replication info in the extra-specs:
    extra-specs = replication=enabled, volume_backend_name=foo

This instructs the driver to utilize replication.  The default
behavior is left up to the driver; one-way replication is the
suggested default, and in the case of multiple targets the driver
can choose one or use its own default.

If the back end doesn't report replication=enabled in its stats
updates, the scheduler will fail to place the volume due to an
invalid host or no hosts available.

Vendors can easily adjust extra-specs or their own config settings
to modify this behavior; any vendor-unique adaptation can be
provided through the use of scoped keys.  Suggested examples
will be published in docs.
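
For example, a scoped-key entry might look like (a hypothetical
example, mirroring the devref doc):
    extra-specs = replication=enabled, replication:replication_type=async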

See doc/source/devref/replication.rst for more info

Implements BP: replication-v2
DocImpact

Change-Id: I406390e4d5f3c9947df1c4f2de68821e0fd7f75b

12 files changed:
cinder/api/contrib/admin_actions.py
cinder/tests/unit/policy.json
cinder/tests/unit/test_volume.py
cinder/tests/unit/test_volume_utils.py
cinder/volume/api.py
cinder/volume/driver.py
cinder/volume/manager.py
cinder/volume/rpcapi.py
cinder/volume/utils.py
doc/source/devref/index.rst
doc/source/devref/replication.rst [new file with mode: 0644]
etc/cinder/policy.json

index 46bb3fd8ac207e8d5a7f2b9d8e192a0236a212ca..cb665d2875a56a620234db84173f65b096cfc95c 100644 (file)
@@ -255,6 +255,85 @@ class VolumeAdminController(AdminController):
                                                         new_volume, error)
         return {'save_volume_id': ret}
 
+    @wsgi.action('os-enable_replication')
+    def _enable_replication(self, req, id, body):
+        """Enable/Re-enable replication on replciation capable volume.
+
+        Admin only method, used primarily for cases like disable/re-enable
+        replication proces on a replicated volume for maintenance or testing
+        """
+
+        context = req.environ['cinder.context']
+        self.authorize(context, 'enable_replication')
+        try:
+            volume = self._get(context, id)
+        except exception.VolumeNotFound as e:
+            raise exc.HTTPNotFound(explanation=e.msg)
+        self.volume_api.enable_replication(context, volume)
+        return webob.Response(status_int=202)
+
+    @wsgi.action('os-disable_replication')
+    def _disable_replication(self, req, id, body):
+        """Disable replication on replciation capable volume.
+
+        Admin only method, used to instruct a backend to
+        disable replication process to a replicated volume.
+        """
+
+        context = req.environ['cinder.context']
+        self.authorize(context, 'disable_replication')
+        try:
+            volume = self._get(context, id)
+        except exception.VolumeNotFound as e:
+            raise exc.HTTPNotFound(explanation=e.msg)
+        self.volume_api.disable_replication(context, volume)
+        return webob.Response(status_int=202)
+
+    @wsgi.action('os-failover_replication')
+    def _failover_replication(self, req, id, body):
+        """Failover a replicating volume to it's secondary
+
+        Admin only method, used to force a fail-over to
+        a replication target. Optional secondary param to
+        indicate what device to promote in case of multiple
+        replication targets.
+        """
+
+        context = req.environ['cinder.context']
+        self.authorize(context, 'failover_replication')
+        try:
+            volume = self._get(context, id)
+        except exception.VolumeNotFound as e:
+            raise exc.HTTPNotFound(explanation=e.msg)
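+        # The request body is expected to be of the form:
+        #     {'os-failover_replication': {'secondary': <target>}}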
+        secondary = body['os-failover_replication'].get('secondary', None)
+        self.volume_api.failover_replication(context, volume, secondary)
+        return webob.Response(status_int=202)
+
+    @wsgi.action('os-list_replication_targets')
+    def _list_replication_targets(self, req, id, body):
+        """Show replication targets for the specified host.
+
+        Admin only method, used to display configured
+        replication target devices for the specified volume.
+
+        """
+
+        # TODO(jdg): We'll want an equivalent type of command
+        # to query a backend host (show configuration for a
+        # specified backend), but priority here is for
+        # a volume as it's likely to be more useful.
+        context = req.environ['cinder.context']
+        self.authorize(context, 'list_replication_targets')
+        try:
+            volume = self._get(context, id)
+        except exception.VolumeNotFound as e:
+            raise exc.HTTPNotFound(explanation=e.msg)
+
+        # Expected response is a dict with unknown
+        # keys.  Should be of the form:
+        #    {'volume_id': xx, 'replication_targets':[{k: v, k1: v1...}]}
+        return self.volume_api.list_replication_targets(context, volume)
+
 
 class SnapshotAdminController(AdminController):
     """AdminController for Snapshots."""
index e656d06859f4958991787e91db4186c0aae9b4e6..0948d3dd0afe9954423fb1109b4bfc567de99799 100644 (file)
     "volume:update_readonly_flag": "",
     "volume:retype": "",
     "volume:copy_volume_to_image": "",
-
+    "volume:enable_replication": "rule:admin_api",
+    "volume:disable_replication": "rule:admin_api",
+    "volume:failover_replication": "rule:admin_api",
+    "volume:list_replication_targets": "rule:admin_api",
     "volume_extension:volume_admin_actions:reset_status": "rule:admin_api",
     "volume_extension:snapshot_admin_actions:reset_status": "rule:admin_api",
     "volume_extension:backup_admin_actions:reset_status": "rule:admin_api",
index 07486b6277b67606819730c05e83a7e38b8dbe6b..256c272090bebfa52b67161bf5f00792154d7dc1 100644 (file)
@@ -5849,6 +5849,61 @@ class GenericVolumeDriverTestCase(DriverTestCase):
                                                       volume_file)
             self.assertEqual(i, backup_service.restore.call_count)
 
+    def test_enable_replication_invalid_state(self):
+        volume_api = cinder.volume.api.API()
+        ctxt = context.get_admin_context()
+        volume = tests_utils.create_volume(ctxt,
+                                           size=1,
+                                           host=CONF.host,
+                                           replication_status='enabled')
+
+        self.assertRaises(exception.InvalidVolume,
+                          volume_api.enable_replication,
+                          ctxt, volume)
+
+    def test_enable_replication(self):
+        volume_api = cinder.volume.api.API()
+        ctxt = context.get_admin_context()
+
+        volume = tests_utils.create_volume(self.context,
+                                           size=1,
+                                           host=CONF.host,
+                                           replication_status='disabled')
+        with mock.patch.object(volume_rpcapi.VolumeAPI,
+                               'enable_replication') as mock_enable_rep:
+            volume_api.enable_replication(ctxt, volume)
+            self.assertTrue(mock_enable_rep.called)
+
+    def test_disable_replication_invalid_state(self):
+        volume_api = cinder.volume.api.API()
+        ctxt = context.get_admin_context()
+        volume = tests_utils.create_volume(ctxt,
+                                           size=1,
+                                           host=CONF.host,
+                                           replication_status='invalid-state')
+
+        self.assertRaises(exception.InvalidVolume,
+                          volume_api.disable_replication,
+                          ctxt, volume)
+
+    def test_disable_replication(self):
+        volume_api = cinder.volume.api.API()
+        ctxt = context.get_admin_context()
+
+        volume = tests_utils.create_volume(self.context,
+                                           size=1,
+                                           host=CONF.host,
+                                           replication_status='disabled')
+
+        with mock.patch.object(volume_rpcapi.VolumeAPI,
+                               'disable_replication') as mock_disable_rep:
+            volume_api.disable_replication(ctxt, volume)
+            self.assertTrue(mock_disable_rep.called)
+
+            volume['replication_status'] = 'enabled'
+            volume_api.disable_replication(ctxt, volume)
+            self.assertTrue(mock_disable_rep.called)
+
 
 class LVMISCSIVolumeDriverTestCase(DriverTestCase):
     """Test case for VolumeDriver"""
index 5502c33baf8456e6911800c62cebaed97ed1f149..41b1a0add3f8509b3d26a5b676a838f289b88dbd 100644 (file)
@@ -794,3 +794,11 @@ class VolumeUtilsTestCase(test.TestCase):
             mock_db, 'volume-d8cd1fe')
 
         self.assertFalse(result)
+
+    def test_convert_config_string_to_dict(self):
+        test_string = "{'key-1'='val-1' 'key-2'='val-2' 'key-3'='val-3'}"
+        expected_dict = {'key-1': 'val-1', 'key-2': 'val-2', 'key-3': 'val-3'}
+
+        self.assertEqual(
+            expected_dict,
+            volume_utils.convert_config_string_to_dict(test_string))
index c06895aed8298dc6b41d707c56db716767f45af5..af64a635329d84e9f138e7cb97984e7945ccf1d9 100644 (file)
@@ -1505,6 +1505,121 @@ class API(base.Base):
                      resource=vol_ref)
             return vol_ref
 
+    # Replication V2 methods
+
+    # NOTE(jdg): It might be kinda silly to propagate the named
+    # args with defaults all the way down through rpc into manager
+    # but for now the consistency is useful, and there may be
+    # some usefulness in the future (direct calls in manager?)
+
+    # NOTE(jdg): Relying solely on the volume-type quota mechanism for
+    # now; we need to consider how we handle configured backends WRT
+    # quotas.  Do they count against normal quotas or not?  For now
+    # they're a special resource, so no.
+
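+    # NOTE: The replication_status transitions driven by these calls
+    # (the manager sets the final state on completion) are:
+    #   enable:   'disabled' -> 'enabling' -> 'enabled'
+    #   disable:  'enabled'/'disabled' -> 'disabling' -> 'disabled'
+    #   failover: 'enabled' -> 'enabling_secondary' -> 'failed-over'
+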
+    @wrap_check_policy
+    def enable_replication(self, ctxt, volume):
+
+        # NOTE(jdg): details like sync vs async
+        # and replica count are to be set via the
+        # volume-type and config files.
+
+        # Get a fresh ref from db and check status
+        volume = self.db.volume_get(ctxt, volume['id'])
+
+        # NOTE(jdg): Set a valid status as a var to minimize errors via typos;
+        # also, use a list, as we may want to add to it some day
+
+        # TODO(jdg): Move these up to a global list for each call and ban the
+        # free form typing of states and state checks going forward
+
+        # NOTE(jdg): There may be a need for some backends to allow this
+        # call to the driver regardless of replication_status; most likely
+        # this indicates an issue with the driver, but there might be
+        # useful cases to consider modifying this for in the future.
+        valid_rep_status = ['disabled']
+        rep_status = volume.get('replication_status', valid_rep_status[0])
+
+        if rep_status not in valid_rep_status:
+            msg = (_("Invalid status to enable replication. "
+                     "valid states are: %(valid_states)s, "
+                     "current replication-state is: %(curr_state)s."),
+                   {'valid_states': valid_rep_status,
+                    'curr_state': rep_status})
+
+            raise exception.InvalidVolume(reason=msg)
+
+        vref = self.db.volume_update(ctxt,
+                                     volume['id'],
+                                     {'replication_status': 'enabling'})
+        self.volume_rpcapi.enable_replication(ctxt, vref)
+
+    @wrap_check_policy
+    def disable_replication(self, ctxt, volume):
+
+        valid_disable_status = ['disabled', 'enabled']
+
+        # NOTE(jdg): Just use disabled here (item 1 in the list) this
+        # way if someone says disable_rep on a volume that's not being
+        # replicated we just say "ok, done"
+        rep_status = volume.get('replication_status', valid_disable_status[0])
+
+        if rep_status not in valid_disable_status:
+            msg = (_("Invalid status to disable replication. "
+                     "valid states are: %(valid_states)s, "
+                     "current replication-state is: %(curr_state)s."),
+                   {'valid_states': valid_disable_status,
+                    'curr_state': rep_status})
+
+            raise exception.InvalidVolume(reason=msg)
+
+        vref = self.db.volume_update(ctxt,
+                                     volume['id'],
+                                     {'replication_status': 'disabling'})
+
+        self.volume_rpcapi.disable_replication(ctxt, vref)
+
+    @wrap_check_policy
+    def failover_replication(self,
+                             ctxt,
+                             volume,
+                             secondary=None):
+
+        # FIXME(jdg):  What is the secondary argument?
+        # for managed secondaries that's easy; it's a host
+        # for others, it's tricky; will propose a format for
+        # secondaries that includes an ID/Name that can be
+        # used as a handle
+        valid_failover_status = ['enabled']
+        rep_status = volume.get('replication_status', 'na')
+
+        if rep_status not in valid_failover_status:
+            msg = (_("Invalid status to failover replication. "
+                     "valid states are: %(valid_states)s, "
+                     "current replication-state is: %(curr_state)s."),
+                   {'valid_states': valid_failover_status,
+                    'curr_state': rep_status})
+
+            raise exception.InvalidVolume(reason=msg)
+
+        vref = self.db.volume_update(
+            ctxt,
+            volume['id'],
+            {'replication_status': 'enabling_secondary'})
+
+        self.volume_rpcapi.failover_replication(ctxt,
+                                                vref,
+                                                secondary)
+
+    @wrap_check_policy
+    def list_replication_targets(self, ctxt, volume):
+
+        # NOTE(jdg): This collects info for the specified volume
+        # it is NOT an error if the volume is not being replicated
+        # also, would be worth having something at a backend/host
+        # level to show an admin how a backend is configured.
+        return self.volume_rpcapi.list_replication_targets(ctxt, volume)
+
 
 class HostAPI(base.Base):
     def __init__(self):
index 59983b3a52764176ac6971a321eef34d76f29296..7f6c8646e2a26a09cf8a7a8c83b5b01ed2337375 100644 (file)
@@ -221,6 +221,20 @@ volume_opts = [
                 help='List of options that control which trace info '
                      'is written to the DEBUG log level to assist '
                      'developers. Valid values are method and api.'),
+    cfg.BoolOpt('managed_replication_target',
+                default=True,
+                help='There are two types of target configurations: '
+                     'managed (replicate to another configured backend) '
+                     'or unmanaged (replicate to a device not managed '
+                     'by Cinder).'),
+    cfg.ListOpt('replication_devices',
+                default=None,
+                help="List of k/v pairs representing a replication target "
+                     "for this backend device.  For unmanaged the format "
+                     "is: {'key-1'='val1' 'key-2'='val2'...},{...} "
+                     "and for managed devices its simply a list of valid "
+                     "configured backend_names that the driver supports "
+                     "replicating to: backend-a,bakcend-b...")
 ]
 
 # for backward compatibility
@@ -291,6 +305,7 @@ class BaseVD(object):
             self.configuration.append_config_values(volume_opts)
             self.configuration.append_config_values(iser_opts)
             utils.setup_tracing(self.configuration.safe_get('trace_flags'))
+
         self.set_execute(execute)
         self._stats = {}
 
@@ -1384,6 +1399,187 @@ class ManageableVD(object):
         pass
 
 
+@six.add_metaclass(abc.ABCMeta)
+class ReplicaV2VD(object):
+    """Cinder replication functionality.
+
+    The Cinder replication functionality is set up primarily through
+    the use of volume-types in conjunction with the filter scheduler.
+    This requires:
+    1. The driver reports "replication_enabled = True" in its capabilities
+    2. The cinder.conf file includes the replication_devices option in the
+       appropriate driver section
+
+    The driver configuration is expected to take one of the following two
+    forms; see the devref replication docs for details.
+
+    Note we provide cinder.volume.utils.convert_config_string_to_dict
+    to parse this out into a proper, usable dictionary.
+
+    """
+
+    @abc.abstractmethod
+    def replication_enable(self, context, volume):
+        """Enable replication on a replication capable volume.
+
+        If the volume was created on a replication_enabled host this method
+        is used to re-enable replication for the volume.
+
+        Primarily we only want this for testing/admin purposes.  The idea
+        being that the bulk of the replication details are handled by the
+        type definition and the driver; however disable/enable(re-enable) is
+        provided for admins to test or do maintenance, which is a
+        requirement of some cloud providers.
+
+        NOTE: This is intended as an ADMIN only call and is not
+        intended to be used by the end user to enable replication.  We're
+        leaving that to the volume-type info; this is for things like
+        maintenance or testing.
+
+
+        :param context: security context
+        :param volume: volume object returned by DB
+        :response: {replication_driver_data: vendor-data} DB update
+
+        The replication_driver_data response is vendor-unique
+        data returned/used by the driver.  It is expected that
+        the response from the driver is in the appropriate db update
+        format, in the form of a dict, where the vendor data is
+        stored under the key 'replication_driver_data'
+
+        """
+
+        # TODO(jdg): Put a check in at API layer to verify the host is
+        # replication capable before even issuing this call (can just
+        # check against the volume-type for said volume as well)
+
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def replication_disable(self, context, volume):
+        """Disable replication on the specified volume.
+
+        If the specified volume is currently replication enabled,
+        this method can be used to disable the replication process
+        on the backend.
+
+        Note that we still send this call to a driver whose volume
+        may report replication-disabled already.  We do this as a
+        safety mechanism to allow a driver to cleanup any mismatch
+        in state between Cinder and itself.
+
+        This is intended as an ADMIN only call to allow for
+        maintenance and testing.  If a driver receives this call
+        and the process fails for some reason the driver should
+        return a status update of "replication_status=disable_failed".
+
+        :param context: security context
+        :param volume: volume object returned by DB
+        :response: {replication_driver_data: vendor-data} DB update
+
+        The replication_driver_data response is vendor-unique
+        data returned/used by the driver.  It is expected that
+        the response from the driver is in the appropriate db update
+        format, in the form of a dict, where the vendor data is
+        stored under the key 'replication_driver_data'
+
+        """
+
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def replication_failover(self, context, volume, secondary):
+        """Force failover to a secondary replication target.
+
+        Forces the failover action of a replicated volume to one of its
+        secondary/target devices.  By default the choice of target devices
+        is left up to the driver.  In particular we expect one way
+        replication here, but are providing a mechanism for 'n' way
+        if supported/configured.
+
+        Currently we leave it up to the driver to figure out how/what
+        to do here.  Rather than doing things like ID swaps, we instead
+        just let the driver figure out how/where to route things.
+
+        In cases where we might want to drop a volume-service node and
+        the replication target is a configured cinder backend, we'll
+        just update the host column for the volume.
+
+        A very important point here is that in the case of a successful
+        failover, we want to update the replication_status of the
+        volume to "failed-over".  This way there's an indication that
+        things worked as expected, and that it's evident that the volume
+        may no longer be replicating to another backend (primary burst
+        into flames).  This status will be set by the manager.
+
+        :param context: security context
+        :param volume: volume object returned by DB
+        :param secondary: Specifies rep target to fail over to
+        :response: dict of updates
+
+        So the response would take the form:
+            {host: <properly formatted host string for db update>,
+             model_update: {standard_model_update_KVs},
+             replication_driver_data: xxxxxxx}
+
+        It is expected that the format of these responses is directly
+        consumable and can be used in a db.update call.
+
+        Additionally we utilize exception catching to report back to the
+        manager when things went wrong and to inform the caller on how
+        to proceed.
+
+        """
+
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def list_replication_targets(self, context, vref):
+        """Provide a means to obtain replication targets for a volume.
+
+        This method is used to query a backend to get the current
+        replication config info for the specified volume.
+
+        In the case of a volume that isn't being replicated,
+        the driver should return an empty list of targets.
+
+
+        Example response for replicating to a managed backend:
+        {'volume_id': volume['id'],
+         'targets':[{'type': 'managed',
+                     'backend_name': 'backend_name'}...]}
+
+        Example response for replicating to an unmanaged backend:
+        {'volume_id': volume['id'],
+         'targets':[{'type': 'unmanaged',
+                     'vendor-key-1': 'value-1'}...]}
+
+        NOTE: It's the responsibility of the driver to mask out any
+        passwords or sensitive information.  Also the format of the
+        response allows mixed (managed/unmanaged) targets, even though
+        the first iteration does not support configuring the driver in
+        such a manner.
+
+        """
+
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def get_replication_updates(self, context):
+        """Provide a means to obtain status updates from backend.
+
+        Provides a concise update for backends to report any errors
+        or problems with replicating volumes.  The intent is that we
+        only return something here if there's an error or a problem,
+        and to report where the backend thinks the volume is.
+
+        :param context: context of caller (probably don't need)
+        :returns: [{volid: n, status: ok|error,...}]
+        """
+        # NOTE(jdg): flesh this out with implementations so we all
+        # have something usable here
+        raise NotImplementedError()
+
+
 @six.add_metaclass(abc.ABCMeta)
 class ReplicaVD(object):
     @abc.abstractmethod
@@ -1928,6 +2124,7 @@ class ISCSIDriver(VolumeDriver):
         data["driver_version"] = '1.0'
         data["storage_protocol"] = 'iSCSI'
         data["pools"] = []
+        data["replication_enabled"] = False
 
         self._update_pools_and_stats(data)
 
index 0edb0d410db8aee513b69135110faaa12eaf8e2f..fa5a2d1eecd3103204f089bde91ef7cce3e5fc37 100644 (file)
@@ -189,7 +189,7 @@ def locked_snapshot_operation(f):
 class VolumeManager(manager.SchedulerDependentManager):
     """Manages attachable block storage devices."""
 
-    RPC_API_VERSION = '1.26'
+    RPC_API_VERSION = '1.27'
 
     target = messaging.Target(version=RPC_API_VERSION)
 
@@ -405,6 +405,10 @@ class VolumeManager(manager.SchedulerDependentManager):
         self.publish_service_capabilities(ctxt)
 
         # conditionally run replication status task
+
+        # FIXME(jdg): This should go away or be handled differently
+        #  if/when we're ready for V2 replication
+
         stats = self.driver.get_volume_stats(refresh=True)
         if stats and stats.get('replication', False):
 
@@ -413,6 +417,7 @@ class VolumeManager(manager.SchedulerDependentManager):
                 self._update_replication_relationship_status(ctxt)
 
             self.add_periodic_task(run_replication_task)
+
         LOG.info(_LI("Driver initialization completed successfully."),
                  resource={'type': 'driver',
                            'id': self.driver.__class__.__name__})
@@ -1538,6 +1543,24 @@ class VolumeManager(manager.SchedulerDependentManager):
                 # queue it to be sent to the Schedulers.
                 self.update_service_capabilities(volume_stats)
 
+                if volume_stats.get('replication_enabled', False):
+                    # replication_status provides a concise update of
+                    # replicating volumes and any error conditions
+                    # detected by the driver.  The intent is we don't
+                    # expect/worry about updates so long as nothing
+                    # changes, but if something goes wrong this is a
+                    # handy mechanism to update the manager and the db,
+                    # and possibly let the admin/user be notified
+
+                    # TODO(jdg): Refactor the check/update pieces to a
+                    # helper method we can share
+                    # We want to leverage some of the same update model
+                    # that we have in the targets update call
+
+                    replication_updates = (
+                        self.driver.get_replication_updates(context))
+                    for update in replication_updates:
+                        # TODO(jdg): Process each update; this is an
+                        # intentional no-op placeholder for now.
+                        pass
+
     def _append_volume_stats(self, vol_stats):
         pools = vol_stats.get('pools', None)
         if pools and isinstance(pools, list):
@@ -2706,3 +2729,204 @@ class VolumeManager(manager.SchedulerDependentManager):
                                 for key in model_update.iterkeys()}
             self.db.volume_update(ctxt.elevated(), new_volume['id'],
                                   model_update_new)
+
+    # Replication V2 methods
+    def enable_replication(self, context, volume):
+        """Enable replication on a replication capable volume.
+
+        If the volume was created on a replication_enabled host this method
+        is used to enable replication for the volume. Primarily used for
+        testing and maintenance.
+
+        :param context: security context
+        :param volume: volume object returned by DB
+        """
+
+        # NOTE(jdg): We're going to do a fresh get from the DB and verify that
+        # we are in an expected state ('enabling')
+        volume = self.db.volume_get(context, volume['id'])
+        if volume['replication_status'] != 'enabling':
+            msg = _("Volume replication status must be 'enabling' to "
+                    "complete enable_replication.")
+            raise exception.InvalidVolume(reason=msg)
+
+        try:
+            rep_driver_data = self.driver.replication_enable(context,
+                                                             volume)
+        except exception.CinderException:
+            err_msg = (_("Enable replication for volume failed."))
+            LOG.exception(err_msg, resource=volume)
+            raise exception.VolumeBackendAPIException(data=err_msg)
+        try:
+            if rep_driver_data:
+                volume = self.db.volume_update(context,
+                                               volume['id'],
+                                               rep_driver_data)
+        except exception.CinderException as ex:
+            LOG.exception(_LE("Driver replication data update failed."),
+                          resource=volume)
+            raise exception.VolumeBackendAPIException(data=ex)
+        self.db.volume_update(context, volume['id'],
+                              {'replication_status': 'enabled'})
+
+    def disable_replication(self, context, volume):
+        """Disable replication on the specified volume.
+
+        If the specified volume is currently replication enabled,
+        this method can be used to disable the replication process
+        on the backend.  This method assumes that we checked
+        replication status in the API layer to ensure we should
+        send this call to the driver.
+
+        :param context: security context
+        :param volume: volume object returned by DB
+        """
+
+        volume = self.db.volume_get(context, volume['id'])
+        if volume['replication_status'] != 'disabling':
+            msg = _("Volume replication status must be 'disabling' to "
+                    "complete disable_replication.")
+            raise exception.InvalidVolume(reason=msg)
+
+        try:
+            rep_driver_data = self.driver.replication_disable(context,
+                                                              volume)
+        except exception.CinderException:
+            err_msg = (_("Disable replication for volume failed."))
+            LOG.exception(err_msg, resource=volume)
+            raise exception.VolumeBackendAPIException(data=err_msg)
+        try:
+            if rep_driver_data:
+                volume = self.db.volume_update(context,
+                                               volume['id'],
+                                               rep_driver_data)
+        except exception.CinderException as ex:
+            LOG.exception(_LE("Driver replication data update failed."),
+                          resource=volume)
+            raise exception.VolumeBackendAPIException(data=ex)
+        self.db.volume_update(context,
+                              volume['id'],
+                              {'replication_status': 'disabled'})
+
+    def failover_replication(self, context, volume, secondary=None):
+        """Force failover to a secondary replication target.
+
+        Forces the failover action of a replicated volume to one of its
+        secondary/target devices.  By default the choice of target devices
+        is left up to the driver.  In particular we expect one-way
+        replication here, but are providing a mechanism for 'n' way
+        if supported/configured.
+
+        Currently we leave it up to the driver to figure out how/what
+        to do here.  Rather than doing things like ID swaps, we instead
+        just let the driver figure out how/where to route things.
+
+        In cases where we might want to drop a volume-service node and
+        the replication target is a configured cinder backend, we'll
+        just update the host column for the volume.
+
+        :param context: security context
+        :param volume: volume object returned by DB
+        :param secondary: Specifies rep target to fail over to
+        """
+        try:
+            volume_updates = self.driver.replication_failover(context,
+                                                              volume,
+                                                              secondary)
+
+            # volume_updates is a dict containing a report of relevant
+            # items based on the backend and how it operates or what it needs
+            # {'host': 'secondary-configured-cinder-backend',
+            #  'model_update': {'update-all-the-provider-info-etc'},
+            #  'replication_driver_data': 'driver-specific-stuff-for-db'}
+            # Where 'host' is a valid cinder host string like
+            #  'foo@bar#baz'
+            # model_update and replication_driver_data are required
+
+        except exception.CinderException:
+
+            # FIXME(jdg): We need to create a few different exceptions here
+            # and handle each differently:
+            # 1. I couldn't failover, but the original setup is ok so proceed
+            #    as if this were never called
+            # 2. I ran into a problem and I have no idea what state things
+            #    are in, so set volume to error
+            # 3. I ran into a problem and a human needs to come fix me up
+
+            err_msg = (_("Replication failover for volume failed."))
+            LOG.exception(err_msg, resource=volume)
+            self.db.volume_update(context,
+                                  volume['id'],
+                                  {'replication_status': 'error'})
+            raise exception.VolumeBackendAPIException(data=err_msg)
+
+        # TODO(jdg): Come back and condense these into a single update
+        update = {}
+        model_update = volume_updates.get('model_update', None)
+        driver_update = volume_updates.get('replication_driver_data', None)
+        host_update = volume_updates.get('host', None)
+
+        if model_update:
+            update['model'] = model_update
+        if driver_update:
+            update['replication_driver_data'] = driver_update
+        if host_update:
+            update['host'] = host_update
+
+        if update:
+            try:
+                volume = self.db.volume_update(
+                    context,
+                    volume['id'],
+                    update)
+
+            except exception.CinderException as ex:
+                LOG.exception(_LE("Driver replication data update failed."),
+                              resource=volume)
+                raise exception.VolumeBackendAPIException(data=ex)
+
+        # NOTE(jdg): We're setting replication status to failed-over
+        # which indicates the volume is ok, things went as expected but
+        # we're likely not replicating any longer because... well we
+        # did a fail-over.  In the case of the admin bringing the primary
+        # back online, he/she can use enable_replication to get this
+        # state set back to enabled.
+
+        # Also, in the case of multiple targets, the driver can update
+        # status in the rep-status checks if it still has valid replication
+        # targets that the volume is being replicated to.
+
+        self.db.volume_update(context,
+                              volume['id'],
+                              {'replication_status': 'failed-over'})
+
+    def list_replication_targets(self, context, volume):
+        """Provide a means to obtain replication targets for a volume.
+
+        This method is used to query a backend to get the current
+        replication config info for the specified volume.
+
+        In the case of a volume that isn't being replicated,
+        the driver should return an empty list of targets.
+
+
+        Example response for replicating to a managed backend:
+            {'volume_id': volume['id'],
+             'targets':[{'managed_host': 'backend_name'}...]}
+
+        Example response for replicating to an unmanaged backend:
+            {'volume_id': volume['id'], 'targets':[{'san_ip': '1.1.1.1',
+                                                    'san_login': 'admin'},
+                                                    ....]}
+
+        NOTE: It's the responsibility of the driver to mask out any
+        passwords or sensitive information.
+
+        """
+
+        try:
+            replication_targets = (
+                self.driver.list_replication_targets(context, volume))
+
+        except exception.CinderException:
+            err_msg = (_("Get replication targets failed."))
+            LOG.exception(err_msg)
+            raise exception.VolumeBackendAPIException(data=err_msg)
+
+        return replication_targets
index e980b46a4f1efbd915ff29bbb5f80e64163861f3..ab3c171803b2d36e89e2d17249b6953b7c41c305 100644 (file)
@@ -72,6 +72,7 @@ class VolumeAPI(object):
         1.26 - Adds support for sending objects over RPC in
                create_consistencygroup(), create_consistencygroup_from_src(),
                update_consistencygroup() and delete_consistencygroup().
+        1.27 - Adds support for replication V2
     """
 
     BASE_RPC_API_VERSION = '1.0'
@@ -81,7 +82,7 @@ class VolumeAPI(object):
         target = messaging.Target(topic=CONF.volume_topic,
                                   version=self.BASE_RPC_API_VERSION)
         serializer = objects_base.CinderObjectSerializer()
-        self.client = rpc.get_client(target, '1.26', serializer=serializer)
+        self.client = rpc.get_client(target, '1.27', serializer=serializer)
 
     def create_consistencygroup(self, ctxt, group, host):
         new_host = utils.extract_host(host)
@@ -260,3 +261,29 @@ class VolumeAPI(object):
                    volume=volume,
                    new_volume=new_volume,
                    volume_status=original_volume_status)
+
+    def enable_replication(self, ctxt, volume):
+        new_host = utils.extract_host(volume['host'])
+        cctxt = self.client.prepare(server=new_host, version='1.27')
+        cctxt.cast(ctxt, 'enable_replication', volume=volume)
+
+    def disable_replication(self, ctxt, volume):
+        new_host = utils.extract_host(volume['host'])
+        cctxt = self.client.prepare(server=new_host, version='1.27')
+        cctxt.cast(ctxt, 'disable_replication',
+                   volume=volume)
+
+    def failover_replication(self,
+                             ctxt,
+                             volume,
+                             secondary=None):
+        new_host = utils.extract_host(volume['host'])
+        cctxt = self.client.prepare(server=new_host, version='1.27')
+        cctxt.cast(ctxt, 'failover_replication',
+                   volume=volume,
+                   secondary=secondary)
+
+    def list_replication_targets(self, ctxt, volume):
+        new_host = utils.extract_host(volume['host'])
+        cctxt = self.client.prepare(server=new_host, version='1.27')
+        return cctxt.call(ctxt, 'list_replication_targets', volume=volume)
index c2d297bbd9a22adcf49f1df417a462bb73b4f654..893cf3bef8b53bcf0f2843be8e9fb94d1f445459 100644 (file)
@@ -15,6 +15,7 @@
 """Volume-related Utilities and helpers."""
 
 
+import ast
 import math
 import re
 import uuid
@@ -569,3 +570,27 @@ def check_already_managed_volume(db, vol_name):
     except (exception.VolumeNotFound, ValueError):
         return False
     return False
+
+
+def convert_config_string_to_dict(config_string):
+    """Convert config file replication string to a dict.
+
+    The only supported form is as follows:
+    "{'key-1'='val-1' 'key-2'='val-2'...}"
+
+    :param config_string: Properly formatted string to convert to dict.
+    :returns: dict of string values
+    """
+
+    resultant_dict = {}
+
+    try:
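+        # Transform "{'k-1'='v-1' 'k-2'='v-2'}" into
+        # "{'k-1':'v-1', 'k-2':'v-2'}" so ast.literal_eval can
+        # parse it as a dict literal.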
+        st = config_string.replace("=", ":")
+        st = st.replace(" ", ", ")
+        resultant_dict = ast.literal_eval(st)
+    except Exception:
+        LOG.warning(_LW("Error encountered translating config_string: "
+                        "%(config_string)s to dict"),
+                    {'config_string': config_string})
+
+    return resultant_dict
index 00509ebaeecbbb282e02f41fde35f7b8ab84bb41..86e64b578ddb2b62d6de35966950d829ed5cd6cb 100644 (file)
@@ -31,6 +31,7 @@ Programming HowTos and Tutorials
     addmethod.openstackapi
     drivers
     gmr
+    replication
 
 
 Background Concepts for Cinder
diff --git a/doc/source/devref/replication.rst b/doc/source/devref/replication.rst
new file mode 100644 (file)
index 0000000..fd7fc41
--- /dev/null
@@ -0,0 +1,166 @@
+Replication
+============
+
+How to implement replication features in a backend driver.
+
+For backend devices that offer replication features, Cinder
+provides a common mechanism for exposing that functionality
+on a per-volume basis while still trying to allow
+flexibility for the varying implementation and requirements
+of all the different backend devices.
+
+Most of the configuration is done via the cinder.conf file
+under the driver section and through the use of volume types.
+
+Config file examples
+--------------------
+
+The cinder.conf file is used to specify replication target
+devices for a specific driver.  There are two types of target
+devices that can be configured:
+
+   1. Cinder Managed (represented by the volume-backend name)
+   2. External devices (require vendor-specific data to configure)
+
+Note that it is considered an error to have both managed and unmanaged replication
+config variables set for a single driver.
+
+Cinder managed target device
+-----------------------------
+
+In the case of a Cinder managed target device, we simply
+use another Cinder configured backend as the replication
+target.
+
+For example, if we have two backend devices foo and biz that
+can replicate to each other, we can set up backend biz as
+a replication target for device foo using the following
+config entries::
+
+    .....
+    [driver-biz]
+    volume_driver=xxxx
+    volume_backend_name=biz
+
+    [driver-foo]
+    volume_driver=xxxx
+    volume_backend_name=foo
+    managed_replication_target=True
+    replication_devices=volume_backend_name-1,volume_backend_name-2....
+
+Notice that the only changes from the usual driver configuration
+section here are the managed_replication_target flag and the
+replication_devices option.
+
+
+Unmanaged target device
+------------------------
+
+In some cases the replication target device may not be a
+configured Cinder backend.  In this case it's the configured
+driver's responsibility to route commands to the active device
+and to update provider info to ensure the proper iSCSI targets
+are being used.
+
+This type of config changes only slightly, and instead of using
+a backend_name, it takes the vendor-unique config options::
+
+    .....
+    [driver-foo]
+    volume_driver=xxxx
+    volume_backend_name=foo
+    managed_replication_target=False
+    replication_devices={'key1'='val1' 'key2'='val2' ...},
+                        {'key7'='val7'....},...
+
+Note that the key/value entries can be whatever the device requires; we treat the actual
+variable in the config parser as a comma-delimited list, the {} and = notations are
+convenient/common parser delimiters, and the K/V entries are space separated.
+
+We provide a literal evaluator to convert these entries into a proper dict; the
+format is therefore extremely important here.
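+
+As an illustration, the helper provided in cinder.volume.utils can be used
+as follows (a minimal sketch; the input string mirrors the format above)::
+
+    from cinder.volume import utils as volume_utils
+
+    config_string = "{'san_ip'='1.1.1.1' 'san_login'='admin'}"
+    target = volume_utils.convert_config_string_to_dict(config_string)
+    # target == {'san_ip': '1.1.1.1', 'san_login': 'admin'}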
+
+
+Volume Types / Extra Specs
+---------------------------
+In order for a user to specify they'd like a replicated volume, there needs to be
+a corresponding Volume Type created by the Cloud Administrator.
+
+There's a good deal of flexibility in using volume types.  The scheduler can
+send the create request to a backend that provides replication by simply
+providing the replication=enabled key in the extra-specs of the volume type.
+
+For example, if the type was set to simply create the volume on any (or if you only had one)
+backend that supports replication, the extra-specs entry would be::
+
+    {replication: enabled}
+
+If you needed to provide a specific backend device (multiple backends supporting replication)::
+
+    {replication: enabled, volume_backend_name: foo}
+
+Additionally you could provide further details using scoped keys::
+
+    {replication: enabled, volume_backend_name: foo,
+     replication:replication_type: async}
+
+Again, it's up to the driver to parse the volume type info on create and set things up
+as requested.  While the scoping key can be anything, it's strongly recommended that all
+backends utilize the same key (replication) for consistency and to make things easier for
+the Cloud Administrator.
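+
+For example, a Cloud Administrator might create such a type as follows (a
+hypothetical workflow using the standard cinder CLI; the names are examples)::
+
+    cinder type-create replicated-foo
+    cinder type-key replicated-foo set replication=enabled \
+        volume_backend_name=foo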
+
+Capabilities reporting
+----------------------
+The following entries are expected to be added to the stats/capabilities update for
+replication configured devices::
+
+    stats["replication_enabled"] = True|False
+    stats["replication_type"] = ['async', 'sync'...]
+    stats["replication_count"] = len(self.cluster_pairs)
+
+Required methods
+-----------------
+The number of API methods associated with replication is intentionally very limited,
+and they are admin-only methods.
+
+They include::
+
+    replication_enable(self, context, volume)
+    replication_disable(self, context, volume)
+    replication_failover(self, context, volume, secondary)
+    list_replication_targets(self, context, vref)
+
+**replication_enable**
+
+Used to notify the driver that we would like to enable replication on a replication-capable volume.
+NOTE this is NOT used as the initial create-replication command; that's handled by the volume-type at
+create time.  This is provided as a method for an Admin that may have needed to disable replication
+on a volume for maintenance or some other reason and who would now like to "resume" replication on
+the given volume.
+
+**replication_disable**
+
+Used to notify the driver that we would like to disable replication on a replication-capable volume.
+This again would be used by a Cloud Administrator for things like maintenance etc.
+
+**replication_failover**
+
+Used to instruct the backend to fail over to the secondary/target device on a replication-capable volume.
+This may be used for triggering a fail-over manually or for testing purposes.
+
+Note that ideally drivers will know how to update the volume reference properly so that Cinder is now
+pointing to the secondary.  Also, while it's not required at this time, ideally the command would
+act as a toggle, allowing the admin to switch back and forth between primary and secondary.
+
+**list_replication_targets**
+
+Used by the admin to query a volume for a list of configured replication targets.
+The return for this call is expected to mimic the form used in the config file.
+
+For a volume replicating to managed replication targets::
+
+    {'volume_id': volume['id'], 'targets':[{'type': 'managed',
+                                            'backend_name': 'backend_name'}...]}
+
+For a volume replicating to external/unmanaged targets::
+
+    {'volume_id': volume['id'], 'targets':[{'type': 'unmanaged',
+                                            'san_ip': '127.0.0.1',
+                                            'san_login': 'admin'...}...]}
+
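+
+Driver sketch
+-------------
+
+As a rough illustration, a backend driver might wire these methods up as
+follows (a hypothetical sketch; FooDriver and its _backend calls are
+placeholders, not a real driver)::
+
+    class FooDriver(driver.ISCSIDriver, driver.ReplicaV2VD):
+
+        def replication_enable(self, context, volume):
+            # Ask the backend to (re)start replicating this volume and
+            # return the vendor data Cinder stores in the DB.
+            handle = self._backend.resume_replication(volume['id'])
+            return {'replication_driver_data': handle}
+
+        def replication_disable(self, context, volume):
+            handle = self._backend.pause_replication(volume['id'])
+            return {'replication_driver_data': handle}
+
+        def replication_failover(self, context, volume, secondary):
+            # Promote the (optionally specified) secondary and hand back
+            # a db-consumable update as described above.
+            new_host = self._backend.promote(volume['id'], secondary)
+            return {'host': new_host,
+                    'model_update': {},
+                    'replication_driver_data': ''}
+
+        def list_replication_targets(self, context, vref):
+            return {'volume_id': vref['id'],
+                    'targets': [{'type': 'managed',
+                                 'backend_name': 'biz'}]}
+
+        def get_replication_updates(self, context):
+            # Nothing to report when replication is healthy.
+            return []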
index 5a520c528bde37dc5c0c91cd4e0ee098eea54b12..7bbe497537e48a27673d8b4d2a2510c83578b87a 100644 (file)
     "volume_extension:replication:promote": "rule:admin_api",
     "volume_extension:replication:reenable": "rule:admin_api",
 
+    "volume:enable_replication": "rule:admin_api",
+    "volume:disable_replication": "rule:admin_api",
+    "volume:failover_replication": "rule:admin_api",
+    "volume:list_replication_targets": "rule:admin_api",
+
     "backup:create" : "",
     "backup:delete": "",
     "backup:get": "",