]> review.fuel-infra Code Review - openstack-build/cinder-build.git/commitdiff
Scalable backup service - Liberty compatibility
authorLisaLi <xiaoyan.li@intel.com>
Fri, 19 Feb 2016 08:28:36 +0000 (09:28 +0100)
committerMichał Dulko <michal.dulko@intel.com>
Wed, 24 Feb 2016 09:57:09 +0000 (10:57 +0100)
To support rolling upgrades we need to make sure that Mitaka's services
are running fine with Liberty's. It gets complicated with backups as
we've strongly reworked them. Main difference is that Mitaka c-bak can
handle backup/restore of any volume and Liberty was restricted to
operate only on volumes placed on the same node.

Now when running in version heterogeneous environment we need to use old
way of backup jobs scheduling and switch to new one (round robin) only
when everything is running Mitaka.

This commit implements that by adding a dummy backup RPC API version
(1.3) that marks the beginning of scalable backups era. Jobs are
scheduled the new way only if every c-bak reports that (or higher)
version.

There are also small changes to volume.rpcapi - to fail fast if some
c-vol services aren't supporting new calls required by scalable backups
feature. This allows us to error out backups with proper message when
upgrade was done in an improper way (in Mitaka we require c-vols to be
upgraded before c-baks).

This commit also includes small changes to CinderObjectSerializer to
block tries to "forwardport" an object when sending it over RPC. If a
service receives an older object it should handle it explicitly.

Related-Blueprint: scalable-backup-service
Co-Authored-By: Michal Dulko <michal.dulko@intel.com>
Change-Id: I45324336ba00726d53cfa012e8bd498868919a8c

cinder/backup/api.py
cinder/backup/manager.py
cinder/backup/rpcapi.py
cinder/exception.py
cinder/objects/base.py
cinder/tests/unit/test_backup.py
cinder/tests/unit/test_volume_rpcapi.py
cinder/volume/rpcapi.py
releasenotes/notes/scaling-backup-service-7e5058802d2fb3dc.yaml [new file with mode: 0644]

index 8dfd078cc3612157d80191f259d1419344c590fb..9ac21ebab11931ecd024de147a3200a4f2839ba9 100644 (file)
@@ -25,6 +25,7 @@ from oslo_config import cfg
 from oslo_log import log as logging
 from oslo_utils import excutils
 from oslo_utils import strutils
+from oslo_utils import versionutils
 from pytz import timezone
 import random
 
@@ -39,6 +40,7 @@ import cinder.policy
 from cinder import quota
 from cinder import utils
 import cinder.volume
+from cinder.volume import utils as volume_utils
 
 backup_api_opts = [
     cfg.BoolOpt('backup_use_same_backend',
@@ -135,6 +137,24 @@ class API(base.Base):
 
         return backups
 
+    def _is_scalable_only(self):
+        """True if we're running in deployment where all c-bak are scalable.
+
+        We need this method to decide if we can assume that all of our c-bak
+        services are decoupled from c-vol.
+
+        FIXME(dulek): This shouldn't be needed in Newton.
+        """
+        cap = self.backup_rpcapi.client.version_cap
+        if cap:
+            cap = versionutils.convert_version_to_tuple(cap)
+            return cap >= (1, 3)  # Mitaka is marked by c-bak 1.3+.
+        else:
+            # NOTE(dulek): No version cap means we're running in an environment
+            # without c-bak services. Letting it pass as Mitaka, request will
+            # just fail anyway so it doesn't really matter.
+            return True
+
     def _az_matched(self, service, availability_zone):
         return ((not availability_zone) or
                 service.availability_zone == availability_zone)
@@ -170,14 +190,29 @@ class API(base.Base):
             idx = idx + 1
         return None
 
-    def _get_available_backup_service_host(self, host, availability_zone):
+    def _get_available_backup_service_host(self, host, az, volume_host=None):
         """Return an appropriate backup service host."""
+
+        # FIXME(dulek): We need to keep compatibility with Liberty, where c-bak
+        # were coupled with c-vol. If we're running in mixed Liberty-Mitaka
+        # environment we will be scheduling backup jobs the old way.
+        #
+        # This snippet should go away in Newton. Note that volume_host
+        # parameter will also be unnecessary then.
+        if not self._is_scalable_only():
+            if volume_host and self._is_backup_service_enabled(az,
+                                                               volume_host):
+                return volume_host
+            elif host and self._is_backup_service_enabled(az, host):
+                return host
+            else:
+                raise exception.ServiceNotFound(service_id='cinder-backup')
+
         backup_host = None
-        if host and self._is_backup_service_enabled(availability_zone, host):
+        if host and self._is_backup_service_enabled(az, host):
             backup_host = host
         if not backup_host and (not host or CONF.backup_use_same_backend):
-            backup_host = self._get_any_available_backup_service(
-                availability_zone)
+            backup_host = self._get_any_available_backup_service(az)
         if not backup_host:
             raise exception.ServiceNotFound(service_id='cinder-backup')
         return backup_host
@@ -225,7 +260,8 @@ class API(base.Base):
 
         previous_status = volume['status']
         host = self._get_available_backup_service_host(
-            None, volume.availability_zone)
+            None, volume.availability_zone,
+            volume_utils.extract_host(volume.host, 'host'))
 
         # Reserve a quota before setting volume status and backup status
         try:
index 3aa486d8bb602cbc4107d2698f9f92876db0cf25..abca503aa2e3fb2cc62145561596ce1fae0985cd 100644 (file)
@@ -81,7 +81,7 @@ QUOTAS = quota.QUOTAS
 class BackupManager(manager.SchedulerDependentManager):
     """Manages backup of block storage devices."""
 
-    RPC_API_VERSION = '1.2'
+    RPC_API_VERSION = '1.3'
 
     target = messaging.Target(version=RPC_API_VERSION)
 
index 9ed3ccacc2b352299498be78f9f4ca7f693aff19..245cc07b8247368ac40d97e59ed0cb53f5e12bb4 100644 (file)
@@ -35,9 +35,12 @@ class BackupAPI(rpc.RPCAPI):
 
         1.0 - Initial version.
         1.1 - Changed methods to accept backup objects instead of IDs.
+        1.2 - A version that got in by mistake (without breaking anything).
+        1.3 - Dummy version bump to mark start of having cinder-backup service
+              decoupled from cinder-volume.
     """
 
-    RPC_API_VERSION = '1.1'
+    RPC_API_VERSION = '1.3'
     TOPIC = CONF.backup_topic
     BINARY = 'cinder-backup'
 
index 88b3c3da94fc16396861ae1c1b338be7240d3812..db01e8276a58220b9db9d1c7bf8e9e4e07d78efb 100644 (file)
@@ -351,6 +351,10 @@ class ServiceNotFound(NotFound):
     message = _("Service %(service_id)s could not be found.")
 
 
+class ServiceTooOld(Invalid):
+    message = _("Service is too old to fulfil this request.")
+
+
 class HostNotFound(NotFound):
     message = _("Host %(host)s could not be found.")
 
index 90174fb6dee5dfd4e5bfbe921d718ec4edf3d3b1..78d5b8002693a4eb645e08bb232c1644eb9b38b3 100644 (file)
@@ -18,6 +18,7 @@ import contextlib
 import datetime
 
 from oslo_log import log as logging
+from oslo_utils import versionutils
 from oslo_versionedobjects import base
 from oslo_versionedobjects import fields
 
@@ -394,8 +395,20 @@ class CinderObjectSerializer(base.VersionedObjectSerializer):
 
     def _get_capped_obj_version(self, obj):
         objname = obj.obj_name()
-        objver = OBJ_VERSIONS.get(self.version_cap, {})
-        return objver.get(objname, None)
+        version_dict = OBJ_VERSIONS.get(self.version_cap, {})
+        version_cap = version_dict.get(objname, None)
+
+        if version_cap:
+            cap_tuple = versionutils.convert_version_to_tuple(version_cap)
+            obj_tuple = versionutils.convert_version_to_tuple(obj.VERSION)
+            if cap_tuple > obj_tuple:
+                # NOTE(dulek): Do not set version cap to be higher than actual
+                # object version as we don't support "forwardporting" of
+                # objects. If service will receive an object that's too old it
+                # should handle it explicitly.
+                version_cap = None
+
+        return version_cap
 
     def serialize_entity(self, context, entity):
         if isinstance(entity, (tuple, list, set, dict)):
index 0796911ad42e78e7730a122b898722f82fce1f3e..bbbf9a798bf67e011effc899c7eb4471daddc14c 100644 (file)
@@ -508,6 +508,26 @@ class BackupTestCase(BaseBackupTest):
         self.assertEqual(fields.BackupStatus.ERROR, backup['status'])
         self.assertTrue(mock_run_backup.called)
 
+    @mock.patch('cinder.utils.brick_get_connector_properties')
+    @mock.patch('cinder.utils.temporary_chown')
+    @mock.patch('six.moves.builtins.open')
+    def test_create_backup_old_volume_service(self, mock_open,
+                                              mock_temporary_chown,
+                                              mock_get_backup_device):
+        """Test error handling when there's too old volume service in env."""
+        vol_id = self._create_volume_db_entry(size=1)
+        backup = self._create_backup_db_entry(volume_id=vol_id)
+
+        with mock.patch.object(self.backup_mgr.volume_rpcapi.client,
+                               'version_cap', '1.37'):
+            self.assertRaises(exception.ServiceTooOld,
+                              self.backup_mgr.create_backup, self.ctxt, backup)
+            vol = db.volume_get(self.ctxt, vol_id)
+            self.assertEqual('available', vol['status'])
+            self.assertEqual('error_backing-up', vol['previous_status'])
+            backup = db.backup_get(self.ctxt, backup.id)
+            self.assertEqual(fields.BackupStatus.ERROR, backup['status'])
+
     @mock.patch('cinder.utils.brick_get_connector_properties')
     @mock.patch('cinder.volume.rpcapi.VolumeAPI.get_backup_device')
     @mock.patch('cinder.utils.temporary_chown')
@@ -619,6 +639,31 @@ class BackupTestCase(BaseBackupTest):
         self.assertEqual(fields.BackupStatus.AVAILABLE, backup['status'])
         self.assertTrue(mock_run_restore.called)
 
+    @mock.patch('cinder.utils.brick_get_connector_properties')
+    def test_restore_backup_with_old_volume_service(self, mock_get_conn):
+        """Test error handling when an error occurs during backup restore."""
+        vol_id = self._create_volume_db_entry(status='restoring-backup',
+                                              size=1)
+        backup = self._create_backup_db_entry(
+            status=fields.BackupStatus.RESTORING, volume_id=vol_id)
+
+        # Unmock secure_file_operations_enabled
+        self.volume_patches['secure_file_operations_enabled'].stop()
+
+        with mock.patch.object(self.backup_mgr.volume_rpcapi.client,
+                               'version_cap', '1.37'):
+            self.assertRaises(exception.ServiceTooOld,
+                              self.backup_mgr.restore_backup,
+                              self.ctxt,
+                              backup,
+                              vol_id)
+            vol = db.volume_get(self.ctxt, vol_id)
+            self.assertEqual('error_restoring', vol['status'])
+            backup = db.backup_get(self.ctxt, backup.id)
+            self.assertEqual(fields.BackupStatus.AVAILABLE, backup['status'])
+
+        self.volume_patches['secure_file_operations_enabled'].start()
+
     def test_restore_backup_with_bad_service(self):
         """Test error handling.
 
index 04119477ecf84cc1e0cfbed75aaa719f68a2a0e1..aa34d7eb1a01873649e501ac7ddb7b6f83f65c87 100644 (file)
@@ -24,6 +24,7 @@ from oslo_serialization import jsonutils
 
 from cinder import context
 from cinder import db
+from cinder import exception
 from cinder import objects
 from cinder import test
 from cinder.tests.unit import fake_backup
@@ -599,15 +600,28 @@ class VolumeRpcAPITestCase(test.TestCase):
                               volume=self.fake_volume,
                               version='1.30')
 
-    def test_get_backup_device(self):
+    @mock.patch('oslo_messaging.RPCClient.can_send_version', return_value=True)
+    def test_get_backup_device(self, mock_can_send_version):
         self._test_volume_api('get_backup_device',
                               rpc_method='call',
                               backup=self.fake_backup_obj,
                               volume=self.fake_volume_obj,
                               version='1.38')
 
-    def test_secure_file_operations_enabled(self):
+        mock_can_send_version.return_value = False
+        self.assertRaises(exception.ServiceTooOld, self._test_volume_api,
+                          'get_backup_device', rpc_method='call',
+                          backup=self.fake_backup_obj,
+                          volume=self.fake_volume_obj, version='1.38')
+
+    @mock.patch('oslo_messaging.RPCClient.can_send_version', return_value=True)
+    def test_secure_file_operations_enabled(self, mock_can_send_version):
         self._test_volume_api('secure_file_operations_enabled',
                               rpc_method='call',
                               volume=self.fake_volume_obj,
                               version='1.38')
+
+        mock_can_send_version.return_value = False
+        self.assertRaises(exception.ServiceTooOld, self._test_volume_api,
+                          'secure_file_operations_enabled', rpc_method='call',
+                          volume=self.fake_volume_obj, version='1.38')
index 238216cac2f5e37e044018a9308d69bc1964628c..8bf81eae53abb95b8370b5c3b25a7e5f675f8c08 100644 (file)
@@ -19,6 +19,8 @@ Client side of the volume RPC API.
 from oslo_config import cfg
 from oslo_serialization import jsonutils
 
+from cinder import exception
+from cinder.i18n import _
 from cinder import quota
 from cinder import rpc
 from cinder.volume import utils
@@ -334,12 +336,22 @@ class VolumeAPI(rpc.RPCAPI):
         return cctxt.call(ctxt, 'get_capabilities', discover=discover)
 
     def get_backup_device(self, ctxt, backup, volume):
+        if not self.client.can_send_version('1.38'):
+            msg = _('One of cinder-volume services is too old to accept such '
+                    'request. Are you running mixed Liberty-Mitaka '
+                    'cinder-volumes?')
+            raise exception.ServiceTooOld(msg)
         new_host = utils.extract_host(volume.host)
         cctxt = self.client.prepare(server=new_host, version='1.38')
         return cctxt.call(ctxt, 'get_backup_device',
                           backup=backup)
 
     def secure_file_operations_enabled(self, ctxt, volume):
+        if not self.client.can_send_version('1.38'):
+            msg = _('One of cinder-volume services is too old to accept such '
+                    'request. Are you running mixed Liberty-Mitaka '
+                    'cinder-volumes?')
+            raise exception.ServiceTooOld(msg)
         new_host = utils.extract_host(volume.host)
         cctxt = self.client.prepare(server=new_host, version='1.38')
         return cctxt.call(ctxt, 'secure_file_operations_enabled',
diff --git a/releasenotes/notes/scaling-backup-service-7e5058802d2fb3dc.yaml b/releasenotes/notes/scaling-backup-service-7e5058802d2fb3dc.yaml
new file mode 100644 (file)
index 0000000..8a8e49b
--- /dev/null
@@ -0,0 +1,8 @@
+---
+features:
+  - cinder-backup service is now decoupled from
+    cinder-volume, which allows more flexible scaling.
+upgrade:
+  - As cinder-backup was strongly reworked in this
+    release, the recommended upgrade order when executing
+    live (rolling) upgrade is c-api->c-sch->c-vol->c-bak.