From 05a516da01225bed8b99ca49e558d40d71df3fe1 Mon Sep 17 00:00:00 2001
From: LisaLi
Date: Fri, 19 Feb 2016 09:28:36 +0100
Subject: [PATCH] Scalable backup service - Liberty compatibility

To support rolling upgrades we need to make sure that Mitaka's services
run fine alongside Liberty's. This gets complicated with backups, as
we've strongly reworked them.

The main difference is that Mitaka's c-bak can handle backup/restore of
any volume, while Liberty's was restricted to operating only on volumes
placed on the same node. When running in a version-heterogeneous
environment we need to keep the old way of scheduling backup jobs and
switch to the new one (round robin) only when everything is running
Mitaka.

This commit implements that by adding a dummy backup RPC API version
(1.3) that marks the beginning of the scalable backups era. Jobs are
scheduled the new way only if every c-bak reports that version (or
higher).

There are also small changes to volume.rpcapi to fail fast if some
c-vol services don't support the new calls required by the scalable
backups feature. This allows us to error out backups with a proper
message when the upgrade was done in an improper way (in Mitaka we
require c-vols to be upgraded before c-baks).

This commit also includes small changes to CinderObjectSerializer to
block attempts to "forwardport" an object when sending it over RPC. If
a service receives an older object it should handle it explicitly.

Related-Blueprint: scalable-backup-service
Co-Authored-By: Michal Dulko
Change-Id: I45324336ba00726d53cfa012e8bd498868919a8c
---
 cinder/backup/api.py                          | 46 +++++++++++++++++--
 cinder/backup/manager.py                      |  2 +-
 cinder/backup/rpcapi.py                       |  5 +-
 cinder/exception.py                           |  4 ++
 cinder/objects/base.py                        | 17 ++++++-
 cinder/tests/unit/test_backup.py              | 45 ++++++++++++++++++
 cinder/tests/unit/test_volume_rpcapi.py       | 18 +++++++-
 cinder/volume/rpcapi.py                       | 12 +++++
 ...aling-backup-service-7e5058802d2fb3dc.yaml |  8 ++++
 9 files changed, 146 insertions(+), 11 deletions(-)
 create mode 100644 releasenotes/notes/scaling-backup-service-7e5058802d2fb3dc.yaml

diff --git a/cinder/backup/api.py b/cinder/backup/api.py
index 8dfd078cc..9ac21ebab 100644
--- a/cinder/backup/api.py
+++ b/cinder/backup/api.py
@@ -25,6 +25,7 @@ from oslo_config import cfg
 from oslo_log import log as logging
 from oslo_utils import excutils
 from oslo_utils import strutils
+from oslo_utils import versionutils
 from pytz import timezone
 import random
 
@@ -39,6 +40,7 @@ import cinder.policy
 from cinder import quota
 from cinder import utils
 import cinder.volume
+from cinder.volume import utils as volume_utils
 
 backup_api_opts = [
     cfg.BoolOpt('backup_use_same_backend',
@@ -135,6 +137,24 @@ class API(base.Base):
 
         return backups
 
+    def _is_scalable_only(self):
+        """True if we're running in deployment where all c-bak are scalable.
+
+        We need this method to decide if we can assume that all of our c-bak
+        services are decoupled from c-vol.
+
+        FIXME(dulek): This shouldn't be needed in Newton.
+        """
+        cap = self.backup_rpcapi.client.version_cap
+        if cap:
+            cap = versionutils.convert_version_to_tuple(cap)
+            return cap >= (1, 3)  # Mitaka is marked by c-bak 1.3+.
+        else:
+            # NOTE(dulek): No version cap means we're running in an environment
+            # without c-bak services. Letting it pass as Mitaka, request will
+            # just fail anyway so it doesn't really matter.
+            return True
+
     def _az_matched(self, service, availability_zone):
         return ((not availability_zone) or
                 service.availability_zone == availability_zone)
@@ -170,14 +190,29 @@ class API(base.Base):
             idx = idx + 1
         return None
 
-    def _get_available_backup_service_host(self, host, availability_zone):
+    def _get_available_backup_service_host(self, host, az, volume_host=None):
         """Return an appropriate backup service host."""
+
+        # FIXME(dulek): We need to keep compatibility with Liberty, where c-bak
+        # were coupled with c-vol. If we're running in mixed Liberty-Mitaka
+        # environment we will be scheduling backup jobs the old way.
+        #
+        # This snippet should go away in Newton. Note that volume_host
+        # parameter will also be unnecessary then.
+        if not self._is_scalable_only():
+            if volume_host and self._is_backup_service_enabled(az,
+                                                               volume_host):
+                return volume_host
+            elif host and self._is_backup_service_enabled(az, host):
+                return host
+            else:
+                raise exception.ServiceNotFound(service_id='cinder-backup')
+
         backup_host = None
-        if host and self._is_backup_service_enabled(availability_zone, host):
+        if host and self._is_backup_service_enabled(az, host):
             backup_host = host
         if not backup_host and (not host or CONF.backup_use_same_backend):
-            backup_host = self._get_any_available_backup_service(
-                availability_zone)
+            backup_host = self._get_any_available_backup_service(az)
         if not backup_host:
             raise exception.ServiceNotFound(service_id='cinder-backup')
         return backup_host
@@ -225,7 +260,8 @@ class API(base.Base):
 
         previous_status = volume['status']
         host = self._get_available_backup_service_host(
-            None, volume.availability_zone)
+            None, volume.availability_zone,
+            volume_utils.extract_host(volume.host, 'host'))
 
         # Reserve a quota before setting volume status and backup status
         try:
diff --git a/cinder/backup/manager.py b/cinder/backup/manager.py
index 3aa486d8b..abca503aa 100644
--- a/cinder/backup/manager.py
+++ b/cinder/backup/manager.py
@@ -81,7 +81,7 @@ QUOTAS = quota.QUOTAS
 class BackupManager(manager.SchedulerDependentManager):
     """Manages backup of block storage devices."""
 
-    RPC_API_VERSION = '1.2'
+    RPC_API_VERSION = '1.3'
 
     target = messaging.Target(version=RPC_API_VERSION)
 
diff --git a/cinder/backup/rpcapi.py b/cinder/backup/rpcapi.py
index 9ed3ccacc..245cc07b8 100644
--- a/cinder/backup/rpcapi.py
+++ b/cinder/backup/rpcapi.py
@@ -35,9 +35,12 @@ class BackupAPI(rpc.RPCAPI):
 
         1.0 - Initial version.
         1.1 - Changed methods to accept backup objects instead of IDs.
+        1.2 - A version that got in by mistake (without breaking anything).
+        1.3 - Dummy version bump to mark start of having cinder-backup service
+              decoupled from cinder-volume.
""" - RPC_API_VERSION = '1.1' + RPC_API_VERSION = '1.3' TOPIC = CONF.backup_topic BINARY = 'cinder-backup' diff --git a/cinder/exception.py b/cinder/exception.py index 88b3c3da9..db01e8276 100644 --- a/cinder/exception.py +++ b/cinder/exception.py @@ -351,6 +351,10 @@ class ServiceNotFound(NotFound): message = _("Service %(service_id)s could not be found.") +class ServiceTooOld(Invalid): + message = _("Service is too old to fulfil this request.") + + class HostNotFound(NotFound): message = _("Host %(host)s could not be found.") diff --git a/cinder/objects/base.py b/cinder/objects/base.py index 90174fb6d..78d5b8002 100644 --- a/cinder/objects/base.py +++ b/cinder/objects/base.py @@ -18,6 +18,7 @@ import contextlib import datetime from oslo_log import log as logging +from oslo_utils import versionutils from oslo_versionedobjects import base from oslo_versionedobjects import fields @@ -394,8 +395,20 @@ class CinderObjectSerializer(base.VersionedObjectSerializer): def _get_capped_obj_version(self, obj): objname = obj.obj_name() - objver = OBJ_VERSIONS.get(self.version_cap, {}) - return objver.get(objname, None) + version_dict = OBJ_VERSIONS.get(self.version_cap, {}) + version_cap = version_dict.get(objname, None) + + if version_cap: + cap_tuple = versionutils.convert_version_to_tuple(version_cap) + obj_tuple = versionutils.convert_version_to_tuple(obj.VERSION) + if cap_tuple > obj_tuple: + # NOTE(dulek): Do not set version cap to be higher than actual + # object version as we don't support "forwardporting" of + # objects. If service will receive an object that's too old it + # should handle it explicitly. + version_cap = None + + return version_cap def serialize_entity(self, context, entity): if isinstance(entity, (tuple, list, set, dict)): diff --git a/cinder/tests/unit/test_backup.py b/cinder/tests/unit/test_backup.py index 0796911ad..bbbf9a798 100644 --- a/cinder/tests/unit/test_backup.py +++ b/cinder/tests/unit/test_backup.py @@ -508,6 +508,26 @@ class BackupTestCase(BaseBackupTest): self.assertEqual(fields.BackupStatus.ERROR, backup['status']) self.assertTrue(mock_run_backup.called) + @mock.patch('cinder.utils.brick_get_connector_properties') + @mock.patch('cinder.utils.temporary_chown') + @mock.patch('six.moves.builtins.open') + def test_create_backup_old_volume_service(self, mock_open, + mock_temporary_chown, + mock_get_backup_device): + """Test error handling when there's too old volume service in env.""" + vol_id = self._create_volume_db_entry(size=1) + backup = self._create_backup_db_entry(volume_id=vol_id) + + with mock.patch.object(self.backup_mgr.volume_rpcapi.client, + 'version_cap', '1.37'): + self.assertRaises(exception.ServiceTooOld, + self.backup_mgr.create_backup, self.ctxt, backup) + vol = db.volume_get(self.ctxt, vol_id) + self.assertEqual('available', vol['status']) + self.assertEqual('error_backing-up', vol['previous_status']) + backup = db.backup_get(self.ctxt, backup.id) + self.assertEqual(fields.BackupStatus.ERROR, backup['status']) + @mock.patch('cinder.utils.brick_get_connector_properties') @mock.patch('cinder.volume.rpcapi.VolumeAPI.get_backup_device') @mock.patch('cinder.utils.temporary_chown') @@ -619,6 +639,31 @@ class BackupTestCase(BaseBackupTest): self.assertEqual(fields.BackupStatus.AVAILABLE, backup['status']) self.assertTrue(mock_run_restore.called) + @mock.patch('cinder.utils.brick_get_connector_properties') + def test_restore_backup_with_old_volume_service(self, mock_get_conn): + """Test error handling when an error occurs during backup restore.""" + 
+        vol_id = self._create_volume_db_entry(status='restoring-backup',
+                                              size=1)
+        backup = self._create_backup_db_entry(
+            status=fields.BackupStatus.RESTORING, volume_id=vol_id)
+
+        # Unmock secure_file_operations_enabled
+        self.volume_patches['secure_file_operations_enabled'].stop()
+
+        with mock.patch.object(self.backup_mgr.volume_rpcapi.client,
+                               'version_cap', '1.37'):
+            self.assertRaises(exception.ServiceTooOld,
+                              self.backup_mgr.restore_backup,
+                              self.ctxt,
+                              backup,
+                              vol_id)
+        vol = db.volume_get(self.ctxt, vol_id)
+        self.assertEqual('error_restoring', vol['status'])
+        backup = db.backup_get(self.ctxt, backup.id)
+        self.assertEqual(fields.BackupStatus.AVAILABLE, backup['status'])
+
+        self.volume_patches['secure_file_operations_enabled'].start()
+
     def test_restore_backup_with_bad_service(self):
         """Test error handling.
 
diff --git a/cinder/tests/unit/test_volume_rpcapi.py b/cinder/tests/unit/test_volume_rpcapi.py
index 04119477e..aa34d7eb1 100644
--- a/cinder/tests/unit/test_volume_rpcapi.py
+++ b/cinder/tests/unit/test_volume_rpcapi.py
@@ -24,6 +24,7 @@ from oslo_serialization import jsonutils
 
 from cinder import context
 from cinder import db
+from cinder import exception
 from cinder import objects
 from cinder import test
 from cinder.tests.unit import fake_backup
@@ -599,15 +600,28 @@ class VolumeRpcAPITestCase(test.TestCase):
                               volume=self.fake_volume,
                               version='1.30')
 
-    def test_get_backup_device(self):
+    @mock.patch('oslo_messaging.RPCClient.can_send_version', return_value=True)
+    def test_get_backup_device(self, mock_can_send_version):
         self._test_volume_api('get_backup_device',
                               rpc_method='call',
                               backup=self.fake_backup_obj,
                               volume=self.fake_volume_obj,
                               version='1.38')
 
-    def test_secure_file_operations_enabled(self):
+        mock_can_send_version.return_value = False
+        self.assertRaises(exception.ServiceTooOld, self._test_volume_api,
+                          'get_backup_device', rpc_method='call',
+                          backup=self.fake_backup_obj,
+                          volume=self.fake_volume_obj, version='1.38')
+
+    @mock.patch('oslo_messaging.RPCClient.can_send_version', return_value=True)
+    def test_secure_file_operations_enabled(self, mock_can_send_version):
         self._test_volume_api('secure_file_operations_enabled',
                               rpc_method='call',
                               volume=self.fake_volume_obj,
                               version='1.38')
+
+        mock_can_send_version.return_value = False
+        self.assertRaises(exception.ServiceTooOld, self._test_volume_api,
+                          'secure_file_operations_enabled', rpc_method='call',
+                          volume=self.fake_volume_obj, version='1.38')
diff --git a/cinder/volume/rpcapi.py b/cinder/volume/rpcapi.py
index 238216cac..8bf81eae5 100644
--- a/cinder/volume/rpcapi.py
+++ b/cinder/volume/rpcapi.py
@@ -19,6 +19,8 @@ Client side of the volume RPC API.
 from oslo_config import cfg
 from oslo_serialization import jsonutils
 
+from cinder import exception
+from cinder.i18n import _
 from cinder import quota
 from cinder import rpc
 from cinder.volume import utils
@@ -334,12 +336,22 @@ class VolumeAPI(rpc.RPCAPI):
         return cctxt.call(ctxt, 'get_capabilities', discover=discover)
 
     def get_backup_device(self, ctxt, backup, volume):
+        if not self.client.can_send_version('1.38'):
+            msg = _('One of cinder-volume services is too old to accept such '
+                    'request. Are you running mixed Liberty-Mitaka '
+                    'cinder-volumes?')
+            raise exception.ServiceTooOld(msg)
         new_host = utils.extract_host(volume.host)
         cctxt = self.client.prepare(server=new_host, version='1.38')
         return cctxt.call(ctxt, 'get_backup_device',
                           backup=backup)
 
     def secure_file_operations_enabled(self, ctxt, volume):
+        if not self.client.can_send_version('1.38'):
+            msg = _('One of cinder-volume services is too old to accept such '
+                    'request. Are you running mixed Liberty-Mitaka '
+                    'cinder-volumes?')
+            raise exception.ServiceTooOld(msg)
         new_host = utils.extract_host(volume.host)
         cctxt = self.client.prepare(server=new_host, version='1.38')
         return cctxt.call(ctxt, 'secure_file_operations_enabled',
diff --git a/releasenotes/notes/scaling-backup-service-7e5058802d2fb3dc.yaml b/releasenotes/notes/scaling-backup-service-7e5058802d2fb3dc.yaml
new file mode 100644
index 000000000..8a8e49b2b
--- /dev/null
+++ b/releasenotes/notes/scaling-backup-service-7e5058802d2fb3dc.yaml
@@ -0,0 +1,8 @@
+---
+features:
+  - cinder-backup service is now decoupled from
+    cinder-volume, which allows more flexible scaling.
+upgrade:
+  - As cinder-backup was strongly reworked in this
+    release, the recommended upgrade order when executing
+    live (rolling) upgrade is c-api->c-sch->c-vol->c-bak.
-- 
2.45.2