From: Gorka Eguileor
Date: Fri, 21 Aug 2015 17:13:50 +0000 (+0200)
Subject: Remove API races from attach and detach methods
X-Git-Url: https://review.fuel-infra.org/gitweb?a=commitdiff_plain;h=c7bfd636b32cfeebeb45135a6aedcd8e9214ca11;p=openstack-build%2Fcinder-build.git

Remove API races from attach and detach methods

This patch uses compare-and-swap to perform atomic DB changes to remove
API races in attach and detach related methods.

Races are removed from the following methods:

- attach
- roll_detaching
- begin_detaching
- unreserve_volume
- reserve_volume

Specs: https://review.openstack.org/232599/

Implements: blueprint cinder-volume-active-active-support
Closes-Bug: #1238093
Change-Id: I646d946209e0921d1056ee89e59b82b814bf9c15
---
diff --git a/cinder/db/api.py b/cinder/db/api.py
index dcd302ba8..92565a3a4 100644
--- a/cinder/db/api.py
+++ b/cinder/db/api.py
@@ -275,6 +275,10 @@ def volume_update_status_based_on_attachment(context, volume_id):
     return IMPL.volume_update_status_based_on_attachment(context, volume_id)
 
 
+def volume_has_attachments_filter():
+    return IMPL.volume_has_attachments_filter()
+
+
 ####################
 
 
@@ -406,10 +410,11 @@ def volume_admin_metadata_delete(context, volume_id, key):
     return IMPL.volume_admin_metadata_delete(context, volume_id, key)
 
 
-def volume_admin_metadata_update(context, volume_id, metadata, delete):
+def volume_admin_metadata_update(context, volume_id, metadata, delete,
+                                 add=True, update=True):
     """Update metadata if it exists, otherwise create it."""
     return IMPL.volume_admin_metadata_update(context, volume_id, metadata,
-                                             delete)
+                                             delete, add, update)
 
 
 ##################
 
 
diff --git a/cinder/db/sqlalchemy/api.py b/cinder/db/sqlalchemy/api.py
index 0dd40d1c2..ee0af713f 100644
--- a/cinder/db/sqlalchemy/api.py
+++ b/cinder/db/sqlalchemy/api.py
@@ -39,10 +39,11 @@ import osprofiler.sqlalchemy
 import six
 import sqlalchemy
 from sqlalchemy import MetaData
-from sqlalchemy import or_, case
+from sqlalchemy import or_, and_, case
 from sqlalchemy.orm import joinedload, joinedload_all
 from sqlalchemy.orm import RelationshipProperty
 from sqlalchemy.schema import Table
+from sqlalchemy import sql
 from sqlalchemy.sql.expression import desc
 from sqlalchemy.sql.expression import literal_column
 from sqlalchemy.sql.expression import true
@@ -1829,6 +1830,13 @@ def volume_update_status_based_on_attachment(context, volume_id):
     return volume_ref
 
 
+def volume_has_attachments_filter():
+    return sql.exists().where(
+        and_(models.Volume.id == models.VolumeAttachment.volume_id,
+             models.VolumeAttachment.attach_status != 'detached',
+             ~models.VolumeAttachment.deleted))
+
+
 ####################
 
 
@@ -1863,7 +1871,7 @@ def _volume_x_metadata_get_item(context, volume_id, key, model, notfound_exec,
 
 
 def _volume_x_metadata_update(context, volume_id, metadata, delete, model,
-                              session=None):
+                              session=None, add=True, update=True):
     session = session or get_session()
     metadata = metadata.copy()
 
@@ -1887,7 +1895,7 @@ def _volume_x_metadata_update(context, volume_id, metadata, delete, model,
         for row in db_meta:
             if row.key in metadata:
                 value = metadata.pop(row.key)
-                if row.value != value:
+                if row.value != value and update:
                     # ORM objects will not be saved until we do the bulk save
                     row.value = value
                     save.append(row)
@@ -1895,8 +1903,9 @@ def _volume_x_metadata_update(context, volume_id, metadata, delete, model,
                 skip.append(row)
 
         # We also want to save non-existent metadata
-        save.extend(model(key=key, value=value, volume_id=volume_id)
-                    for key, value in metadata.items())
+        if add:
+            save.extend(model(key=key, value=value, volume_id=volume_id)
+                        for key, value in metadata.items())
 
         # Do a bulk save
         if save:
             session.bulk_save_objects(save, update_changed_only=True)
@@ -2016,10 +2025,10 @@ def _volume_admin_metadata_get(context, volume_id, session=None):
 
 @require_admin_context
 @require_volume_exists
 def _volume_admin_metadata_update(context, volume_id, metadata, delete,
-                                  session=None):
+                                  session=None, add=True, update=True):
     return _volume_x_metadata_update(context, volume_id, metadata, delete,
                                      models.VolumeAdminMetadata,
-                                     session=session)
+                                     session=session, add=add, update=update)
 
 
 @require_admin_context
@@ -2042,8 +2051,10 @@ def volume_admin_metadata_delete(context, volume_id, key):
 @require_admin_context
 @require_volume_exists
 @_retry_on_deadlock
-def volume_admin_metadata_update(context, volume_id, metadata, delete):
-    return _volume_admin_metadata_update(context, volume_id, metadata, delete)
+def volume_admin_metadata_update(context, volume_id, metadata, delete,
+                                 add=True, update=True):
+    return _volume_admin_metadata_update(context, volume_id, metadata, delete,
+                                         add=add, update=update)
 
 
 ###################
 
 
diff --git a/cinder/tests/unit/test_volume.py b/cinder/tests/unit/test_volume.py
index 344715517..4941362b7 100644
--- a/cinder/tests/unit/test_volume.py
+++ b/cinder/tests/unit/test_volume.py
@@ -23,6 +23,7 @@ import socket
 import sys
 import tempfile
 import time
+import uuid
 
 import enum
 import eventlet
@@ -2920,24 +2921,12 @@ class VolumeTestCase(BaseVolumeTestCase):
         self.assertEqual("uploading", vol['status'])
         self.assertEqual("detached", vol['attach_status'])
 
-    @mock.patch.object(cinder.volume.api.API, 'update')
-    @mock.patch.object(db.sqlalchemy.api, 'volume_get')
-    def test_reserve_volume_success(self, volume_get, volume_update):
-        fake_volume = {
-            'id': self.FAKE_UUID,
-            'status': 'available'
-        }
-
-        volume_get.return_value = fake_volume
-        volume_update.return_value = fake_volume
-
-        self.assertIsNone(cinder.volume.api.API().reserve_volume(
-            self.context,
-            fake_volume,
-        ))
-
-        self.assertTrue(volume_get.called)
-        self.assertTrue(volume_update.called)
+    def test_reserve_volume_success(self):
+        volume = tests_utils.create_volume(self.context, status='available')
+        cinder.volume.api.API().reserve_volume(self.context, volume)
+        volume_db = db.volume_get(self.context, volume.id)
+        self.assertEqual('attaching', volume_db.status)
+        db.volume_destroy(self.context, volume.id)
 
     def test_reserve_volume_in_attaching(self):
         self._test_reserve_volume_bad_status('attaching')
@@ -2946,44 +2935,31 @@ class VolumeTestCase(BaseVolumeTestCase):
         self._test_reserve_volume_bad_status('maintenance')
 
     def _test_reserve_volume_bad_status(self, status):
-        fake_volume = {
-            'id': self.FAKE_UUID,
-            'status': status
-        }
+        volume = tests_utils.create_volume(self.context, status=status)
+        self.assertRaises(exception.InvalidVolume,
+                          cinder.volume.api.API().reserve_volume,
+                          self.context,
+                          volume)
+        db.volume_destroy(self.context, volume.id)
 
-        with mock.patch.object(db.sqlalchemy.api, 'volume_get') as mock_get:
-            mock_get.return_value = fake_volume
-            self.assertRaises(exception.InvalidVolume,
-                              cinder.volume.api.API().reserve_volume,
-                              self.context,
-                              fake_volume)
-            self.assertTrue(mock_get.called)
+    def test_unreserve_volume_success_in_use(self):
+        UUID = six.text_type(uuid.uuid4())
+        volume = tests_utils.create_volume(self.context, status='attaching')
+        tests_utils.attach_volume(self.context, volume.id, UUID,
+                                  'attached_host', 'mountpoint', mode='rw')
 
-    @mock.patch.object(db.sqlalchemy.api, 'volume_get')
-    @mock.patch.object(db, 'volume_attachment_get_used_by_volume_id')
-    @mock.patch.object(cinder.volume.api.API, 'update')
-    def test_unreserve_volume_success(self, volume_get,
-                                      volume_attachment_get_used_by_volume_id,
-                                      volume_update):
-        fake_volume = {
-            'id': self.FAKE_UUID,
-            'status': 'attaching'
-        }
-        fake_attachments = [{'volume_id': self.FAKE_UUID,
-                             'instance_uuid': 'fake_instance_uuid'}]
+        cinder.volume.api.API().unreserve_volume(self.context, volume)
 
-        volume_get.return_value = fake_volume
-        volume_attachment_get_used_by_volume_id.return_value = fake_attachments
-        volume_update.return_value = fake_volume
+        db_volume = db.volume_get(self.context, volume.id)
+        self.assertEqual('in-use', db_volume.status)
 
-        self.assertIsNone(cinder.volume.api.API().unreserve_volume(
-            self.context,
-            fake_volume
-        ))
+    def test_unreserve_volume_success_available(self):
+        volume = tests_utils.create_volume(self.context, status='attaching')
 
-        self.assertTrue(volume_get.called)
-        self.assertTrue(volume_attachment_get_used_by_volume_id.called)
-        self.assertTrue(volume_update.called)
+        cinder.volume.api.API().unreserve_volume(self.context, volume)
+
+        db_volume = db.volume_get(self.context, volume.id)
+        self.assertEqual('available', db_volume.status)
 
     def test_concurrent_volumes_get_different_targets(self):
         """Ensure multiple concurrent volumes get different targets."""
@@ -3810,35 +3786,28 @@
                           'name',
                           'description')
 
-    @mock.patch.object(db.sqlalchemy.api, 'volume_get')
-    def test_begin_detaching_fails_available(self, volume_get):
+    def test_begin_detaching_fails_available(self):
         volume_api = cinder.volume.api.API()
-        volume = tests_utils.create_volume(self.context, **self.volume_params)
-        volume_get.return_value = volume
+        volume = tests_utils.create_volume(self.context, status='available')
         # Volume status is 'available'.
         self.assertRaises(exception.InvalidVolume, volume_api.begin_detaching,
                           self.context, volume)
 
-        volume_get.assert_called_once_with(self.context, volume['id'])
-        volume_get.reset_mock()
-        volume['status'] = "in-use"
-        volume['attach_status'] = "detached"
+        db.volume_update(self.context, volume.id,
+                         {'status': 'in-use', 'attach_status': 'detached'})
         # Should raise an error since not attached
         self.assertRaises(exception.InvalidVolume, volume_api.begin_detaching,
                           self.context, volume)
-        volume_get.assert_called_once_with(self.context, volume['id'])
-        volume_get.reset_mock()
-        volume['attach_status'] = "attached"
+        db.volume_update(self.context, volume.id,
+                         {'attach_status': 'attached'})
         # Ensure when attached no exception raised
         volume_api.begin_detaching(self.context, volume)
-        volume_get.assert_called_once_with(self.context, volume['id'])
-        volume_get.reset_mock()
-        volume['status'] = "maintenance"
+        volume_api.update(self.context, volume, {'status': 'maintenance'})
         self.assertRaises(exception.InvalidVolume, volume_api.begin_detaching,
                           self.context, volume)
-        volume_get.assert_called_once_with(self.context, volume['id'])
+        db.volume_destroy(self.context, volume.id)
 
     def test_begin_roll_detaching_volume(self):
         """Test begin_detaching and roll_detaching functions."""
@@ -3848,14 +3817,14 @@
         attachment = db.volume_attach(self.context,
                                       {'volume_id': volume['id'],
                                        'attached_host': 'fake-host'})
-        volume = db.volume_attached(
-            self.context, attachment['id'], instance_uuid, 'fake-host', 'vdb')
+        db.volume_attached(self.context, attachment['id'], instance_uuid,
+                           'fake-host', 'vdb')
         volume_api = cinder.volume.api.API()
         volume_api.begin_detaching(self.context, volume)
-        volume = db.volume_get(self.context, volume['id'])
+        volume = volume_api.get(self.context, volume['id'])
         self.assertEqual("detaching", volume['status'])
         volume_api.roll_detaching(self.context, volume)
-        volume = db.volume_get(self.context, volume['id'])
+        volume = volume_api.get(self.context, volume['id'])
         self.assertEqual("in-use", volume['status'])
 
     def test_volume_api_update(self):
diff --git a/cinder/utils.py b/cinder/utils.py
index c8016ca84..dfb14fe8a 100644
--- a/cinder/utils.py
+++ b/cinder/utils.py
@@ -972,3 +972,26 @@ def resolve_hostname(hostname):
     LOG.debug('Asked to resolve hostname %(host)s and got IP %(ip)s.',
               {'host': hostname, 'ip': sockaddr[0]})
     return sockaddr[0]
+
+
+def build_or_str(elements, str_format=None):
+    """Builds a string of elements joined by 'or'.
+
+    Will join strings with the 'or' word and if a str_format is provided it
+    will be used to format the resulting joined string.
+    If there are no elements an empty string will be returned.
+
+    :param elements: Elements we want to join.
+    :type elements: String or iterable of strings.
+    :param str_format: String to use to format the response.
+    :type str_format: String.
+ """ + if not elements: + return '' + + if not isinstance(elements, six.string_types): + elements = _(' or ').join(elements) + + if str_format: + return str_format % elements + return elements diff --git a/cinder/volume/api.py b/cinder/volume/api.py index 830febb3f..8f7e62cac 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -30,6 +30,7 @@ import six from cinder.api import common from cinder import context +from cinder import db from cinder.db import base from cinder import exception from cinder import flow_utils @@ -142,6 +143,8 @@ def valid_replication_volume(func): class API(base.Base): """API for interacting with the volume manager.""" + AVAILABLE_MIGRATION_STATUS = (None, 'deleting', 'error', 'success') + def __init__(self, db_driver=None, image_service=None): self.image_service = (image_service or glance.get_default_image_service()) @@ -221,8 +224,8 @@ class API(base.Base): # fails to delete after a migration. # All of the statuses above means the volume is not in the process # of a migration. - return volume['migration_status'] not in (None, 'deleting', - 'error', 'success') + return (volume['migration_status'] not in + self.AVAILABLE_MIGRATION_STATUS) def create(self, context, size, name, description, snapshot=None, image_id=None, volume_type=None, metadata=None, @@ -574,68 +577,57 @@ class API(base.Base): @wrap_check_policy def reserve_volume(self, context, volume): - # NOTE(jdg): check for Race condition bug 1096983 - # explicitly get updated ref and check - volume = self.db.volume_get(context, volume['id']) - if volume['status'] == 'available': - self.update(context, volume, {"status": "attaching"}) - elif volume['status'] == 'in-use': - if volume['multiattach']: - self.update(context, volume, {"status": "attaching"}) - else: - msg = _("Volume must be multiattachable to reserve again.") - LOG.error(msg) - raise exception.InvalidVolume(reason=msg) - else: - msg = _("Volume status must be available to reserve.") + expected = {'multiattach': volume.multiattach, + 'status': (('available', 'in-use') if volume.multiattach + else 'available')} + + result = volume.conditional_update({'status': 'attaching'}, expected) + + if not result: + expected_status = utils.build_or_str(expected['status']) + msg = _('Volume status must be %s to reserve.') % expected_status LOG.error(msg) raise exception.InvalidVolume(reason=msg) + LOG.info(_LI("Reserve volume completed successfully."), resource=volume) @wrap_check_policy def unreserve_volume(self, context, volume): - volume = self.db.volume_get(context, volume['id']) - if volume['status'] == 'attaching': - attaches = self.db.volume_attachment_get_used_by_volume_id( - context, volume['id']) - if attaches: - self.update(context, volume, {"status": "in-use"}) - else: - self.update(context, volume, {"status": "available"}) + expected = {'status': 'attaching'} + # Status change depends on whether it has attachments (in-use) or not + # (available) + value = {'status': db.Case([(db.volume_has_attachments_filter(), + 'in-use')], + else_='available')} + volume.conditional_update(value, expected) LOG.info(_LI("Unreserve volume completed successfully."), resource=volume) @wrap_check_policy def begin_detaching(self, context, volume): - # NOTE(vbala): The volume status might be 'detaching' already due to - # a previous begin_detaching call. Get updated volume status so that - # we fail such cases. 
-        volume = self.db.volume_get(context, volume['id'])
-        # If we are in the middle of a volume migration, we don't want the user
-        # to see that the volume is 'detaching'. Having 'migration_status' set
-        # will have the same effect internally.
-        if self._is_volume_migrating(volume):
-            return
-
-        if (volume['status'] != 'in-use' or
-                volume['attach_status'] != 'attached'):
-            msg = (_("Unable to detach volume. Volume status must be 'in-use' "
-                     "and attach_status must be 'attached' to detach. "
-                     "Currently: status: '%(status)s', "
-                     "attach_status: '%(attach_status)s.'") %
-                   {'status': volume['status'],
-                    'attach_status': volume['attach_status']})
+        # If we are in the middle of a volume migration, we don't want the
+        # user to see that the volume is 'detaching'. Having
+        # 'migration_status' set will have the same effect internally.
+        expected = {'status': 'in-use',
+                    'attach_status': 'attached',
+                    'migration_status': self.AVAILABLE_MIGRATION_STATUS}
+
+        result = volume.conditional_update({'status': 'detaching'}, expected)
+
+        if not (result or self._is_volume_migrating(volume)):
+            msg = _("Unable to detach volume. Volume status must be 'in-use' "
+                    "and attach_status must be 'attached' to detach.")
             LOG.error(msg)
             raise exception.InvalidVolume(reason=msg)
-        self.update(context, volume, {"status": "detaching"})
+
         LOG.info(_LI("Begin detaching volume completed successfully."),
                  resource=volume)
 
     @wrap_check_policy
     def roll_detaching(self, context, volume):
-        if volume['status'] == "detaching":
-            self.update(context, volume, {"status": "in-use"})
+        volume.conditional_update({'status': 'in-use'},
+                                  {'status': 'detaching'})
         LOG.info(_LI("Roll detaching of volume completed successfully."),
                  resource=volume)
 
@@ -647,15 +639,13 @@
                          'because it is in maintenance.'), resource=volume)
             msg = _("The volume cannot be attached in maintenance mode.")
             raise exception.InvalidVolume(reason=msg)
-        volume_metadata = self.get_volume_admin_metadata(context.elevated(),
-                                                         volume)
-        if 'readonly' not in volume_metadata:
-            # NOTE(zhiyan): set a default value for read-only flag to metadata.
-            self.update_volume_admin_metadata(context.elevated(), volume,
-                                              {'readonly': 'False'})
-            volume_metadata['readonly'] = 'False'
-
-        if volume_metadata['readonly'] == 'True' and mode != 'ro':
+
+        # We add readonly metadata if it doesn't already exist
+        readonly = self.update_volume_admin_metadata(context.elevated(),
+                                                     volume,
+                                                     {'readonly': 'False'},
+                                                     update=False)['readonly']
+        if readonly == 'True' and mode != 'ro':
             raise exception.InvalidVolumeAttachMode(mode=mode,
                                                     volume_id=volume['id'])
 
@@ -1069,7 +1059,7 @@
 
     @wrap_check_policy
     def update_volume_admin_metadata(self, context, volume, metadata,
-                                     delete=False):
+                                     delete=False, add=True, update=True):
         """Updates or creates volume administration metadata.
 
         If delete is True, metadata items that are not specified in the
@@ -1078,7 +1068,8 @@
         """
         self._check_metadata_properties(metadata)
         db_meta = self.db.volume_admin_metadata_update(context, volume['id'],
-                                                       metadata, delete)
+                                                       metadata, delete, add,
+                                                       update)
 
         # TODO(jdg): Implement an RPC call for drivers that may use this info
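
A note on the pattern: volume.conditional_update() above folds the expected
state into the WHERE clause of a single UPDATE, so the check and the write
happen atomically in the database rather than as the separate read-then-update
that opened the race window. Below is a minimal, self-contained sketch of the
same compare-and-swap idea; it uses a throwaway SQLite database and a
simplified stand-in for models.Volume, and every name in it is illustrative
rather than part of this patch:

    # Compare-and-swap with SQLAlchemy: the expected status goes into the
    # WHERE clause, so the UPDATE succeeds only if nobody changed it first.
    from sqlalchemy import Column, String, create_engine
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.orm import sessionmaker

    Base = declarative_base()

    class Volume(Base):  # simplified stand-in for models.Volume
        __tablename__ = 'volumes'
        id = Column(String(36), primary_key=True)
        status = Column(String(255))

    engine = create_engine('sqlite://')
    Base.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()
    session.add(Volume(id='vol-1', status='available'))
    session.commit()

    # Atomic "reserve": flips the row only if it is still 'available'.
    rows = (session.query(Volume)
            .filter_by(id='vol-1', status='available')
            .update({'status': 'attaching'}))
    session.commit()

    if not rows:  # rowcount 0 means another API worker won the race
        raise Exception("Volume status must be available to reserve.")

A rowcount of zero is exactly the signal reserve_volume() and
begin_detaching() use to raise InvalidVolume instead of clobbering a
concurrent update.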
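The build_or_str() helper added to cinder/utils.py exists so error messages
can list every acceptable state. A quick usage sketch matching its docstring
(assuming the patched cinder.utils module is importable):

    from cinder import utils

    utils.build_or_str(())              # -> ''
    utils.build_or_str('available')     # -> 'available'
    utils.build_or_str(('available', 'in-use'),
                       'Volume status must be %s to reserve.')
    # -> 'Volume status must be available or in-use to reserve.'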