From ad02f8208b76b27513ca8a8c21b255d98080d8b3 Mon Sep 17 00:00:00 2001
From: Michal Dulko
Date: Thu, 12 Feb 2015 09:39:08 +0100
Subject: [PATCH] Fix allocated_capacity tracking when rescheduling

This aims to fix capacity tracking in the manager in case of
rescheduling. The idea is to inject information about whether the
volume was rescheduled into the exception passed up, so the manager can
decide whether incrementing allocated_capacity is needed. Then, in the
manager, a finally block is used to make sure that allocated_capacity
is incremented in case of a failure that hasn't triggered rescheduling.

Also adds two unit tests checking that the tracking is correct.

Closes-Bug: 1408763
Change-Id: Icd6b04ac60c17cbda225d3be8bcc4435ea0cd7a8
---
 cinder/tests/test_volume.py                  | 43 +++++++++++++++++++-
 cinder/volume/flows/manager/create_volume.py | 12 +++++-
 cinder/volume/manager.py                     | 32 +++++++++++----
 3 files changed, 76 insertions(+), 11 deletions(-)

diff --git a/cinder/tests/test_volume.py b/cinder/tests/test_volume.py
index fa21ee746..1989bd224 100644
--- a/cinder/tests/test_volume.py
+++ b/cinder/tests/test_volume.py
@@ -356,6 +356,25 @@ class VolumeTestCase(BaseVolumeTestCase):
         self.assertEqual(volume.status, "error")
         db.volume_destroy(context.get_admin_context(), volume_id)
 
+    def test_create_driver_not_initialized_rescheduling(self):
+        self.volume.driver._initialized = False
+
+        volume = tests_utils.create_volume(
+            self.context,
+            availability_zone=CONF.storage_availability_zone,
+            **self.volume_params)
+
+        volume_id = volume['id']
+        self.assertRaises(exception.DriverNotInitialized,
+                          self.volume.create_volume,
+                          self.context, volume_id,
+                          {'volume_properties': self.volume_params})
+        # NOTE(dulek): Volume should be rescheduled as we passed request_spec,
+        # assert that it wasn't counted in allocated_capacity tracking.
+        self.assertEqual(self.volume.stats['pools'], {})
+
+        db.volume_destroy(context.get_admin_context(), volume_id)
+
     @mock.patch.object(QUOTAS, 'rollback')
     @mock.patch.object(QUOTAS, 'commit')
     @mock.patch.object(QUOTAS, 'reserve')
@@ -2598,14 +2617,17 @@ class VolumeTestCase(BaseVolumeTestCase):
                                          **self.volume_params)['id']
         # creating volume testdata
         try:
+            request_spec = {'volume_properties': self.volume_params}
             self.volume.create_volume(self.context, volume_id,
+                                      request_spec,
                                       image_id=image_id)
         finally:
             # cleanup
             os.unlink(dst_path)
             volume = db.volume_get(self.context, volume_id)
-            return volume
+
+        return volume
 
     def test_create_volume_from_image_cloned_status_available(self):
         """Test create volume from image via cloning.
@@ -2661,6 +2683,25 @@ class VolumeTestCase(BaseVolumeTestCase):
         db.volume_destroy(self.context, volume_id)
         os.unlink(dst_path)
 
+    def test_create_volume_from_image_copy_exception_rescheduling(self):
+        """Test create volume with ImageCopyFailure
+
+        This exception should not trigger rescheduling and allocated_capacity
+        should be incremented so we're having assert for that here.
+        """
+        def fake_copy_image_to_volume(context, volume, image_service,
+                                      image_id):
+            raise exception.ImageCopyFailure()
+
+        self.stubs.Set(self.volume.driver, 'copy_image_to_volume',
+                       fake_copy_image_to_volume)
+        self.assertRaises(exception.ImageCopyFailure,
+                          self._create_volume_from_image)
+        # NOTE(dulek): Rescheduling should not occur, so lets assert that
+        # allocated_capacity is incremented.
+        self.assertDictEqual(self.volume.stats['pools'],
+                             {'_pool0': {'allocated_capacity_gb': 1}})
+
     def test_create_volume_from_exact_sized_image(self):
         """Verify that an image which is exactly the same size as the
         volume, will work correctly.
diff --git a/cinder/volume/flows/manager/create_volume.py b/cinder/volume/flows/manager/create_volume.py
index 6db79057d..59b1fceb6 100644
--- a/cinder/volume/flows/manager/create_volume.py
+++ b/cinder/volume/flows/manager/create_volume.py
@@ -134,6 +134,7 @@ class OnFailureRescheduleTask(flow_utils.CinderTask):
         update = {
             'status': 'creating',
             'scheduled_at': timeutils.utcnow(),
+            'host': None
         }
         LOG.debug("Updating volume %(volume_id)s with %(update)s." %
                   {'update': update, 'volume_id': volume_id})
@@ -145,6 +146,13 @@ class OnFailureRescheduleTask(flow_utils.CinderTask):
                       volume_id)
 
     def revert(self, context, result, flow_failures, **kwargs):
+        # NOTE(dulek): Revert is occurring and manager need to know if
+        # rescheduling happened. We're injecting this information into
+        # exception that will be caught there. This is ugly and we need
+        # TaskFlow to support better way of returning data from reverted flow.
+        cause = list(flow_failures.values())[0]
+        cause.exception.rescheduled = False
+
         # Check if we have a cause which can tell us not to reschedule.
         for failure in flow_failures.values():
             if failure.check(*self.no_reschedule_types):
@@ -155,10 +163,11 @@ class OnFailureRescheduleTask(flow_utils.CinderTask):
         if self.reschedule_context:
             context = self.reschedule_context
         try:
-            cause = list(flow_failures.values())[0]
             self._pre_reschedule(context, volume_id)
             self._reschedule(context, cause, **kwargs)
             self._post_reschedule(context, volume_id)
+            # Inject information that we rescheduled
+            cause.exception.rescheduled = True
         except exception.CinderException:
             LOG.exception(_LE("Volume %s: rescheduling failed"), volume_id)
 
@@ -180,7 +189,6 @@ class ExtractVolumeRefTask(flow_utils.CinderTask):
         # In the future we might want to have a lock on the volume_id so that
         # the volume can not be deleted while its still being created?
         volume_ref = self.db.volume_get(context, volume_id)
-
         return volume_ref
 
     def revert(self, context, volume_id, result, **kwargs):
diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py
index e009f71f1..8c230c9ff 100644
--- a/cinder/volume/manager.py
+++ b/cinder/volume/manager.py
@@ -47,6 +47,7 @@ from oslo_utils import importutils
 from oslo_utils import timeutils
 from oslo_utils import uuidutils
 from osprofiler import profiler
+from taskflow import exceptions as tfe
 
 from cinder import compute
 from cinder import context
@@ -420,15 +421,30 @@ class VolumeManager(manager.SchedulerDependentManager):
             def _run_flow_locked():
                 _run_flow()
 
-        if locked_action is None:
-            _run_flow()
-        else:
-            _run_flow_locked()
+        # NOTE(dulek): Flag to indicate if volume was rescheduled. Used to
+        # decide if allocated_capacity should be incremented.
+        rescheduled = False
 
-        # Fetch created volume from storage
-        vol_ref = flow_engine.storage.fetch('volume')
-        # Update volume stats
-        self._update_allocated_capacity(vol_ref)
+        try:
+            if locked_action is None:
+                _run_flow()
+            else:
+                _run_flow_locked()
+        except exception.CinderException as e:
+            if hasattr(e, 'rescheduled'):
+                rescheduled = e.rescheduled
+            raise
+        finally:
+            try:
+                vol_ref = flow_engine.storage.fetch('volume_ref')
+            except tfe.NotFound as e:
+                # Flow was reverted, fetching volume_ref from the DB.
+                vol_ref = self.db.volume_get(context, volume_id)
+
+            if not rescheduled:
+                # NOTE(dulek): Volume wasn't rescheduled so we need to update
+                # volume stats as these are decremented on delete.
+                self._update_allocated_capacity(vol_ref)
 
         return vol_ref['id']
-- 
2.45.2