]> review.fuel-infra Code Review - openstack-build/cinder-build.git/commitdiff
Fix race condition in RemoteFS create_snapshot_online
authorJordan Pittier <jordan.pittier@scality.com>
Wed, 17 Feb 2016 18:05:49 +0000 (19:05 +0100)
committerJordan Pittier <jordan.pittier@scality.com>
Fri, 19 Feb 2016 09:49:56 +0000 (10:49 +0100)
There's a `while True` loop in create_snapshot_online and each
iteration can make the `cinder-volume` process sleep up to 10sec. In
the mean time, if Nova fails to create the snapshot, the snapshot
status could turn to "error" and someone could want to delete the
erroneous snapshot, which would make its status to be "deleting".

In that case the `while True` loop would never exit. Cinder consumes
100% of CPU and keeps logging "Status of snapshot XX is now deleting".

The patch fix this issue by exiting the `while True` loop if we detect
that the snapshot is to be deleted.

Closes-Bug: #1538496

Change-Id: I5de0e8479a552ce101cecd06a874a170e54d5c18

cinder/exception.py
cinder/tests/unit/test_remotefs.py
cinder/volume/drivers/remotefs.py

index 88b3c3da94fc16396861ae1c1b338be7240d3812..ffdc447ba6939b5963e9ae294ba0d4291625b7ce 100644 (file)
@@ -767,6 +767,11 @@ class RemoteFSException(VolumeDriverException):
     message = _("Unknown RemoteFS exception")
 
 
+class RemoteFSConcurrentRequest(RemoteFSException):
+    message = _("A concurrent, possibly contradictory, request "
+                "has been made.")
+
+
 class RemoteFSNoSharesMounted(RemoteFSException):
     message = _("No mounted shares found")
 
index df077b87ec638e653fc8cdc35b9710e82cba60be..82c985bada46d69d6b778527f72aeb94dd81f6ed 100644 (file)
@@ -40,6 +40,7 @@ class RemoteFsSnapDriverTestCase(test.TestCase):
     _FAKE_SNAPSHOT = {'context': _FAKE_CONTEXT,
                       'id': _FAKE_SNAPSHOT_ID,
                       'volume': _FAKE_VOLUME,
+                      'volume_id': _FAKE_VOLUME_ID,
                       'status': 'available',
                       'volume_size': 1}
     _FAKE_SNAPSHOT_PATH = (_FAKE_VOLUME_PATH + '.' + _FAKE_SNAPSHOT_ID)
@@ -271,6 +272,38 @@ class RemoteFsSnapDriverTestCase(test.TestCase):
                           self._driver._create_snapshot,
                           fake_snapshot)
 
+    @mock.patch('cinder.db.snapshot_get')
+    @mock.patch('time.sleep')
+    def test_create_snapshot_online_with_concurrent_delete(
+            self, mock_sleep, mock_snapshot_get):
+        self._driver._nova = mock.Mock()
+
+        # Test what happens when progress is so slow that someone
+        # decides to delete the snapshot while the last known status is
+        # "creating".
+        mock_snapshot_get.side_effect = [
+            {'status': 'creating', 'progress': '42%'},
+            {'status': 'creating', 'progress': '45%'},
+            {'status': 'deleting'},
+        ]
+
+        with mock.patch.object(self._driver, '_do_create_snapshot') as \
+                mock_do_create_snapshot:
+            self.assertRaises(exception.RemoteFSConcurrentRequest,
+                              self._driver._create_snapshot_online,
+                              self._FAKE_SNAPSHOT,
+                              self._FAKE_VOLUME_NAME,
+                              self._FAKE_SNAPSHOT_PATH)
+
+        mock_do_create_snapshot.assert_called_once_with(
+            self._FAKE_SNAPSHOT, self._FAKE_VOLUME_NAME,
+            self._FAKE_SNAPSHOT_PATH)
+        self.assertEqual([mock.call(1), mock.call(1)],
+                         mock_sleep.call_args_list)
+        self.assertEqual(3, mock_snapshot_get.call_count)
+        mock_snapshot_get.assert_called_with(self._FAKE_SNAPSHOT['context'],
+                                             self._FAKE_SNAPSHOT['id'])
+
     @mock.patch.object(utils, 'synchronized')
     def _locked_volume_operation_test_helper(self, mock_synchronized, func,
                                              expected_exception=False,
index 24a403c2ac2cfa721efc33718ac8f883480a57cd..f8b4870fed333c4f102d1d5a9087b5d6ae6ce006 100644 (file)
@@ -1287,6 +1287,10 @@ class RemoteFSSnapDriver(RemoteFSDriver, driver.SnapshotVD):
         while True:
             s = db.snapshot_get(context, snapshot['id'])
 
+            LOG.debug('Status of snapshot %(id)s is now %(status)s',
+                      {'id': snapshot['id'],
+                       'status': s['status']})
+
             if s['status'] == 'creating':
                 if s['progress'] == '90%':
                     # Nova tasks completed successfully
@@ -1300,9 +1304,12 @@ class RemoteFSSnapDriver(RemoteFSDriver, driver.SnapshotVD):
                         'while creating snapshot.')
                 raise exception.RemoteFSException(msg)
 
-            LOG.debug('Status of snapshot %(id)s is now %(status)s',
-                      {'id': snapshot['id'],
-                       'status': s['status']})
+            elif s['status'] == 'deleting' or s['status'] == 'error_deleting':
+                msg = _('Snapshot %(id)s has been asked to be deleted while '
+                        'waiting for it to become available. Perhaps a '
+                        'concurrent request was made.') % {'id':
+                                                           snapshot['id']}
+                raise exception.RemoteFSConcurrentRequest(msg)
 
             if 10 < seconds_elapsed <= 20:
                 increment = 2