]> review.fuel-infra Code Review - openstack-build/cinder-build.git/commitdiff
Brick: Fix race in removing iSCSI device
authorWalter A. Boring IV <walter.boring@hp.com>
Fri, 27 Mar 2015 22:34:40 +0000 (15:34 -0700)
committerWalter A. Boring IV <walter.boring@hp.com>
Mon, 30 Mar 2015 16:26:57 +0000 (09:26 -0700)
Some folks are seeing race conditions on overloaded
or slow machines, where the time between issuing the
remove volume command, and the volume being from
/dev/disk/by-path by the kernel isn't long enough.

This patch adds a simple verify loop for iSCSI only.
It will try 3 times and wait 2 seconds between tries.

Also, This patch will only fix attaches that happen
in cinder for copy volume <--> image operations.
Nova needs a separate patch to fix any potential
issues there.

Change-Id: I5db9744da6b865b7d903503db8a895b409f32f5c
Closes-Bug: 1437441

cinder/brick/exception.py
cinder/brick/initiator/connector.py
cinder/brick/initiator/linuxscsi.py
cinder/tests/brick/test_brick_linuxscsi.py

index 3078b7ece229fb5c8419dc4edc5f5ebfba90c942..0283c15671761d5570effc882ef0a4a282ec8c65 100644 (file)
@@ -97,6 +97,10 @@ class VolumeDeviceNotFound(BrickException):
     message = _("Volume device not found at %(device)s.")
 
 
+class VolumePathNotRemoved(BrickException):
+    message = _("Volume path %(volume_path)s was not removed in time.")
+
+
 class VolumeGroupNotFound(BrickException):
     message = _('Unable to find Volume Group: %(vg_name)s')
 
index ba469d622532f2682fac131d87cb9b7aec8d1c51..6c7b564641f4dca16f3f08e2b0a22f77a021e928 100644 (file)
@@ -402,6 +402,9 @@ class ISCSIConnector(InitiatorConnector):
         dev_name = self._linuxscsi.get_name_from_path(host_device)
         if dev_name:
             self._linuxscsi.remove_scsi_device(dev_name)
+            # make sure the volume is gone before moving on
+            # Bug #1437441 - Race removing iscsi device
+            self._linuxscsi.wait_for_volume_removal(host_device)
 
         # NOTE(vish): Only disconnect from the target if no luns from the
         #             target are in use.
index 9fae5a5ff594e7d8a269de5557f8018464109d6a..aa1272c083ac960926fcbd78c3ed7d46ea37dcb5 100644 (file)
@@ -22,8 +22,10 @@ import re
 from oslo_concurrency import processutils as putils
 from oslo_log import log as logging
 
+from cinder.brick import exception
 from cinder.brick import executor
-from cinder.i18n import _, _LW
+from cinder.i18n import _, _LW, _LE
+from cinder.openstack.common import loopingcall
 
 LOG = logging.getLogger(__name__)
 
@@ -65,6 +67,36 @@ class LinuxSCSI(executor.Executor):
             LOG.debug("Remove SCSI device(%s) with %s" % (device, path))
             self.echo_scsi_command(path, "1")
 
+    def wait_for_volume_removal(self, volume_path):
+        """This is used to ensure that volumes are gone."""
+
+        def _wait_for_volume_removal(volume_path):
+            LOG.debug("Waiting for SCSI mount point %s to be removed.",
+                      volume_path)
+            if os.path.exists(volume_path):
+                if self.tries >= self.scan_attempts:
+                    msg = _LE("Exceeded the number of attempts to detect "
+                              "volume removal.")
+                    LOG.error(msg)
+                    raise exception.VolumePathNotRemoved(
+                        volume_path=volume_path)
+
+                LOG.debug("%(path)s still exists, rescanning. Try number: "
+                          "%(tries)s",
+                          {'path': volume_path, 'tries': self.tries})
+                self.tries = self.tries + 1
+            else:
+                LOG.debug("SCSI mount point %s has been removed.", volume_path)
+                raise loopingcall.LoopingCallDone()
+
+        # Setup a loop here to give the kernel time
+        # to remove the volume from /dev/disk/by-path/
+        self.tries = 0
+        self.scan_attempts = 3
+        timer = loopingcall.FixedIntervalLoopingCall(
+            _wait_for_volume_removal, volume_path)
+        timer.start(interval=2).wait()
+
     def get_device_info(self, device):
         (out, _err) = self._execute('sg_scan', device, run_as_root=True,
                                     root_helper=self._root_helper)
index ed08d7fe763ad8b89ee22cd430b648be2755f848..cac64b2e84cdf6d2cd6d95734a166a3e1c4e70dc 100644 (file)
@@ -17,6 +17,7 @@ import string
 
 from oslo_log import log as logging
 
+from cinder.brick import exception
 from cinder.brick.initiator import linuxscsi
 from cinder import test
 
@@ -62,6 +63,18 @@ class LinuxSCSITestCase(test.TestCase):
             ('tee -a /sys/block/sdc/device/delete')]
         self.assertEqual(expected_commands, self.cmds)
 
+    def test_wait_for_volume_removal(self):
+        fake_path = '/dev/disk/by-path/fake-iscsi-iqn-lun-0'
+        self.stubs.Set(os.path, "exists", lambda x: True)
+        self.assertRaises(exception.VolumePathNotRemoved,
+                          self.linuxscsi.wait_for_volume_removal,
+                          fake_path)
+
+        self.stubs.Set(os.path, "exists", lambda x: False)
+        self.linuxscsi.wait_for_volume_removal(fake_path)
+        expected_commands = []
+        self.assertEqual(expected_commands, self.cmds)
+
     def test_flush_multipath_device(self):
         self.linuxscsi.flush_multipath_device('/dev/dm-9')
         expected_commands = [('multipath -f /dev/dm-9')]