]> review.fuel-infra Code Review - openstack-build/cinder-build.git/commitdiff
Enhance VNX Cinder volume creation logic
authorpeter_wang <peter.wang13@emc.com>
Wed, 25 Mar 2015 06:35:32 +0000 (02:35 -0400)
committerpeter_wang <peter.wang13@emc.com>
Thu, 26 Mar 2015 23:53:10 +0000 (19:53 -0400)
Sometimes sysadmin needs to remove broken disks and replace them
with some new disks. Before the disks are replaced and fully rebuilt,
the pool would be in "Faulted" state, and all LUNs created in this
pool would be in "Faulted" state as well.
A LUN in this "Faulted" state is still OK for IO access.

Current implementation of create_volume in VNX Cinder Driver will
wait until the VNX LUN gets into Ready State or timeout.
Thus the new volume in the back end corresponding to the pool will
be stuck at creating state for a long time before disks are replaced.
This reduces the high availability of the back end.

The change allows new volume to get into available state as long as
the corresponding VNX LUN can serve IO.

Change-Id: Ifb152b3b9c59ce0d5c875129a7bf0503740d5a3a
Closes-Bug: 1436241

cinder/tests/test_emc_vnxdirect.py
cinder/volume/drivers/emc/emc_vnx_cli.py

index 424d8f398c60cd73234a0515df375282a37d1f9e..e1bd14a1393016f9df59844f03c50548719b7e15 100644 (file)
@@ -372,7 +372,7 @@ class EMCVNXCLIDriverTestData():
 
     test_lun_id = 1
     test_existing_ref = {'id': test_lun_id}
-    test_pool_name = 'Pool_02_SASFLASH'
+    test_pool_name = 'unit_test_pool'
     device_map = {
         '1122334455667788': {
             'initiator_port_wwn_list': ['123456789012345', '123456789054321'],
@@ -777,32 +777,37 @@ Available Capacity (GBs):  3257.851
         "Port Status:         Online\n" +
         "Switch Present:      NO\n", 0)
 
-    def LUN_PROPERTY(self, name, isThin=False, hasSnap=False, size=1):
-        return """\
+    def LUN_PROPERTY(self, name, is_thin=False, has_snap=False, size=1,
+                     state='Ready', faulted='false', operation='None'):
+        return ("""
                LOGICAL UNIT NUMBER 1
-               Name:  %s
+               Name:  %(name)s
                UID:  60:06:01:60:09:20:32:00:13:DF:B4:EF:C2:63:E3:11
                Current Owner:  SP A
                Default Owner:  SP A
                Allocation Owner:  SP A
-               Attached Snapshot: %s
+               Attached Snapshot: %(has_snap)s
                User Capacity (Blocks):  2101346304
-               User Capacity (GBs):  %d
+               User Capacity (GBs):  %(size)d
                Consumed Capacity (Blocks):  2149576704
                Consumed Capacity (GBs):  1024.998
-               Pool Name:  Pool_02_SASFLASH
-               Current State:  Ready
+               Pool Name:  unit_test_pool
+               Current State:  %(state)s
                Status:  OK(0x0)
-               Is Faulted:  false
+               Is Faulted:  %(faulted)s
                Is Transitioning:  false
-               Current Operation:  None
+               Current Operation:  %(operation)s
                Current Operation State:  N/A
                Current Operation Status:  N/A
                Current Operation Percent Completed:  0
-               Is Thin LUN:  %s""" % (name,
-                                      'FakeSnap' if hasSnap else 'N/A',
-                                      size,
-                                      'Yes' if isThin else 'No'), 0
+               Is Thin LUN:  %(is_thin)s""" % {
+            'name': name,
+            'has_snap': 'FakeSnap' if has_snap else 'N/A',
+            'size': size,
+            'state': state,
+            'faulted': faulted,
+            'operation': operation,
+            'is_thin': 'Yes' if is_thin else 'No'}, 0)
 
     def STORAGE_GROUP_NO_MAP(self, sgname):
         return ("""\
@@ -1805,6 +1810,57 @@ Time Remaining:  0 second(s)
             'failed_vol1', 1, 'unit_test_pool', None, None, False))]
         fake_cli.assert_has_calls(expect_cmd)
 
+    @mock.patch('cinder.openstack.common.loopingcall.FixedIntervalLoopingCall',
+                new=utils.ZeroIntervalLoopingCall)
+    def test_create_faulted_volume(self):
+        volume_name = 'faulted_volume'
+        cmd_create = self.testData.LUN_CREATION_CMD(
+            volume_name, 1, 'unit_test_pool', None, None, False)
+        cmd_list_preparing = self.testData.LUN_PROPERTY_ALL_CMD(volume_name)
+        commands = [cmd_create, cmd_list_preparing]
+        results = [SUCCEED,
+                   [self.testData.LUN_PROPERTY(name=volume_name,
+                                               state='Faulted',
+                                               faulted='true',
+                                               operation='Preparing'),
+                    self.testData.LUN_PROPERTY(name=volume_name,
+                                               state='Faulted',
+                                               faulted='true',
+                                               operation='None')]]
+        fake_cli = self.driverSetup(commands, results)
+        faulted_volume = self.testData.test_volume.copy()
+        faulted_volume.update({'name': volume_name})
+        self.driver.create_volume(faulted_volume)
+        expect_cmd = [
+            mock.call(*self.testData.LUN_CREATION_CMD(
+                volume_name, 1, 'unit_test_pool', None, None, False)),
+            mock.call(*self.testData.LUN_PROPERTY_ALL_CMD(volume_name),
+                      poll=False),
+            mock.call(*self.testData.LUN_PROPERTY_ALL_CMD(volume_name),
+                      poll=False)]
+        fake_cli.assert_has_calls(expect_cmd)
+
+    @mock.patch('cinder.openstack.common.loopingcall.FixedIntervalLoopingCall',
+                new=utils.ZeroIntervalLoopingCall)
+    def test_create_offline_volume(self):
+        volume_name = 'offline_volume'
+        cmd_create = self.testData.LUN_CREATION_CMD(
+            volume_name, 1, 'unit_test_pool', None, None, False)
+        cmd_list = self.testData.LUN_PROPERTY_ALL_CMD(volume_name)
+        commands = [cmd_create, cmd_list]
+        results = [SUCCEED,
+                   self.testData.LUN_PROPERTY(name=volume_name,
+                                              state='Offline',
+                                              faulted='true')]
+        self.driverSetup(commands, results)
+        offline_volume = self.testData.test_volume.copy()
+        offline_volume.update({'name': volume_name})
+        self.assertRaisesRegexp(exception.VolumeBackendAPIException,
+                                "Volume %s was created in VNX, but in"
+                                " Offline state." % volume_name,
+                                self.driver.create_volume,
+                                offline_volume)
+
     def test_create_volume_snapshot_failed(self):
         commands = [self.testData.SNAP_CREATE_CMD('failed_snapshot')]
         results = [FAKE_ERROR_RETURN]
@@ -2686,7 +2742,7 @@ Time Remaining:  0 second(s)
         "get_volume_type_extra_specs",
         mock.Mock(return_value={'fast_cache_enabled': 'True'}))
     def test_create_volume_with_fastcache(self):
-        '''enable fastcache when creating volume.'''
+        """Enable fastcache when creating volume."""
         commands = [self.testData.NDU_LIST_CMD,
                     self.testData.POOL_PROPERTY_W_FASTCACHE_CMD,
                     self.testData.LUN_PROPERTY_ALL_CMD('vol_with_type'),
@@ -2733,14 +2789,15 @@ Time Remaining:  0 second(s)
             mock.call('connection', '-getport', '-address', '-vlanid',
                       poll=False),
             mock.call('-np', 'lun', '-create', '-capacity',
-                      1, '-sq', 'gb', '-poolName', 'Pool_02_SASFLASH',
+                      1, '-sq', 'gb', '-poolName',
+                      self.testData.test_pool_name,
                       '-name', 'vol_with_type', '-type', 'NonThin')
         ]
 
         fake_cli.assert_has_calls(expect_cmd)
 
     def test_get_lun_id_provider_location_exists(self):
-        '''test function get_lun_id.'''
+        """Test function get_lun_id."""
         self.driverSetup()
         volume_01 = {
             'name': 'vol_01',
@@ -2761,7 +2818,7 @@ Time Remaining:  0 second(s)
         "get_lun_by_name",
         mock.Mock(return_value={'lun_id': 2}))
     def test_get_lun_id_provider_location_has_no_lun_id(self):
-        '''test function get_lun_id.'''
+        """Test function get_lun_id."""
         self.driverSetup()
         volume_02 = {
             'name': 'vol_02',
@@ -3213,7 +3270,7 @@ class EMCVNXCLIDArrayBasedDriverTestCase(DriverTestCaseBase):
         results = [self.testData.LUN_PROPERTY(testVolume['name'], False)]
         fake_cli = self.driverSetup(commands, results)
         pool = self.driver.get_pool(testVolume)
-        self.assertEqual('Pool_02_SASFLASH', pool)
+        self.assertEqual('unit_test_pool', pool)
         fake_cli.assert_has_calls(
             [mock.call(*self.testData.LUN_PROPERTY_POOL_CMD(
                 testVolume['name']), poll=False)])
@@ -3235,7 +3292,7 @@ class EMCVNXCLIDArrayBasedDriverTestCase(DriverTestCaseBase):
         fake_cli = self.driverSetup(commands, results)
         pool = self.driver.cli.get_target_storagepool(testNewVolume,
                                                       testSrcVolume)
-        self.assertEqual('Pool_02_SASFLASH', pool)
+        self.assertEqual('unit_test_pool', pool)
         fake_cli.assert_has_calls(
             [mock.call(*self.testData.LUN_PROPERTY_POOL_CMD(
                 testSrcVolume['name']), poll=False)])
@@ -3249,7 +3306,7 @@ class EMCVNXCLIDArrayBasedDriverTestCase(DriverTestCaseBase):
         results = [self.testData.LUN_PROPERTY('lun_name', size=test_size)]
         fake_cli = self.driverSetup(commands, results)
         test_volume = self.testData.test_volume2.copy()
-        test_volume['host'] = "host@backendsec#Pool_02_SASFLASH"
+        test_volume['host'] = "host@backendsec#unit_test_pool"
         get_size = self.driver.manage_existing_get_size(
             test_volume,
             self.testData.test_existing_ref)
@@ -3271,7 +3328,7 @@ class EMCVNXCLIDArrayBasedDriverTestCase(DriverTestCaseBase):
         ex = self.assertRaises(
             exception.ManageExistingInvalidReference,
             self.driver.manage_existing_get_size,
-            self.testData.test_volume_with_type,
+            test_volume,
             self.testData.test_existing_ref)
         self.assertTrue(
             re.match(r'.*not managed by the host',
index 75fce1483310c24664c0a66fff588bd6d9d739b6..afd7d4c63d44134ced512714d16708fc1c98f09d 100644 (file)
@@ -413,12 +413,26 @@ class CommandLineHelper(object):
             else:
                 self._raise_cli_error(cmd, rc, out)
 
+        def _lun_state_validation(lun_data):
+            lun_state = lun_data[self.LUN_STATE.key]
+            if lun_state == 'Initializing':
+                return False
+            # Lun in Ready or Faulted state is eligible for IO access,
+            # so if no lun operation, return success.
+            elif lun_state in ['Ready', 'Faulted']:
+                return lun_data[self.LUN_OPERATION.key] == 'None'
+            # Raise exception if lun state is Offline, Invalid, Destroying
+            # or other unexpected states.
+            else:
+                msg = _("Volume %(lun_name)s was created in VNX, but in"
+                        " %(lun_state)s state."
+                        ) % {'lun_name': lun_data[self.LUN_NAME.key],
+                             'lun_state': lun_state}
+                raise exception.VolumeBackendAPIException(data=msg)
+
         def lun_is_ready():
             try:
                 data = self.get_lun_by_name(name, self.LUN_ALL, False)
-                return (data[self.LUN_STATE.key] == 'Ready' and
-                        data[self.LUN_STATUS.key] == 'OK(0x0)' and
-                        data[self.LUN_OPERATION.key] == 'None')
             except exception.EMCVnxCLICmdError as ex:
                 orig_out = "\n".join(ex.kwargs["out"])
                 if orig_out.find(
@@ -426,10 +440,13 @@ class CommandLineHelper(object):
                     return False
                 else:
                     raise ex
+            return _lun_state_validation(data)
 
         self._wait_for_a_condition(lun_is_ready,
                                    None,
-                                   INTERVAL_5_SEC)
+                                   INTERVAL_5_SEC,
+                                   lambda ex:
+                                   isinstance(ex, exception.EMCVnxCLICmdError))
         lun = self.get_lun_by_name(name, self.LUN_ALL, False)
         return lun
 
@@ -474,26 +491,29 @@ class CommandLineHelper(object):
         return hlus
 
     def _wait_for_a_condition(self, testmethod, timeout=None,
-                              interval=INTERVAL_5_SEC):
+                              interval=INTERVAL_5_SEC,
+                              ignorable_exception_arbiter=lambda ex: True):
         start_time = time.time()
         if timeout is None:
             timeout = self.timeout
 
         def _inner():
             try:
-                testValue = testmethod()
+                test_value = testmethod()
             except Exception as ex:
-                testValue = False
-                LOG.debug('CommandLineHelper.'
-                          '_wait_for_condition: %(method_name)s '
-                          'execution failed for %(exception)s',
-                          {'method_name': testmethod.__name__,
-                           'exception': six.text_type(ex)})
-            if testValue:
+                test_value = False
+                with excutils.save_and_reraise_exception(
+                        reraise=not ignorable_exception_arbiter(ex)):
+                    LOG.debug('CommandLineHelper.'
+                              '_wait_for_a_condition: %(method_name)s '
+                              'execution failed for %(exception)s',
+                              {'method_name': testmethod.__name__,
+                               'exception': six.text_type(ex)})
+            if test_value:
                 raise loopingcall.LoopingCallDone()
 
             if int(time.time()) - start_time > timeout:
-                msg = (_('CommandLineHelper._wait_for_condition: %s timeout')
+                msg = (_('CommandLineHelper._wait_for_a_condition: %s timeout')
                        % testmethod.__name__)
                 LOG.error(msg)
                 raise exception.VolumeBackendAPIException(data=msg)
@@ -1584,7 +1604,7 @@ class CommandLineHelper(object):
 class EMCVnxCliBase(object):
     """This class defines the functions to use the native CLI functionality."""
 
-    VERSION = '05.03.03'
+    VERSION = '05.03.04'
     stats = {'driver_version': VERSION,
              'storage_protocol': None,
              'vendor_name': 'EMC',