From b1537c016e74afb403a75158710c4242fd5a31a6 Mon Sep 17 00:00:00 2001 From: peter_wang Date: Wed, 25 Mar 2015 02:35:32 -0400 Subject: [PATCH] Enhance VNX Cinder volume creation logic Sometimes sysadmin needs to remove broken disks and replace them with some new disks. Before the disks are replaced and fully rebuilt, the pool would be in "Faulted" state, and all LUNs created in this pool would be in "Faulted" state as well. A LUN in this "Faulted" state is still OK for IO access. Current implementation of create_volume in VNX Cinder Driver will wait until the VNX LUN gets into Ready State or timeout. Thus the new volume in the back end corresponding to the pool will be stuck at creating state for a long time before disks are replaced. This reduces the high availability of the back end. The change allows new volume to get into available state as long as the corresponding VNX LUN can serve IO. Change-Id: Ifb152b3b9c59ce0d5c875129a7bf0503740d5a3a Closes-Bug: 1436241 --- cinder/tests/test_emc_vnxdirect.py | 101 ++++++++++++++++++----- cinder/volume/drivers/emc/emc_vnx_cli.py | 50 +++++++---- 2 files changed, 114 insertions(+), 37 deletions(-) diff --git a/cinder/tests/test_emc_vnxdirect.py b/cinder/tests/test_emc_vnxdirect.py index 424d8f398..e1bd14a13 100644 --- a/cinder/tests/test_emc_vnxdirect.py +++ b/cinder/tests/test_emc_vnxdirect.py @@ -372,7 +372,7 @@ class EMCVNXCLIDriverTestData(): test_lun_id = 1 test_existing_ref = {'id': test_lun_id} - test_pool_name = 'Pool_02_SASFLASH' + test_pool_name = 'unit_test_pool' device_map = { '1122334455667788': { 'initiator_port_wwn_list': ['123456789012345', '123456789054321'], @@ -777,32 +777,37 @@ Available Capacity (GBs): 3257.851 "Port Status: Online\n" + "Switch Present: NO\n", 0) - def LUN_PROPERTY(self, name, isThin=False, hasSnap=False, size=1): - return """\ + def LUN_PROPERTY(self, name, is_thin=False, has_snap=False, size=1, + state='Ready', faulted='false', operation='None'): + return (""" LOGICAL UNIT NUMBER 1 - Name: %s + Name: %(name)s UID: 60:06:01:60:09:20:32:00:13:DF:B4:EF:C2:63:E3:11 Current Owner: SP A Default Owner: SP A Allocation Owner: SP A - Attached Snapshot: %s + Attached Snapshot: %(has_snap)s User Capacity (Blocks): 2101346304 - User Capacity (GBs): %d + User Capacity (GBs): %(size)d Consumed Capacity (Blocks): 2149576704 Consumed Capacity (GBs): 1024.998 - Pool Name: Pool_02_SASFLASH - Current State: Ready + Pool Name: unit_test_pool + Current State: %(state)s Status: OK(0x0) - Is Faulted: false + Is Faulted: %(faulted)s Is Transitioning: false - Current Operation: None + Current Operation: %(operation)s Current Operation State: N/A Current Operation Status: N/A Current Operation Percent Completed: 0 - Is Thin LUN: %s""" % (name, - 'FakeSnap' if hasSnap else 'N/A', - size, - 'Yes' if isThin else 'No'), 0 + Is Thin LUN: %(is_thin)s""" % { + 'name': name, + 'has_snap': 'FakeSnap' if has_snap else 'N/A', + 'size': size, + 'state': state, + 'faulted': faulted, + 'operation': operation, + 'is_thin': 'Yes' if is_thin else 'No'}, 0) def STORAGE_GROUP_NO_MAP(self, sgname): return ("""\ @@ -1805,6 +1810,57 @@ Time Remaining: 0 second(s) 'failed_vol1', 1, 'unit_test_pool', None, None, False))] fake_cli.assert_has_calls(expect_cmd) + @mock.patch('cinder.openstack.common.loopingcall.FixedIntervalLoopingCall', + new=utils.ZeroIntervalLoopingCall) + def test_create_faulted_volume(self): + volume_name = 'faulted_volume' + cmd_create = self.testData.LUN_CREATION_CMD( + volume_name, 1, 'unit_test_pool', None, None, False) + cmd_list_preparing = self.testData.LUN_PROPERTY_ALL_CMD(volume_name) + commands = [cmd_create, cmd_list_preparing] + results = [SUCCEED, + [self.testData.LUN_PROPERTY(name=volume_name, + state='Faulted', + faulted='true', + operation='Preparing'), + self.testData.LUN_PROPERTY(name=volume_name, + state='Faulted', + faulted='true', + operation='None')]] + fake_cli = self.driverSetup(commands, results) + faulted_volume = self.testData.test_volume.copy() + faulted_volume.update({'name': volume_name}) + self.driver.create_volume(faulted_volume) + expect_cmd = [ + mock.call(*self.testData.LUN_CREATION_CMD( + volume_name, 1, 'unit_test_pool', None, None, False)), + mock.call(*self.testData.LUN_PROPERTY_ALL_CMD(volume_name), + poll=False), + mock.call(*self.testData.LUN_PROPERTY_ALL_CMD(volume_name), + poll=False)] + fake_cli.assert_has_calls(expect_cmd) + + @mock.patch('cinder.openstack.common.loopingcall.FixedIntervalLoopingCall', + new=utils.ZeroIntervalLoopingCall) + def test_create_offline_volume(self): + volume_name = 'offline_volume' + cmd_create = self.testData.LUN_CREATION_CMD( + volume_name, 1, 'unit_test_pool', None, None, False) + cmd_list = self.testData.LUN_PROPERTY_ALL_CMD(volume_name) + commands = [cmd_create, cmd_list] + results = [SUCCEED, + self.testData.LUN_PROPERTY(name=volume_name, + state='Offline', + faulted='true')] + self.driverSetup(commands, results) + offline_volume = self.testData.test_volume.copy() + offline_volume.update({'name': volume_name}) + self.assertRaisesRegexp(exception.VolumeBackendAPIException, + "Volume %s was created in VNX, but in" + " Offline state." % volume_name, + self.driver.create_volume, + offline_volume) + def test_create_volume_snapshot_failed(self): commands = [self.testData.SNAP_CREATE_CMD('failed_snapshot')] results = [FAKE_ERROR_RETURN] @@ -2686,7 +2742,7 @@ Time Remaining: 0 second(s) "get_volume_type_extra_specs", mock.Mock(return_value={'fast_cache_enabled': 'True'})) def test_create_volume_with_fastcache(self): - '''enable fastcache when creating volume.''' + """Enable fastcache when creating volume.""" commands = [self.testData.NDU_LIST_CMD, self.testData.POOL_PROPERTY_W_FASTCACHE_CMD, self.testData.LUN_PROPERTY_ALL_CMD('vol_with_type'), @@ -2733,14 +2789,15 @@ Time Remaining: 0 second(s) mock.call('connection', '-getport', '-address', '-vlanid', poll=False), mock.call('-np', 'lun', '-create', '-capacity', - 1, '-sq', 'gb', '-poolName', 'Pool_02_SASFLASH', + 1, '-sq', 'gb', '-poolName', + self.testData.test_pool_name, '-name', 'vol_with_type', '-type', 'NonThin') ] fake_cli.assert_has_calls(expect_cmd) def test_get_lun_id_provider_location_exists(self): - '''test function get_lun_id.''' + """Test function get_lun_id.""" self.driverSetup() volume_01 = { 'name': 'vol_01', @@ -2761,7 +2818,7 @@ Time Remaining: 0 second(s) "get_lun_by_name", mock.Mock(return_value={'lun_id': 2})) def test_get_lun_id_provider_location_has_no_lun_id(self): - '''test function get_lun_id.''' + """Test function get_lun_id.""" self.driverSetup() volume_02 = { 'name': 'vol_02', @@ -3213,7 +3270,7 @@ class EMCVNXCLIDArrayBasedDriverTestCase(DriverTestCaseBase): results = [self.testData.LUN_PROPERTY(testVolume['name'], False)] fake_cli = self.driverSetup(commands, results) pool = self.driver.get_pool(testVolume) - self.assertEqual('Pool_02_SASFLASH', pool) + self.assertEqual('unit_test_pool', pool) fake_cli.assert_has_calls( [mock.call(*self.testData.LUN_PROPERTY_POOL_CMD( testVolume['name']), poll=False)]) @@ -3235,7 +3292,7 @@ class EMCVNXCLIDArrayBasedDriverTestCase(DriverTestCaseBase): fake_cli = self.driverSetup(commands, results) pool = self.driver.cli.get_target_storagepool(testNewVolume, testSrcVolume) - self.assertEqual('Pool_02_SASFLASH', pool) + self.assertEqual('unit_test_pool', pool) fake_cli.assert_has_calls( [mock.call(*self.testData.LUN_PROPERTY_POOL_CMD( testSrcVolume['name']), poll=False)]) @@ -3249,7 +3306,7 @@ class EMCVNXCLIDArrayBasedDriverTestCase(DriverTestCaseBase): results = [self.testData.LUN_PROPERTY('lun_name', size=test_size)] fake_cli = self.driverSetup(commands, results) test_volume = self.testData.test_volume2.copy() - test_volume['host'] = "host@backendsec#Pool_02_SASFLASH" + test_volume['host'] = "host@backendsec#unit_test_pool" get_size = self.driver.manage_existing_get_size( test_volume, self.testData.test_existing_ref) @@ -3271,7 +3328,7 @@ class EMCVNXCLIDArrayBasedDriverTestCase(DriverTestCaseBase): ex = self.assertRaises( exception.ManageExistingInvalidReference, self.driver.manage_existing_get_size, - self.testData.test_volume_with_type, + test_volume, self.testData.test_existing_ref) self.assertTrue( re.match(r'.*not managed by the host', diff --git a/cinder/volume/drivers/emc/emc_vnx_cli.py b/cinder/volume/drivers/emc/emc_vnx_cli.py index 75fce1483..afd7d4c63 100644 --- a/cinder/volume/drivers/emc/emc_vnx_cli.py +++ b/cinder/volume/drivers/emc/emc_vnx_cli.py @@ -413,12 +413,26 @@ class CommandLineHelper(object): else: self._raise_cli_error(cmd, rc, out) + def _lun_state_validation(lun_data): + lun_state = lun_data[self.LUN_STATE.key] + if lun_state == 'Initializing': + return False + # Lun in Ready or Faulted state is eligible for IO access, + # so if no lun operation, return success. + elif lun_state in ['Ready', 'Faulted']: + return lun_data[self.LUN_OPERATION.key] == 'None' + # Raise exception if lun state is Offline, Invalid, Destroying + # or other unexpected states. + else: + msg = _("Volume %(lun_name)s was created in VNX, but in" + " %(lun_state)s state." + ) % {'lun_name': lun_data[self.LUN_NAME.key], + 'lun_state': lun_state} + raise exception.VolumeBackendAPIException(data=msg) + def lun_is_ready(): try: data = self.get_lun_by_name(name, self.LUN_ALL, False) - return (data[self.LUN_STATE.key] == 'Ready' and - data[self.LUN_STATUS.key] == 'OK(0x0)' and - data[self.LUN_OPERATION.key] == 'None') except exception.EMCVnxCLICmdError as ex: orig_out = "\n".join(ex.kwargs["out"]) if orig_out.find( @@ -426,10 +440,13 @@ class CommandLineHelper(object): return False else: raise ex + return _lun_state_validation(data) self._wait_for_a_condition(lun_is_ready, None, - INTERVAL_5_SEC) + INTERVAL_5_SEC, + lambda ex: + isinstance(ex, exception.EMCVnxCLICmdError)) lun = self.get_lun_by_name(name, self.LUN_ALL, False) return lun @@ -474,26 +491,29 @@ class CommandLineHelper(object): return hlus def _wait_for_a_condition(self, testmethod, timeout=None, - interval=INTERVAL_5_SEC): + interval=INTERVAL_5_SEC, + ignorable_exception_arbiter=lambda ex: True): start_time = time.time() if timeout is None: timeout = self.timeout def _inner(): try: - testValue = testmethod() + test_value = testmethod() except Exception as ex: - testValue = False - LOG.debug('CommandLineHelper.' - '_wait_for_condition: %(method_name)s ' - 'execution failed for %(exception)s', - {'method_name': testmethod.__name__, - 'exception': six.text_type(ex)}) - if testValue: + test_value = False + with excutils.save_and_reraise_exception( + reraise=not ignorable_exception_arbiter(ex)): + LOG.debug('CommandLineHelper.' + '_wait_for_a_condition: %(method_name)s ' + 'execution failed for %(exception)s', + {'method_name': testmethod.__name__, + 'exception': six.text_type(ex)}) + if test_value: raise loopingcall.LoopingCallDone() if int(time.time()) - start_time > timeout: - msg = (_('CommandLineHelper._wait_for_condition: %s timeout') + msg = (_('CommandLineHelper._wait_for_a_condition: %s timeout') % testmethod.__name__) LOG.error(msg) raise exception.VolumeBackendAPIException(data=msg) @@ -1584,7 +1604,7 @@ class CommandLineHelper(object): class EMCVnxCliBase(object): """This class defines the functions to use the native CLI functionality.""" - VERSION = '05.03.03' + VERSION = '05.03.04' stats = {'driver_version': VERSION, 'storage_protocol': None, 'vendor_name': 'EMC', -- 2.45.2