From a73646d777a77c4820792b7d59ac5330643f9ad0 Mon Sep 17 00:00:00 2001 From: Jeegn Chen Date: Fri, 8 May 2015 15:41:23 +0800 Subject: [PATCH] Address 0x712d8e0e error in VNX Cinder Driver In the original implementation, the 0x712d8e0e error is not handled. As a result, some snapshot creation and volume extension operations may fail while the system is in a transitional state. This patch adds wait-and-retry logic for when the 0x712d8e0e error occurs so that the system is more error-tolerant. Change-Id: I615e49693ada7cf25fd2d2fa224f76c6f20cf630 Closes-Bug: #1443338 --- cinder/tests/unit/test_emc_vnxdirect.py | 64 ++++++++++++++++++++++++ cinder/volume/drivers/emc/emc_vnx_cli.py | 55 ++++++++++++++++++-- 2 files changed, 116 insertions(+), 3 deletions(-) diff --git a/cinder/tests/unit/test_emc_vnxdirect.py b/cinder/tests/unit/test_emc_vnxdirect.py index a1a546da5..a318151de 100644 --- a/cinder/tests/unit/test_emc_vnxdirect.py +++ b/cinder/tests/unit/test_emc_vnxdirect.py @@ -660,6 +660,12 @@ State: Ready of another consistency group. (0x716d8045) """, 71 + def LUN_PREP_ERROR(self): + return ("The operation cannot be performed because " + "the LUN is 'Preparing'. Wait for the LUN's " + "Current Operation to complete 'Preparing' " + "and retry the operation. 
(0x712d8e0e)", 14) + POOL_PROPERTY = ("""\ Pool Name: unit_test_pool Pool ID: 1 @@ -1600,6 +1606,34 @@ Time Remaining: 0 second(s) fake_cli.assert_has_calls(expect_cmd) + @mock.patch('cinder.openstack.common.loopingcall.FixedIntervalLoopingCall', + new=utils.ZeroIntervalLoopingCall) + def test_snapshot_preparing_volume(self): + commands = [self.testData.SNAP_CREATE_CMD('snapshot1'), + self.testData.LUN_PROPERTY_ALL_CMD('vol1')] + results = [[self.testData.LUN_PREP_ERROR(), SUCCEED], + [self.testData.LUN_PROPERTY('vol1', size=1, + operation='Preparing'), + self.testData.LUN_PROPERTY('vol1', size=1, + operation='Optimizing'), + self.testData.LUN_PROPERTY('vol1', size=1, + operation='None')]] + + fake_cli = self.driverSetup(commands, results) + + self.driver.create_snapshot(self.testData.test_snapshot) + expected = [mock.call(*self.testData.SNAP_CREATE_CMD('snapshot1'), + poll=False), + mock.call(*self.testData.LUN_PROPERTY_ALL_CMD('vol1'), + poll=False), + mock.call(*self.testData.LUN_PROPERTY_ALL_CMD('vol1'), + poll=False), + mock.call(*self.testData.LUN_PROPERTY_ALL_CMD('vol1'), + poll=False), + mock.call(*self.testData.SNAP_CREATE_CMD('snapshot1'), + poll=False)] + fake_cli.assert_has_calls(expected) + @mock.patch( "oslo_concurrency.processutils.execute", mock.Mock( @@ -2287,6 +2321,36 @@ Time Remaining: 0 second(s) poll=False)] fake_cli.assert_has_calls(expected) + @mock.patch('cinder.openstack.common.loopingcall.FixedIntervalLoopingCall', + new=utils.ZeroIntervalLoopingCall) + def test_extend_preparing_volume(self): + commands = [self.testData.LUN_EXTEND_CMD('vol1', 2), + self.testData.LUN_PROPERTY_ALL_CMD('vol1')] + results = [[self.testData.LUN_PREP_ERROR(), SUCCEED], + [self.testData.LUN_PROPERTY('vol1', size=1, + operation='Preparing'), + self.testData.LUN_PROPERTY('vol1', size=1, + operation='Optimizing'), + self.testData.LUN_PROPERTY('vol1', size=1, + operation='None'), + self.testData.LUN_PROPERTY('vol1', size=2)]] + fake_cli = self.driverSetup(commands, 
results) + + self.driver.extend_volume(self.testData.test_volume, 2) + expected = [mock.call(*self.testData.LUN_EXTEND_CMD('vol1', 2), + poll=False), + mock.call(*self.testData.LUN_PROPERTY_ALL_CMD('vol1'), + poll=False), + mock.call(*self.testData.LUN_PROPERTY_ALL_CMD('vol1'), + poll=False), + mock.call(*self.testData.LUN_PROPERTY_ALL_CMD('vol1'), + poll=False), + mock.call(*self.testData.LUN_EXTEND_CMD('vol1', 2), + poll=False), + mock.call(*self.testData.LUN_PROPERTY_ALL_CMD('vol1'), + poll=False)] + fake_cli.assert_has_calls(expected) + def test_manage_existing(self): lun_rename_cmd = ('lun', '-modify', '-l', self.testData.test_lun_id, '-newName', 'vol_with_type', '-o') diff --git a/cinder/volume/drivers/emc/emc_vnx_cli.py b/cinder/volume/drivers/emc/emc_vnx_cli.py index 0b996d5df..d2ae23421 100644 --- a/cinder/volume/drivers/emc/emc_vnx_cli.py +++ b/cinder/volume/drivers/emc/emc_vnx_cli.py @@ -271,6 +271,7 @@ class CommandLineHelper(object): CLI_RESP_PATTERN_LUN_IN_SG_2 = 'Host LUN/LUN mapping still exists' CLI_RESP_PATTERN_LUN_NOT_MIGRATING = ('The specified source LUN ' 'is not currently migrating') + CLI_RESP_PATTERN_LUN_IS_PREPARING = '0x712d8e0e' def __init__(self, configuration): configuration.append_config_values(san.san_opts) @@ -1129,6 +1130,16 @@ class CommandLineHelper(object): properties, poll=poll) return data + def get_lun_current_ops_state(self, name, poll=False): + data = self.get_lun_by_name(name, poll=False) + return data[self.LUN_OPERATION.key] + + def wait_until_lun_ready_for_ops(self, name): + def is_lun_ready_for_ops(): + data = self.get_lun_current_ops_state(name, False) + return data == 'None' + self._wait_for_a_condition(is_lun_ready_for_ops) + def get_pool(self, name, properties=POOL_ALL, poll=True): data = self.get_pool_properties(('-name', name), properties=properties, @@ -1848,12 +1859,31 @@ class EMCVnxCliBase(object): self._client.delete_lun(volume['name']) else: with excutils.save_and_reraise_exception(): - # Reraise the 
original exceiption + # Reraise the original exception pass def extend_volume(self, volume, new_size): """Extends an EMC volume.""" - self._client.expand_lun_and_wait(volume['name'], new_size) + + try: + self._client.expand_lun_and_wait(volume['name'], new_size) + except exception.EMCVnxCLICmdError as ex: + with excutils.save_and_reraise_exception(ex) as ctxt: + out = "\n".join(ex.kwargs["out"]) + if (self._client.CLI_RESP_PATTERN_LUN_IS_PREPARING + in out): + # The error means the operation cannot be performed + # because the LUN is 'Preparing'. Wait for a while + # so that the LUN may get out of the transitioning + # state. + LOG.warning(_LW("LUN %(name)s is not ready for extension: " + "%(out)s"), + {'name': volume['name'], 'out': out}) + self._client.wait_until_lun_ready_for_ops(volume['name']) + self._client.expand_lun_and_wait(volume['name'], new_size) + ctxt.reraise = False + else: + ctxt.reraise = True def _get_original_status(self, volume): if not volume['volume_attachment']: @@ -2127,7 +2157,26 @@ class EMCVnxCliBase(object): {'snapshot': snapshot_name, 'volume': volume_name}) lun_id = self.get_lun_id(volume) - self._client.create_snapshot(lun_id, snapshot_name) + + try: + self._client.create_snapshot(lun_id, snapshot_name) + except exception.EMCVnxCLICmdError as ex: + with excutils.save_and_reraise_exception(ex) as ctxt: + out = "\n".join(ex.kwargs["out"]) + if (self._client.CLI_RESP_PATTERN_LUN_IS_PREPARING + in out): + # The error means the operation cannot be performed + # because the LUN is 'Preparing'. Wait for a while + # so that the LUN may get out of the transitioning + # state. + LOG.warning(_LW("LUN %(name)s is not ready for snapshot: " + "%(out)s"), + {'name': volume_name, 'out': out}) + self._client.wait_until_lun_ready_for_ops(volume['name']) + self._client.create_snapshot(lun_id, snapshot_name) + ctxt.reraise = False + else: + ctxt.reraise = True def delete_snapshot(self, snapshot): """Deletes a snapshot.""" -- 2.45.2