for mgr in self.volume_managers.values():
self._init_volume_driver(ctxt, mgr.driver)
+ try:
+ self._cleanup_incomplete_backup_operations(ctxt)
+ except Exception:
+ # Don't block startup of the backup service.
+ LOG.exception(_LE("Problem cleaning incomplete backup "
+ "operations."))
+
+ def _cleanup_incomplete_backup_operations(self, ctxt):
LOG.info(_LI("Cleaning up incomplete backup operations."))
volumes = self.db.volume_get_all_by_host(ctxt, self.host)
+
for volume in volumes:
- volume_host = volume_utils.extract_host(volume['host'], 'backend')
- backend = self._get_volume_backend(host=volume_host)
- mgr = self._get_manager(backend)
- if volume['status'] == 'backing-up':
- self._detach_all_attachments(ctxt, mgr, volume)
- LOG.info(_LI('Resetting volume %(vol_id)s to previous '
- 'status %(status)s (was backing-up).'),
- {'vol_id': volume['id'],
- 'status': volume['previous_status']})
- self.db.volume_update(ctxt, volume['id'],
- {'status': volume['previous_status']})
- elif volume['status'] == 'restoring-backup':
- self._detach_all_attachments(ctxt, mgr, volume)
- LOG.info(_LI('setting volume %s to error_restoring '
- '(was restoring-backup).'), volume['id'])
- self.db.volume_update(ctxt, volume['id'],
- {'status': 'error_restoring'})
+ try:
+ self._cleanup_one_volume(ctxt, volume)
+ except Exception:
+ LOG.exception(_LE("Problem cleaning up volume %(vol)s."),
+ {'vol': volume['id']})
# TODO(smulcahy) implement full resume of backup and restore
# operations on restart (rather than simply resetting)
backups = objects.BackupList.get_all_by_host(ctxt, self.host)
for backup in backups:
- if backup['status'] == 'creating':
- LOG.info(_LI('Resetting backup %s to error (was creating).'),
- backup['id'])
- err = 'incomplete backup reset on manager restart'
- self._update_backup_error(backup, ctxt, err)
- if backup['status'] == 'restoring':
- LOG.info(_LI('Resetting backup %s to '
- 'available (was restoring).'),
- backup['id'])
- backup.status = 'available'
- backup.save()
- if backup['status'] == 'deleting':
- LOG.info(_LI('Resuming delete on backup: %s.'), backup['id'])
- if CONF.backup_service_inithost_offload:
- # Offload all the pending backup delete operations to the
- # threadpool to prevent the main backup service thread
- # from being blocked.
- self._add_to_threadpool(self.delete_backup, ctxt, backup)
- else:
- # By default, delete backups sequentially
- self.delete_backup(ctxt, backup)
-
- self._cleanup_temp_volumes_snapshots(backups)
+ try:
+ self._cleanup_one_backup(ctxt, backup)
+ except Exception:
+ LOG.exception(_LE("Problem cleaning up backup %(bkup)s."),
+ {'bkup': backup['id']})
+ try:
+ self._cleanup_temp_volumes_snapshots_for_one_backup(ctxt,
+ backup)
+ except Exception:
+ LOG.exception(_LE("Problem cleaning temp volumes and "
+ "snapshots for backup %(bkup)s."),
+ {'bkup': backup['id']})
+
+ def _cleanup_one_volume(self, ctxt, volume):
+ volume_host = volume_utils.extract_host(volume['host'], 'backend')
+ backend = self._get_volume_backend(host=volume_host)
+ mgr = self._get_manager(backend)
+ if volume['status'] == 'backing-up':
+ self._detach_all_attachments(ctxt, mgr, volume)
+ LOG.info(_LI('Resetting volume %(vol_id)s to previous '
+ 'status %(status)s (was backing-up).'),
+ {'vol_id': volume['id'],
+ 'status': volume['previous_status']})
+ self.db.volume_update(ctxt, volume['id'],
+ {'status': volume['previous_status']})
+ elif volume['status'] == 'restoring-backup':
+ self._detach_all_attachments(ctxt, mgr, volume)
+ LOG.info(_LI('setting volume %s to error_restoring '
+ '(was restoring-backup).'), volume['id'])
+ self.db.volume_update(ctxt, volume['id'],
+ {'status': 'error_restoring'})
+
+ def _cleanup_one_backup(self, ctxt, backup):
+ if backup['status'] == 'creating':
+ LOG.info(_LI('Resetting backup %s to error (was creating).'),
+ backup['id'])
+ err = 'incomplete backup reset on manager restart'
+ self._update_backup_error(backup, ctxt, err)
+ if backup['status'] == 'restoring':
+ LOG.info(_LI('Resetting backup %s to '
+ 'available (was restoring).'),
+ backup['id'])
+ backup.status = 'available'
+ backup.save()
+ if backup['status'] == 'deleting':
+ LOG.info(_LI('Resuming delete on backup: %s.'), backup['id'])
+ if CONF.backup_service_inithost_offload:
+ # Offload all the pending backup delete operations to the
+ # threadpool to prevent the main backup service thread
+ # from being blocked.
+ self._add_to_threadpool(self.delete_backup, ctxt, backup)
+ else:
+ # By default, delete backups sequentially
+ self.delete_backup(ctxt, backup)
def _detach_all_attachments(self, ctxt, mgr, volume):
attachments = volume['volume_attachment'] or []
for attachment in attachments:
if (attachment['attached_host'] == self.host and
attachment['instance_uuid'] is None):
- mgr.detach_volume(ctxt, volume['id'],
- attachment['id'])
-
- def _cleanup_temp_volumes_snapshots(self, backups):
+ try:
+ mgr.detach_volume(ctxt, volume['id'],
+ attachment['id'])
+ except Exception:
+ LOG.exception(_LE("Detach attachment %(attach_id)s"
+ " failed."),
+ {'attach_id': attachment['id']},
+ resource=volume)
+
+ def _cleanup_temp_volumes_snapshots_for_one_backup(self, ctxt, backup):
# NOTE(xyang): If the service crashes or gets restarted during the
# backup operation, there could be temporary volumes or snapshots
# that are not deleted. Make sure any temporary volumes or snapshots
# create by the backup job are deleted when service is started.
- ctxt = context.get_admin_context()
- for backup in backups:
+ try:
+ volume = self.db.volume_get(ctxt, backup.volume_id)
+ volume_host = volume_utils.extract_host(volume['host'],
+ 'backend')
+ backend = self._get_volume_backend(host=volume_host)
+ mgr = self._get_manager(backend)
+ except (KeyError, exception.VolumeNotFound):
+ LOG.debug("Could not find a volume to clean up for "
+ "backup %s.", backup.id)
+ return
+
+ if backup.temp_volume_id and backup.status == 'error':
try:
- volume = self.db.volume_get(ctxt, backup.volume_id)
- volume_host = volume_utils.extract_host(volume['host'],
- 'backend')
- backend = self._get_volume_backend(host=volume_host)
- mgr = self._get_manager(backend)
- except (KeyError, exception.VolumeNotFound):
- LOG.debug("Could not find a volume to clean up for "
- "backup %s.", backup.id)
- continue
- if backup.temp_volume_id and backup.status == 'error':
- try:
- temp_volume = self.db.volume_get(ctxt,
- backup.temp_volume_id)
- # The temp volume should be deleted directly thru the
- # the volume driver, not thru the volume manager.
- mgr.driver.delete_volume(temp_volume)
- self.db.volume_destroy(ctxt, temp_volume['id'])
- except exception.VolumeNotFound:
- LOG.debug("Could not find temp volume %(vol)s to clean up "
- "for backup %(backup)s.",
- {'vol': backup.temp_volume_id,
- 'backup': backup.id})
- backup.temp_volume_id = None
- backup.save()
- if backup.temp_snapshot_id and backup.status == 'error':
- try:
- temp_snapshot = objects.Snapshot.get_by_id(
- ctxt, backup.temp_snapshot_id)
- # The temp snapshot should be deleted directly thru the
- # volume driver, not thru the volume manager.
- mgr.driver.delete_snapshot(temp_snapshot)
- with temp_snapshot.obj_as_admin():
- self.db.volume_glance_metadata_delete_by_snapshot(
- ctxt, temp_snapshot.id)
- temp_snapshot.destroy()
- except exception.SnapshotNotFound:
- LOG.debug("Could not find temp snapshot %(snap)s to clean "
- "up for backup %(backup)s.",
- {'snap': backup.temp_snapshot_id,
- 'backup': backup.id})
- backup.temp_snapshot_id = None
- backup.save()
+ temp_volume = self.db.volume_get(ctxt,
+ backup.temp_volume_id)
+ # The temp volume should be deleted directly thru the
+ # the volume driver, not thru the volume manager.
+ mgr.driver.delete_volume(temp_volume)
+ self.db.volume_destroy(ctxt, temp_volume['id'])
+ except exception.VolumeNotFound:
+ LOG.debug("Could not find temp volume %(vol)s to clean up "
+ "for backup %(backup)s.",
+ {'vol': backup.temp_volume_id,
+ 'backup': backup.id})
+ backup.temp_volume_id = None
+ backup.save()
+
+ if backup.temp_snapshot_id and backup.status == 'error':
+ try:
+ temp_snapshot = objects.Snapshot.get_by_id(
+ ctxt, backup.temp_snapshot_id)
+ # The temp snapshot should be deleted directly thru the
+ # volume driver, not thru the volume manager.
+ mgr.driver.delete_snapshot(temp_snapshot)
+ with temp_snapshot.obj_as_admin():
+ self.db.volume_glance_metadata_delete_by_snapshot(
+ ctxt, temp_snapshot.id)
+ temp_snapshot.destroy()
+ except exception.SnapshotNotFound:
+ LOG.debug("Could not find temp snapshot %(snap)s to clean "
+ "up for backup %(backup)s.",
+ {'snap': backup.temp_snapshot_id,
+ 'backup': backup.id})
+ backup.temp_snapshot_id = None
+ backup.save()
def create_backup(self, context, backup):
"""Create volume backups using configured backup service."""
temp_snapshot_id=temp_snap.id)
self.backup_mgr.init_host()
+
vol1 = db.volume_get(self.ctxt, vol1_id)
self.assertEqual('available', vol1['status'])
vol2 = db.volume_get(self.ctxt, vol2_id)
mock_add_threadpool.assert_has_calls(calls, any_order=True)
self.assertEqual(2, mock_add_threadpool.call_count)
- @mock.patch.object(db, 'volume_get')
+ def test_init_host_handles_exception(self):
+ """Test that exception in cleanup is handled."""
+
+ self.mock_object(self.backup_mgr, '_init_volume_driver')
+ mock_cleanup = self.mock_object(
+ self.backup_mgr,
+ '_cleanup_incomplete_backup_operations')
+ mock_cleanup.side_effect = [Exception]
+
+ self.assertIsNone(self.backup_mgr.init_host())
+
+ def test_cleanup_incomplete_backup_operations_with_exceptions(self):
+ """Test cleanup resilience in the face of exceptions."""
+
+ fake_volume_list = [{'id': 'vol1'}, {'id': 'vol2'}]
+ mock_volume_get_by_host = self.mock_object(
+ db, 'volume_get_all_by_host')
+ mock_volume_get_by_host.return_value = fake_volume_list
+
+ mock_volume_cleanup = self.mock_object(
+ self.backup_mgr, '_cleanup_one_volume')
+ mock_volume_cleanup.side_effect = [Exception]
+
+ fake_backup_list = [{'id': 'bkup1'}, {'id': 'bkup2'}, {'id': 'bkup3'}]
+ mock_backup_get_by_host = self.mock_object(
+ objects.BackupList, 'get_all_by_host')
+ mock_backup_get_by_host.return_value = fake_backup_list
+
+ mock_backup_cleanup = self.mock_object(
+ self.backup_mgr, '_cleanup_one_backup')
+ mock_backup_cleanup.side_effect = [Exception]
+
+ mock_temp_cleanup = self.mock_object(
+ self.backup_mgr, '_cleanup_temp_volumes_snapshots_for_one_backup')
+ mock_temp_cleanup.side_effect = [Exception]
+
+ self.assertIsNone(
+ self.backup_mgr._cleanup_incomplete_backup_operations(
+ self.ctxt))
+
+ self.assertEqual(len(fake_volume_list), mock_volume_cleanup.call_count)
+ self.assertEqual(len(fake_backup_list), mock_backup_cleanup.call_count)
+ self.assertEqual(len(fake_backup_list), mock_temp_cleanup.call_count)
+
+ def test_cleanup_one_backing_up_volume(self):
+ """Test cleanup_one_volume for volume status 'backing-up'."""
+
+ mock_get_manager = self.mock_object(
+ self.backup_mgr, '_get_manager')
+ mock_get_manager.return_value = 'fake_manager'
+
+ volume_id = self._create_volume_db_entry(status='backing-up',
+ previous_status='available')
+ volume = db.volume_get(self.ctxt, volume_id)
+
+ self.backup_mgr._cleanup_one_volume(self.ctxt, volume)
+
+ volume = db.volume_get(self.ctxt, volume_id)
+ self.assertEqual('available', volume['status'])
+
+ def test_cleanup_one_restoring_backup_volume(self):
+ """Test cleanup_one_volume for volume status 'restoring-backup'."""
+
+ mock_get_manager = self.mock_object(
+ self.backup_mgr, '_get_manager')
+ mock_get_manager.return_value = 'fake_manager'
+
+ volume_id = self._create_volume_db_entry(status='restoring-backup')
+ volume = db.volume_get(self.ctxt, volume_id)
+
+ self.backup_mgr._cleanup_one_volume(self.ctxt, volume)
+
+ volume = db.volume_get(self.ctxt, volume_id)
+ self.assertEqual('error_restoring', volume['status'])
+
+ def test_cleanup_one_creating_backup(self):
+ """Test cleanup_one_backup for volume status 'creating'."""
+
+ backup = self._create_backup_db_entry(status='creating')
+
+ self.backup_mgr._cleanup_one_backup(self.ctxt, backup)
+
+ self.assertEqual('error', backup.status)
+
+ def test_cleanup_one_restoring_backup(self):
+ """Test cleanup_one_backup for volume status 'restoring'."""
+
+ backup = self._create_backup_db_entry(status='restoring')
+
+ self.backup_mgr._cleanup_one_backup(self.ctxt, backup)
+
+ self.assertEqual('available', backup.status)
+
+ def test_cleanup_one_deleting_backup(self):
+ """Test cleanup_one_backup for volume status 'deleting'."""
+
+ backup = self._create_backup_db_entry(status='deleting')
+
+ self.backup_mgr._cleanup_one_backup(self.ctxt, backup)
+
+ self.assertRaises(exception.BackupNotFound,
+ db.backup_get,
+ self.ctxt,
+ backup.id)
+
+ def test_detach_all_attachments_handles_exceptions(self):
+ """Test detach_all_attachments with exceptions."""
+
+ mock_log = self.mock_object(manager, 'LOG')
+ mock_volume_mgr = mock.Mock()
+ mock_detach_volume = mock_volume_mgr.detach_volume
+ mock_detach_volume.side_effect = [Exception]
+
+ fake_attachments = [
+ {
+ 'id': 'attachment1',
+ 'attached_host': 'testhost',
+ 'instance_uuid': None,
+ },
+ {
+ 'id': 'attachment2',
+ 'attached_host': 'testhost',
+ 'instance_uuid': None,
+ }
+ ]
+ fake_volume = {
+ 'id': 'fake_volume_id',
+ 'volume_attachment': fake_attachments
+ }
+
+ self.backup_mgr._detach_all_attachments(self.ctxt,
+ mock_volume_mgr,
+ fake_volume)
+
+ self.assertEqual(len(fake_attachments), mock_log.exception.call_count)
+
@ddt.data(KeyError, exception.VolumeNotFound)
- def test_cleanup_temp_volumes_snapshots_volume_not_found(
- self, err, mock_volume_get):
+ def test_cleanup_temp_volumes_snapshots_for_one_backup_volume_not_found(
+ self, err):
"""Ensure we handle missing volume for a backup."""
+
+ mock_volume_get = self.mock_object(db, 'volume_get')
mock_volume_get.side_effect = [err]
- backup1 = self._create_backup_db_entry(status='creating')
- backups = [backup1]
+ backup = self._create_backup_db_entry(status='creating')
- self.assertIsNone(self.backup_mgr._cleanup_temp_volumes_snapshots(
- backups))
+ self.assertIsNone(
+ self.backup_mgr._cleanup_temp_volumes_snapshots_for_one_backup(
+ self.ctxt,
+ backup))
- @mock.patch.object(lvm.LVMVolumeDriver, 'delete_snapshot')
- def test_cleanup_temp_snapshot_not_found(self,
- mock_delete_snapshot):
+ def test_cleanup_temp_snapshot_for_one_backup_not_found(self):
"""Ensure we handle missing temp snapshot for a backup."""
+ mock_delete_snapshot = self.mock_object(
+ lvm.LVMVolumeDriver, 'delete_snapshot')
+
vol1_id = self._create_volume_db_entry()
self._create_volume_attach(vol1_id)
db.volume_update(self.ctxt, vol1_id, {'status': 'backing-up'})
- backup1 = self._create_backup_db_entry(status='error',
- volume_id=vol1_id,
- temp_snapshot_id='fake')
- backups = [backup1]
- self.assertEqual('fake', backups[0].temp_snapshot_id)
- self.assertIsNone(self.backup_mgr._cleanup_temp_volumes_snapshots(
- backups))
+ backup = self._create_backup_db_entry(status='error',
+ volume_id=vol1_id,
+ temp_snapshot_id='fake')
+
+ self.assertIsNone(
+ self.backup_mgr._cleanup_temp_volumes_snapshots_for_one_backup(
+ self.ctxt,
+ backup))
+
self.assertFalse(mock_delete_snapshot.called)
- self.assertIsNone(backups[0].temp_snapshot_id)
- backup1.destroy()
+ self.assertIsNone(backup.temp_snapshot_id)
+
+ backup.destroy()
db.volume_destroy(self.ctxt, vol1_id)
- @mock.patch.object(lvm.LVMVolumeDriver, 'delete_volume')
- def test_cleanup_temp_volume_not_found(self,
- mock_delete_volume):
+ def test_cleanup_temp_volume_for_one_backup_not_found(self):
"""Ensure we handle missing temp volume for a backup."""
+ mock_delete_volume = self.mock_object(
+ lvm.LVMVolumeDriver, 'delete_volume')
+
vol1_id = self._create_volume_db_entry()
self._create_volume_attach(vol1_id)
db.volume_update(self.ctxt, vol1_id, {'status': 'backing-up'})
- backup1 = self._create_backup_db_entry(status='error',
- volume_id=vol1_id,
- temp_volume_id='fake')
- backups = [backup1]
- self.assertEqual('fake', backups[0].temp_volume_id)
- self.assertIsNone(self.backup_mgr._cleanup_temp_volumes_snapshots(
- backups))
+ backup = self._create_backup_db_entry(status='error',
+ volume_id=vol1_id,
+ temp_volume_id='fake')
+
+ self.assertIsNone(
+ self.backup_mgr._cleanup_temp_volumes_snapshots_for_one_backup(
+ self.ctxt,
+ backup))
+
self.assertFalse(mock_delete_volume.called)
- self.assertIsNone(backups[0].temp_volume_id)
- backup1.destroy()
+ self.assertIsNone(backup.temp_volume_id)
+
+ backup.destroy()
db.volume_destroy(self.ctxt, vol1_id)
def test_create_backup_with_bad_volume_status(self):