From e3d03eae48695ce093361cc143161efa3daaaaa2 Mon Sep 17 00:00:00 2001 From: Salvatore Orlando Date: Mon, 9 Dec 2013 07:53:24 -0800 Subject: [PATCH] NVP plugin: Avoid timeouts if creating routers in parallel There is a well-known issue of eventlet causing deadlocks with mysql transactions; such condition might occur when creating NVP routers in parallel. To avoid this, this patch moves the long-running method _update_router_gw_info outside of the mysql transaction, adding the appropriate failure management code, and adds a lock to the method _nvp_create_ext_gw_port, to ensure serial access to it. Unit tests for verifying correct behaviour in case of failures are added as well. Closes Bug: #1251847 Change-Id: I2864e2f5bfd3982875ef9df97186f64a9fcaa0e0 --- neutron/plugins/nicira/NeutronPlugin.py | 25 ++++++++- .../tests/unit/nicira/test_nicira_plugin.py | 56 ++++++++++++++++--- 2 files changed, 71 insertions(+), 10 deletions(-) diff --git a/neutron/plugins/nicira/NeutronPlugin.py b/neutron/plugins/nicira/NeutronPlugin.py index 3742f9709..a624c910c 100644 --- a/neutron/plugins/nicira/NeutronPlugin.py +++ b/neutron/plugins/nicira/NeutronPlugin.py @@ -57,6 +57,7 @@ from neutron.extensions import portsecurity as psec from neutron.extensions import providernet as pnet from neutron.extensions import securitygroup as ext_sg from neutron.openstack.common import excutils +from neutron.openstack.common import lockutils from neutron.plugins.common import constants as plugin_const from neutron.plugins.nicira.common import config # noqa from neutron.plugins.nicira.common import exceptions as nvp_exc @@ -600,6 +601,7 @@ class NvpPluginV2(addr_pair_db.AllowedAddressPairsMixin, % router_id)) return lr_port + @lockutils.synchronized('nicira', 'neutron-') def _nvp_create_ext_gw_port(self, context, port_data): """Driver for creating an external gateway port on NVP platform.""" # TODO(salvatore-orlando): Handle NVP resource @@ -1457,7 +1459,8 @@ class NvpPluginV2(addr_pair_db.AllowedAddressPairsMixin, # did not specify any value for the 'distributed' attribute # Platforms older than 3.x do not support the attribute r['distributed'] = lrouter.get('distributed', False) - + # TODO(salv-orlando): Deal with backend object removal in case + # of db failures with context.session.begin(subtransactions=True): # Transaction nesting is needed to avoid foreign key violations # when processing the distributed router binding @@ -1469,8 +1472,26 @@ class NvpPluginV2(addr_pair_db.AllowedAddressPairsMixin, status=lrouter['status']) self._process_nsx_router_create(context, router_db, r) context.session.add(router_db) - if has_gw_info: + if has_gw_info: + # NOTE(salv-orlando): This operation has been moved out of the + # database transaction since it performs several NVP queries, + # ithis ncreasing the risk of deadlocks between eventlet and + # sqlalchemy operations. + # Set external gateway and remove router in case of failure + try: self._update_router_gw_info(context, router_db['id'], gw_info) + except (q_exc.NeutronException, NvpApiClient.NvpApiException): + with excutils.save_and_reraise_exception(): + # As setting gateway failed, the router must be deleted + # in order to ensure atomicity + router_id = router_db['id'] + LOG.warn(_("Failed to set gateway info for router being " + "created:%s - removing router"), router_id) + self.delete_router(context, router_id) + LOG.info(_("Create router failed while setting external " + "gateway. Router:%s has been removed from " + "DB and backend"), + router_id) router = self._make_router_dict(router_db) return router diff --git a/neutron/tests/unit/nicira/test_nicira_plugin.py b/neutron/tests/unit/nicira/test_nicira_plugin.py index a239235be..0406122d3 100644 --- a/neutron/tests/unit/nicira/test_nicira_plugin.py +++ b/neutron/tests/unit/nicira/test_nicira_plugin.py @@ -622,20 +622,25 @@ class TestNiciraL3NatTestCase(NiciraL3NatTest, nvplib, 'create_lrouter', new=obsolete_response): self._test_router_create_with_distributed(None, False, '2.2') + def _create_router_with_gw_info_for_test(self, subnet): + data = {'router': {'tenant_id': 'whatever', + 'name': 'router1', + 'external_gateway_info': + {'network_id': subnet['subnet']['network_id']}}} + router_req = self.new_create_request( + 'routers', data, self.fmt) + return router_req.get_response(self.ext_api) + def test_router_create_nvp_error_returns_500(self, vlan_id=None): with mock.patch.object(nvplib, 'create_router_lport', side_effect=NvpApiClient.NvpApiException): with self._create_l3_ext_network(vlan_id) as net: with self.subnet(network=net) as s: - data = {'router': {'tenant_id': 'whatever'}} - data['router']['name'] = 'router1' - data['router']['external_gateway_info'] = { - 'network_id': s['subnet']['network_id']} - router_req = self.new_create_request( - 'routers', data, self.fmt) - res = router_req.get_response(self.ext_api) - self.assertEqual(500, res.status_int) + res = self._create_router_with_gw_info_for_test(s) + self.assertEqual( + webob.exc.HTTPInternalServerError.code, + res.status_int) def test_router_add_gateway_invalid_network_returns_404(self): # NOTE(salv-orlando): This unit test has been overriden @@ -647,6 +652,41 @@ class TestNiciraL3NatTestCase(NiciraL3NatTest, uuidutils.generate_uuid(), expected_code=webob.exc.HTTPNotFound.code) + def _verify_router_rollback(self): + # Check that nothing is left on DB + # TODO(salv-orlando): Verify whehter this is thread-safe + # w.r.t. sqllite and parallel testing + self._test_list_resources('router', []) + # Check that router is not in NVP + self.assertFalse(self.fc._fake_lrouter_dict) + + def test_router_create_with_gw_info_neutron_fail_does_rollback(self): + # Simulate get subnet error while building list of ips with prefix + with mock.patch.object(self._plugin_class, + '_build_ip_address_list', + side_effect=ntn_exc.SubnetNotFound( + subnet_id='xxx')): + with self._create_l3_ext_network() as net: + with self.subnet(network=net) as s: + res = self._create_router_with_gw_info_for_test(s) + self.assertEqual( + webob.exc.HTTPNotFound.code, + res.status_int) + self._verify_router_rollback() + + def test_router_create_with_gw_info_nvp_fail_does_rollback(self): + # Simulate error while fetching nvp router gw port + with mock.patch.object(self._plugin_class, + '_find_router_gw_port', + side_effect=NvpApiClient.NvpApiException): + with self._create_l3_ext_network() as net: + with self.subnet(network=net) as s: + res = self._create_router_with_gw_info_for_test(s) + self.assertEqual( + webob.exc.HTTPInternalServerError.code, + res.status_int) + self._verify_router_rollback() + def _test_router_update_gateway_on_l3_ext_net(self, vlan_id=None, validate_ext_gw=True): with self.router() as r: -- 2.45.2