]> review.fuel-infra Code Review - openstack-build/neutron-build.git/commitdiff
NVP plugin: Avoid timeouts if creating routers in parallel
authorSalvatore Orlando <salv.orlando@gmail.com>
Mon, 9 Dec 2013 15:53:24 +0000 (07:53 -0800)
committerSalvatore Orlando <salv.orlando@gmail.com>
Fri, 13 Dec 2013 11:26:01 +0000 (03:26 -0800)
There is a well-known issue of eventlet causing deadlocks with
mysql transactions; such condition might occur when
creating NVP routers in parallel.

To avoid this, this patch moves the long-running method
_update_router_gw_info outside of the mysql transaction, adding
the appropriate failure management code, and adds a lock to the
method _nvp_create_ext_gw_port, to ensure serial access to it.

Unit tests for verifying correct behaviour in case of failures
are added as well.

Closes Bug: #1251847

Change-Id: I2864e2f5bfd3982875ef9df97186f64a9fcaa0e0

neutron/plugins/nicira/NeutronPlugin.py
neutron/tests/unit/nicira/test_nicira_plugin.py

index 3742f97093fc2a4dc4998300094b1d66d8f689ec..a624c910cb233d673e379eba6f1d6bd02e71338d 100644 (file)
@@ -57,6 +57,7 @@ from neutron.extensions import portsecurity as psec
 from neutron.extensions import providernet as pnet
 from neutron.extensions import securitygroup as ext_sg
 from neutron.openstack.common import excutils
+from neutron.openstack.common import lockutils
 from neutron.plugins.common import constants as plugin_const
 from neutron.plugins.nicira.common import config  # noqa
 from neutron.plugins.nicira.common import exceptions as nvp_exc
@@ -600,6 +601,7 @@ class NvpPluginV2(addr_pair_db.AllowedAddressPairsMixin,
                          % router_id))
         return lr_port
 
+    @lockutils.synchronized('nicira', 'neutron-')
     def _nvp_create_ext_gw_port(self, context, port_data):
         """Driver for creating an external gateway port on NVP platform."""
         # TODO(salvatore-orlando): Handle NVP resource
@@ -1457,7 +1459,8 @@ class NvpPluginV2(addr_pair_db.AllowedAddressPairsMixin,
         # did not specify any value for the 'distributed' attribute
         # Platforms older than 3.x do not support the attribute
         r['distributed'] = lrouter.get('distributed', False)
-
+        # TODO(salv-orlando): Deal with backend object removal in case
+        # of db failures
         with context.session.begin(subtransactions=True):
             # Transaction nesting is needed to avoid foreign key violations
             # when processing the distributed router binding
@@ -1469,8 +1472,26 @@ class NvpPluginV2(addr_pair_db.AllowedAddressPairsMixin,
                                          status=lrouter['status'])
                 self._process_nsx_router_create(context, router_db, r)
                 context.session.add(router_db)
-            if has_gw_info:
+        if has_gw_info:
+            # NOTE(salv-orlando): This operation has been moved out of the
+            # database transaction since it performs several NVP queries,
+            # ithis ncreasing the risk of deadlocks between eventlet and
+            # sqlalchemy operations.
+            # Set external gateway and remove router in case of failure
+            try:
                 self._update_router_gw_info(context, router_db['id'], gw_info)
+            except (q_exc.NeutronException, NvpApiClient.NvpApiException):
+                with excutils.save_and_reraise_exception():
+                    # As setting gateway failed, the router must be deleted
+                    # in order to ensure atomicity
+                    router_id = router_db['id']
+                    LOG.warn(_("Failed to set gateway info for router being "
+                               "created:%s - removing router"), router_id)
+                    self.delete_router(context, router_id)
+                    LOG.info(_("Create router failed while setting external "
+                               "gateway. Router:%s has been removed from "
+                               "DB and backend"),
+                             router_id)
         router = self._make_router_dict(router_db)
         return router
 
index a239235becbec644f0b8af1f2552abd15ddc5329..0406122d395ca510c3708f065f0436dddfb7f65b 100644 (file)
@@ -622,20 +622,25 @@ class TestNiciraL3NatTestCase(NiciraL3NatTest,
             nvplib, 'create_lrouter', new=obsolete_response):
             self._test_router_create_with_distributed(None, False, '2.2')
 
+    def _create_router_with_gw_info_for_test(self, subnet):
+        data = {'router': {'tenant_id': 'whatever',
+                           'name': 'router1',
+                           'external_gateway_info':
+                           {'network_id': subnet['subnet']['network_id']}}}
+        router_req = self.new_create_request(
+            'routers', data, self.fmt)
+        return router_req.get_response(self.ext_api)
+
     def test_router_create_nvp_error_returns_500(self, vlan_id=None):
         with mock.patch.object(nvplib,
                                'create_router_lport',
                                side_effect=NvpApiClient.NvpApiException):
             with self._create_l3_ext_network(vlan_id) as net:
                 with self.subnet(network=net) as s:
-                    data = {'router': {'tenant_id': 'whatever'}}
-                    data['router']['name'] = 'router1'
-                    data['router']['external_gateway_info'] = {
-                        'network_id': s['subnet']['network_id']}
-                    router_req = self.new_create_request(
-                        'routers', data, self.fmt)
-                    res = router_req.get_response(self.ext_api)
-                    self.assertEqual(500, res.status_int)
+                    res = self._create_router_with_gw_info_for_test(s)
+                    self.assertEqual(
+                        webob.exc.HTTPInternalServerError.code,
+                        res.status_int)
 
     def test_router_add_gateway_invalid_network_returns_404(self):
         # NOTE(salv-orlando): This unit test has been overriden
@@ -647,6 +652,41 @@ class TestNiciraL3NatTestCase(NiciraL3NatTest,
                 uuidutils.generate_uuid(),
                 expected_code=webob.exc.HTTPNotFound.code)
 
+    def _verify_router_rollback(self):
+        # Check that nothing is left on DB
+        # TODO(salv-orlando): Verify whehter this is thread-safe
+        # w.r.t. sqllite and parallel testing
+        self._test_list_resources('router', [])
+        # Check that router is not in NVP
+        self.assertFalse(self.fc._fake_lrouter_dict)
+
+    def test_router_create_with_gw_info_neutron_fail_does_rollback(self):
+        # Simulate get subnet error while building list of ips with prefix
+        with mock.patch.object(self._plugin_class,
+                               '_build_ip_address_list',
+                               side_effect=ntn_exc.SubnetNotFound(
+                                   subnet_id='xxx')):
+            with self._create_l3_ext_network() as net:
+                with self.subnet(network=net) as s:
+                    res = self._create_router_with_gw_info_for_test(s)
+                    self.assertEqual(
+                        webob.exc.HTTPNotFound.code,
+                        res.status_int)
+                    self._verify_router_rollback()
+
+    def test_router_create_with_gw_info_nvp_fail_does_rollback(self):
+        # Simulate error while fetching nvp router gw port
+        with mock.patch.object(self._plugin_class,
+                               '_find_router_gw_port',
+                               side_effect=NvpApiClient.NvpApiException):
+            with self._create_l3_ext_network() as net:
+                with self.subnet(network=net) as s:
+                    res = self._create_router_with_gw_info_for_test(s)
+                    self.assertEqual(
+                        webob.exc.HTTPInternalServerError.code,
+                        res.status_int)
+                    self._verify_router_rollback()
+
     def _test_router_update_gateway_on_l3_ext_net(self, vlan_id=None,
                                                   validate_ext_gw=True):
         with self.router() as r: