]> review.fuel-infra Code Review - openstack-build/neutron-build.git/commitdiff
Fix race condition on processing DVR floating IPs
authorrajeev <rajeev.grover@hp.com>
Mon, 13 Oct 2014 20:25:36 +0000 (16:25 -0400)
committerrajeev <rajeev.grover@hp.com>
Mon, 20 Oct 2014 23:06:45 +0000 (19:06 -0400)
Fip namespace and agent gateway port can be shared by multiple dvr routers.
This change uses a set as the control variable for these shared resources
and ensures that Test and Set operation on the control variable are
performed atomically so that race conditions do not occur among
multiple threads processing floating IPs.
Limitation: The scope of this change is limited to addressing the race
condition described in the bug report. It may not address other issues
such as pre-existing issue with handling of DVR floatingips on agent
restart.

closes-bug: #1381238

Change-Id: I6dc2b7bad6e8ddbaa86c1f7a1e2028aeacc3afef

neutron/agent/l3_agent.py
neutron/tests/unit/test_l3_agent.py

index 112bf77ad27184c876b865f2aec329e2ce0052fe..2bb784277f834e4d2697f07bb6eec69ba117f634 100644 (file)
@@ -561,7 +561,7 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback,
 
         # dvr data
         self.agent_gateway_port = None
-        self.agent_fip_count = 0
+        self.fip_ns_subscribers = set()
         self.local_subnets = LinkLocalAllocator(
             os.path.join(self.conf.state_path, 'fip-linklocal-networks'),
             FIP_LL_SUBNET)
@@ -573,6 +573,15 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback,
         self.target_ex_net_id = None
         self.use_ipv6 = ipv6_utils.is_enabled()
 
+    def _fip_ns_subscribe(self, router_id):
+        is_first = (len(self.fip_ns_subscribers) == 0)
+        self.fip_ns_subscribers.add(router_id)
+        return is_first
+
+    def _fip_ns_unsubscribe(self, router_id):
+        self.fip_ns_subscribers.discard(router_id)
+        return len(self.fip_ns_subscribers) == 0
+
     def _check_config_params(self):
         """Check items in configuration files.
 
@@ -1096,9 +1105,11 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback,
         if ri.router['distributed']:
             # filter out only FIPs for this host/agent
             floating_ips = [i for i in floating_ips if i['host'] == self.host]
-            if floating_ips and self.agent_gateway_port is None:
-                self._create_agent_gateway_port(ri, floating_ips[0]
-                                                ['floating_network_id'])
+            if floating_ips:
+                is_first = self._fip_ns_subscribe(ri.router_id)
+                if is_first:
+                    self._create_agent_gateway_port(ri, floating_ips[0]
+                                                    ['floating_network_id'])
 
             if self.agent_gateway_port:
                 if floating_ips and ri.dist_fip_count == 0:
@@ -1662,7 +1673,6 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback,
                                          interface_name, floating_ip,
                                          distributed=True)
         # update internal structures
-        self.agent_fip_count = self.agent_fip_count + 1
         ri.dist_fip_count = ri.dist_fip_count + 1
 
     def floating_ip_removed_dist(self, ri, fip_cidr):
@@ -1696,10 +1706,10 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback,
             self.local_subnets.release(ri.router_id)
             ri.rtr_fip_subnet = None
             ns_ip.del_veth(fip_2_rtr_name)
-        # clean up fip-namespace if this is the last FIP
-        self.agent_fip_count = self.agent_fip_count - 1
-        if self.agent_fip_count == 0:
-            self._destroy_fip_namespace(fip_ns_name)
+            is_last = self._fip_ns_unsubscribe(ri.router_id)
+            # clean up fip-namespace if this is the last FIP
+            if is_last:
+                self._destroy_fip_namespace(fip_ns_name)
 
     def floating_forward_rules(self, floating_ip, fixed_ip):
         return [('PREROUTING', '-d %s -j DNAT --to %s' %
index a8b74ea76f79247050ab559afcf107e94805630d..ed7f7b611e2fe353e4c519c9cc8b4fe7f8ae7e06 100644 (file)
@@ -2178,8 +2178,9 @@ vrrp_instance VR_1 {
                                                         16, FIP_PRI)
         # TODO(mrsmith): add more asserts
 
+    @mock.patch.object(l3_agent.L3NATAgent, '_fip_ns_unsubscribe')
     @mock.patch.object(l3_agent.LinkLocalAllocator, '_write')
-    def test_floating_ip_removed_dist(self, write):
+    def test_floating_ip_removed_dist(self, write, unsubscribe):
         agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
         router = prepare_router_data()
         agent_gw_port = {'fixed_ips': [{'ip_address': '20.0.0.30',
@@ -2194,6 +2195,7 @@ vrrp_instance VR_1 {
         ri = l3_agent.RouterInfo(router['id'], self.conf.root_helper,
                                  self.conf.use_namespaces, router=router)
         ri.dist_fip_count = 2
+        agent.fip_ns_subscribers.add(ri.router_id)
         ri.floating_ips_dict['11.22.33.44'] = FIP_PRI
         ri.fip_2_rtr = '11.22.33.42'
         ri.rtr_2_fip = '11.22.33.40'
@@ -2204,9 +2206,10 @@ vrrp_instance VR_1 {
         self.mock_rule.delete_rule_priority.assert_called_with(FIP_PRI)
         self.mock_ip_dev.route.delete_route.assert_called_with(fip_cidr,
                                                                str(s.ip))
+        self.assertFalse(unsubscribe.called, '_fip_ns_unsubscribe called!')
+
         with mock.patch.object(agent, '_destroy_fip_namespace') as f:
             ri.dist_fip_count = 1
-            agent.agent_fip_count = 1
             fip_ns_name = agent.get_fip_ns_name(
                 str(agent._fetch_external_net_id()))
             ri.rtr_fip_subnet = agent.local_subnets.allocate(ri.router_id)
@@ -2217,6 +2220,7 @@ vrrp_instance VR_1 {
             self.mock_ip_dev.route.delete_gateway.assert_called_once_with(
                 str(fip_to_rtr.ip), table=16)
             f.assert_called_once_with(fip_ns_name)
+        unsubscribe.assert_called_once_with(ri.router_id)
 
     def test_get_service_plugin_list(self):
         service_plugins = [p_const.L3_ROUTER_NAT]
@@ -2252,6 +2256,40 @@ vrrp_instance VR_1 {
         self.assertRaises(messaging.MessagingTimeout, l3_agent.L3NATAgent,
                           HOSTNAME, self.conf)
 
+    def test__fip_ns_subscribe_is_first_true(self):
+        agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
+        router_id = _uuid()
+        is_first = agent._fip_ns_subscribe(router_id)
+        self.assertTrue(is_first)
+        self.assertEqual(len(agent.fip_ns_subscribers), 1)
+
+    def test__fip_ns_subscribe_is_first_false(self):
+        agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
+        router_id = _uuid()
+        router2_id = _uuid()
+        agent._fip_ns_subscribe(router_id)
+        is_first = agent._fip_ns_subscribe(router2_id)
+        self.assertFalse(is_first)
+        self.assertEqual(len(agent.fip_ns_subscribers), 2)
+
+    def test__fip_ns_unsubscribe_is_last_true(self):
+        agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
+        router_id = _uuid()
+        agent.fip_ns_subscribers.add(router_id)
+        is_last = agent._fip_ns_unsubscribe(router_id)
+        self.assertTrue(is_last)
+        self.assertEqual(len(agent.fip_ns_subscribers), 0)
+
+    def test__fip_ns_unsubscribe_is_last_false(self):
+        agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
+        router_id = _uuid()
+        router2_id = _uuid()
+        agent.fip_ns_subscribers.add(router_id)
+        agent.fip_ns_subscribers.add(router2_id)
+        is_last = agent._fip_ns_unsubscribe(router_id)
+        self.assertFalse(is_last)
+        self.assertEqual(len(agent.fip_ns_subscribers), 1)
+
 
 class TestL3AgentEventHandler(base.BaseTestCase):