]> review.fuel-infra Code Review - openstack-build/neutron-build.git/commitdiff
Use a conntrack zone per port in OVS
authorKevin Benton <blak111@gmail.com>
Fri, 31 Jul 2015 01:07:03 +0000 (18:07 -0700)
committerKevin Benton <blak111@gmail.com>
Sat, 15 Aug 2015 13:36:22 +0000 (06:36 -0700)
Conntrack zones per network are not adequate because VMs
on the same host communicating with each other cross iptables
twice. If conntrack is sharing the same zone for each cross,
the first one can remove the connection from the table on a RST
and then the second one marks the RST as invalid.

This patch adjusts the logic to use a conntrack zone per port
instead of per network. In order to avoid interrupting upgrades
or restarts, the initial zone map is built from the existing
iptables rules so existing port->zone mappings are maintained.

Closes-Bug: #1478925
Change-Id: Ibe9e49653b2a280ea72cb95c2da64cd94c7739da

neutron/agent/linux/ip_conntrack.py
neutron/agent/linux/iptables_firewall.py
neutron/agent/linux/iptables_manager.py
neutron/agent/securitygroups_rpc.py
neutron/tests/unit/agent/linux/test_iptables_firewall.py
neutron/tests/unit/agent/test_securitygroups_rpc.py

index 97c94e0f62c44b07a90319b8faff68350f75c32b..3e988ee391813e3e59e15116737b8f8bd73e6130 100644 (file)
@@ -23,7 +23,8 @@ LOG = logging.getLogger(__name__)
 class IpConntrackManager(object):
     """Smart wrapper for ip conntrack."""
 
-    def __init__(self, execute=None, namespace=None):
+    def __init__(self, zone_lookup_func, execute=None, namespace=None):
+        self.get_device_zone = zone_lookup_func
         self.execute = execute or linux_utils.execute
         self.namespace = namespace
 
@@ -48,9 +49,7 @@ class IpConntrackManager(object):
         cmd = self._generate_conntrack_cmd_by_rule(rule, self.namespace)
         ethertype = rule.get('ethertype')
         for device_info in device_info_list:
-            zone_id = device_info.get('zone_id')
-            if not zone_id:
-                continue
+            zone_id = self.get_device_zone(device_info['device'])
             ips = device_info.get('fixed_ips', [])
             for ip in ips:
                 net = netaddr.IPNetwork(ip)
index 9684e331390bdd8ece67dbca9262a7befa113238..2daae7355cf370ccc5d5c990eeb6096c756b937e 100644 (file)
@@ -14,6 +14,8 @@
 #    under the License.
 
 import collections
+import re
+
 import netaddr
 from oslo_config import cfg
 from oslo_log import log as logging
@@ -41,7 +43,10 @@ DIRECTION_IP_PREFIX = {firewall.INGRESS_DIRECTION: 'source_ip_prefix',
                        firewall.EGRESS_DIRECTION: 'dest_ip_prefix'}
 IPSET_DIRECTION = {firewall.INGRESS_DIRECTION: 'src',
                    firewall.EGRESS_DIRECTION: 'dst'}
+# length of all device prefixes (e.g. qvo, tap, qvb)
+LINUX_DEV_PREFIX_LEN = 3
 LINUX_DEV_LEN = 14
+MAX_CONNTRACK_ZONES = 65535
 comment_rule = iptables_manager.comment_rule
 
 
@@ -57,7 +62,9 @@ class IptablesFirewallDriver(firewall.FirewallDriver):
         # TODO(majopela, shihanzhang): refactor out ipset to a separate
         # driver composed over this one
         self.ipset = ipset_manager.IpsetManager(namespace=namespace)
-        self.ipconntrack = ip_conntrack.IpConntrackManager(namespace=namespace)
+        self.ipconntrack = ip_conntrack.IpConntrackManager(
+            self.get_device_zone, namespace=namespace)
+        self._populate_initial_zone_map()
         # list of port which has security group
         self.filtered_ports = {}
         self.unfiltered_ports = {}
@@ -795,6 +802,68 @@ class IptablesFirewallDriver(firewall.FirewallDriver):
             self._pre_defer_filtered_ports = None
             self._pre_defer_unfiltered_ports = None
 
+    def _populate_initial_zone_map(self):
+        """Setup the map between devices and zones based on current rules."""
+        self._device_zone_map = {}
+        rules = self.iptables.get_rules_for_table('raw')
+        for rule in rules:
+            match = re.match(r'.* --physdev-in (?P<dev>[a-zA-Z0-9\-]+)'
+                             r'.* -j CT --zone (?P<zone>\d+).*', rule)
+            if match:
+                # strip off any prefix that the interface is using
+                short_port_id = match.group('dev')[LINUX_DEV_PREFIX_LEN:]
+                self._device_zone_map[short_port_id] = int(match.group('zone'))
+        LOG.debug("Populated conntrack zone map: %s", self._device_zone_map)
+
+    def get_device_zone(self, port_id):
+        # we have to key the device_zone_map based on the fragment of the port
+        # UUID that shows up in the interface name. This is because the initial
+        # map is populated strictly based on interface names that we don't know
+        # the full UUID of.
+        short_port_id = port_id[:(LINUX_DEV_LEN - LINUX_DEV_PREFIX_LEN)]
+        try:
+            return self._device_zone_map[short_port_id]
+        except KeyError:
+            self._free_zones_from_removed_ports()
+            return self._generate_device_zone(short_port_id)
+
+    def _free_zones_from_removed_ports(self):
+        """Clears any entries from the zone map of removed ports."""
+        existing_ports = [
+            port['device'][:(LINUX_DEV_LEN - LINUX_DEV_PREFIX_LEN)]
+            for port in (list(self.filtered_ports.values()) +
+                         list(self.unfiltered_ports.values()))
+        ]
+        removed = set(self._device_zone_map) - set(existing_ports)
+        for dev in removed:
+            self._device_zone_map.pop(dev, None)
+
+    def _generate_device_zone(self, short_port_id):
+        """Generates a unique conntrack zone for the passed in ID."""
+        zone = self._find_open_zone()
+        self._device_zone_map[short_port_id] = zone
+        LOG.debug("Assigned CT zone %(z)s to port %(dev)s.",
+                  {'z': zone, 'dev': short_port_id})
+        return self._device_zone_map[short_port_id]
+
+    def _find_open_zone(self):
+        # call set to dedup because old ports may be mapped to the same zone.
+        zones_in_use = sorted(set(self._device_zone_map.values()))
+        if not zones_in_use:
+            return 1
+        # attempt to increment onto the highest used zone first. if we hit the
+        # end, go back and look for any gaps left by removed devices.
+        last = zones_in_use[-1]
+        if last < MAX_CONNTRACK_ZONES:
+            return last + 1
+        for index, used in enumerate(zones_in_use):
+            if used - index != 1:
+                # gap found, let's use it!
+                return index + 1
+        # conntrack zones exhausted :( :(
+        raise RuntimeError("iptables conntrack zones exhausted. "
+                           "iptables rules cannot be applied.")
+
 
 class OVSHybridIptablesFirewallDriver(IptablesFirewallDriver):
     OVS_HYBRID_TAP_PREFIX = constants.TAP_DEVICE_PREFIX
@@ -815,20 +884,18 @@ class OVSHybridIptablesFirewallDriver(IptablesFirewallDriver):
         else:
             device = self._get_device_name(port)
         jump_rule = '-m physdev --physdev-in %s -j CT --zone %s' % (
-            device, port['zone_id'])
+            device, self.get_device_zone(port['device']))
         return jump_rule
 
     def _add_raw_chain_rules(self, port, direction):
-        if port['zone_id']:
-            jump_rule = self._get_jump_rule(port, direction)
-            self.iptables.ipv4['raw'].add_rule('PREROUTING', jump_rule)
-            self.iptables.ipv6['raw'].add_rule('PREROUTING', jump_rule)
+        jump_rule = self._get_jump_rule(port, direction)
+        self.iptables.ipv4['raw'].add_rule('PREROUTING', jump_rule)
+        self.iptables.ipv6['raw'].add_rule('PREROUTING', jump_rule)
 
     def _remove_raw_chain_rules(self, port, direction):
-        if port['zone_id']:
-            jump_rule = self._get_jump_rule(port, direction)
-            self.iptables.ipv4['raw'].remove_rule('PREROUTING', jump_rule)
-            self.iptables.ipv6['raw'].remove_rule('PREROUTING', jump_rule)
+        jump_rule = self._get_jump_rule(port, direction)
+        self.iptables.ipv4['raw'].remove_rule('PREROUTING', jump_rule)
+        self.iptables.ipv6['raw'].remove_rule('PREROUTING', jump_rule)
 
     def _add_chain(self, port, direction):
         super(OVSHybridIptablesFirewallDriver, self)._add_chain(port,
index a65e769c0b6c4dafd69e3c65432cf3a64277dc28..d72cdd587274e4f2e4dcee7bcf04f29bdc49ece4 100644 (file)
@@ -426,6 +426,13 @@ class IptablesManager(object):
         with lockutils.lock(lock_name, utils.SYNCHRONIZED_PREFIX, True):
             return self._apply_synchronized()
 
+    def get_rules_for_table(self, table):
+        """Runs iptables-save on a table and returns the results."""
+        args = ['iptables-save', '-t', table]
+        if self.namespace:
+            args = ['ip', 'netns', 'exec', self.namespace] + args
+        return self.execute(args, run_as_root=True).split('\n')
+
     def _apply_synchronized(self):
         """Apply the current in-memory set of iptables rules.
 
index ec1ad6b2c9f4453178d2fcb90e4932b9c5c1513b..a0ac9ed3c6fbd634e3f321cc02d12a09d233bdd7 100644 (file)
@@ -110,23 +110,6 @@ class SecurityGroupAgentRpc(object):
         self.global_refresh_firewall = False
         self._use_enhanced_rpc = None
 
-    def set_local_zone(self, device):
-        """Set local zone id for device
-
-        In order to separate conntrack in different networks, a local zone
-        id is needed to generate related iptables rules. This routine sets
-        zone id to device according to the network it belongs to. For OVS
-        agent, vlan id of each network can be used as zone id.
-
-        :param device: dictionary of device information, get network id by
-        device['network_id'], and set zone id by device['zone_id']
-        """
-        net_id = device['network_id']
-        zone_id = None
-        if self.local_vlan_map and net_id in self.local_vlan_map:
-            zone_id = self.local_vlan_map[net_id].vlan
-        device['zone_id'] = zone_id
-
     @property
     def use_enhanced_rpc(self):
         if self._use_enhanced_rpc is None:
@@ -176,7 +159,6 @@ class SecurityGroupAgentRpc(object):
 
         with self.firewall.defer_apply():
             for device in devices.values():
-                self.set_local_zone(device)
                 self.firewall.prepare_port_filter(device)
             if self.use_enhanced_rpc:
                 LOG.debug("Update security group information for ports %s",
@@ -267,7 +249,6 @@ class SecurityGroupAgentRpc(object):
         with self.firewall.defer_apply():
             for device in devices.values():
                 LOG.debug("Update port filter for %s", device['device'])
-                self.set_local_zone(device)
                 self.firewall.update_port_filter(device)
             if self.use_enhanced_rpc:
                 LOG.debug("Update security group information for ports %s",
index 8c9b9e2a4bd3a38e1ba7992d34f0210112af9db3..967a1461b16df4aabbd1daae634d1a2e13f7bc3b 100644 (file)
@@ -18,6 +18,7 @@ import copy
 import mock
 from oslo_config import cfg
 import six
+import testtools
 
 from neutron.agent.common import config as a_cfg
 from neutron.agent.linux import ipset_manager
@@ -41,6 +42,27 @@ OTHER_SGID = 'other_sgid'
 _IPv6 = constants.IPv6
 _IPv4 = constants.IPv4
 
+RAW_TABLE_OUTPUT = """
+# Generated by iptables-save v1.4.21 on Fri Jul 31 16:13:28 2015
+*raw
+:PREROUTING ACCEPT [11561:3470468]
+:OUTPUT ACCEPT [11504:4064044]
+:neutron-openvswi-OUTPUT - [0:0]
+:neutron-openvswi-PREROUTING - [0:0]
+-A PREROUTING -j neutron-openvswi-PREROUTING
+ -A OUTPUT -j neutron-openvswi-OUTPUT
+-A neutron-openvswi-PREROUTING -m physdev --physdev-in qvbe804433b-61 -j CT --zone 1
+-A neutron-openvswi-PREROUTING -m physdev --physdev-in tape804433b-61 -j CT --zone 1
+-A neutron-openvswi-PREROUTING -m physdev --physdev-in qvb95c24827-02 -j CT --zone 2
+-A neutron-openvswi-PREROUTING -m physdev --physdev-in tap95c24827-02 -j CT --zone 2
+-A neutron-openvswi-PREROUTING -m physdev --physdev-in qvb61634509-31 -j CT --zone 2
+-A neutron-openvswi-PREROUTING -m physdev --physdev-in tap61634509-31 -j CT --zone 2
+-A neutron-openvswi-PREROUTING -m physdev --physdev-in qvb8f46cf18-12 -j CT --zone 9
+-A neutron-openvswi-PREROUTING -m physdev --physdev-in tap8f46cf18-12 -j CT --zone 9
+COMMIT
+# Completed on Fri Jul 31 16:13:28 2015
+"""  # noqa
+
 
 class BaseIptablesFirewallTestCase(base.BaseTestCase):
     def setUp(self):
@@ -65,6 +87,8 @@ class BaseIptablesFirewallTestCase(base.BaseTestCase):
                                    }
         iptables_cls.return_value = self.iptables_inst
 
+        self.iptables_inst.get_rules_for_table.return_value = (
+            RAW_TABLE_OUTPUT.splitlines())
         self.firewall = iptables_firewall.IptablesFirewallDriver()
         self.firewall.iptables = self.iptables_inst
 
@@ -1030,7 +1054,6 @@ class IptablesFirewallTestCase(BaseIptablesFirewallTestCase):
     def _test_remove_conntrack_entries(self, ethertype, protocol,
                                        direction):
         port = self._fake_port()
-        port['zone_id'] = 1
         port['security_groups'] = 'fake_sg_id'
         self.firewall.filtered_ports[port['device']] = port
         self.firewall.updated_rule_sg_ids = set(['fake_sg_id'])
@@ -1076,7 +1099,6 @@ class IptablesFirewallTestCase(BaseIptablesFirewallTestCase):
 
     def test_remove_conntrack_entries_for_port_sec_group_change(self):
         port = self._fake_port()
-        port['zone_id'] = 1
         port['security_groups'] = ['fake_sg_id']
         self.firewall.filtered_ports[port['device']] = port
         self.firewall.updated_sg_members = set(['tapfake_dev'])
@@ -1802,3 +1824,46 @@ class IptablesFirewallEnhancedIpsetTestCase(BaseIptablesFirewallTestCase):
         self.firewall._update_ipset_members(sg_info)
         calls = [mock.call.set_members(FAKE_SGID, constants.IPv4, [])]
         self.firewall.ipset.assert_has_calls(calls)
+
+
+class OVSHybridIptablesFirewallTestCase(BaseIptablesFirewallTestCase):
+
+    def setUp(self):
+        super(OVSHybridIptablesFirewallTestCase, self).setUp()
+        self.firewall = iptables_firewall.OVSHybridIptablesFirewallDriver()
+
+    def test__populate_initial_zone_map(self):
+        expected = {'61634509-31': 2, '8f46cf18-12': 9,
+                    '95c24827-02': 2, 'e804433b-61': 1}
+        self.assertEqual(expected, self.firewall._device_zone_map)
+
+    def test__generate_device_zone(self):
+        # inital data has 1, 2, and 9 in use.
+        # we fill from top up first.
+        self.assertEqual(10, self.firewall._generate_device_zone('test'))
+
+        # once it's maxed out, it scans for gaps
+        self.firewall._device_zone_map['someport'] = (
+            iptables_firewall.MAX_CONNTRACK_ZONES)
+        for i in range(3, 9):
+            self.assertEqual(i, self.firewall._generate_device_zone(i))
+
+        # 9 and 10 are taken so next should be 11
+        self.assertEqual(11, self.firewall._generate_device_zone('p11'))
+
+        # take out zone 1 and make sure it's selected
+        self.firewall._device_zone_map.pop('e804433b-61')
+        self.assertEqual(1, self.firewall._generate_device_zone('p1'))
+
+        # fill it up and then make sure an extra throws an error
+        for i in range(1, 65536):
+            self.firewall._device_zone_map['dev-%s' % i] = i
+        with testtools.ExpectedException(RuntimeError):
+            self.firewall._find_open_zone()
+
+    def test_get_device_zone(self):
+        # calling get_device_zone should clear out all of the other entries
+        # since they aren't in the filtered ports list
+        self.assertEqual(1, self.firewall.get_device_zone('12345678901234567'))
+        # should have been truncated to 11 chars
+        self.assertEqual({'12345678901': 1}, self.firewall._device_zone_map)
index 1cc7afd594d9445f418ad7dd32e3a1ca0b1f7756..3b34ab0e07c1614f7d6b40821c95616545fcf9ef 100644 (file)
@@ -1706,8 +1706,8 @@ IPTABLES_RAW_DEVICE_2 = """# Generated by iptables_manager
 -j CT --zone 1
 [0:0] -A %(bn)s-PREROUTING -m physdev --physdev-in tap_%(port1)s -j CT --zone 1
 [0:0] -A %(bn)s-PREROUTING -m physdev --physdev-in qvbtap_%(port2)s \
--j CT --zone 1
-[0:0] -A %(bn)s-PREROUTING -m physdev --physdev-in tap_%(port2)s -j CT --zone 1
+-j CT --zone 2
+[0:0] -A %(bn)s-PREROUTING -m physdev --physdev-in tap_%(port2)s -j CT --zone 2
 COMMIT
 # Completed by iptables_manager
 """ % IPTABLES_ARG
@@ -2609,9 +2609,9 @@ class TestSecurityGroupAgentWithIptables(base.BaseTestCase):
         value = value.replace('physdev-INGRESS', self.PHYSDEV_INGRESS)
         value = value.replace('physdev-EGRESS', self.PHYSDEV_EGRESS)
         value = value.replace('\n', '\\n')
-        value = value.replace('[', '\[')
-        value = value.replace(']', '\]')
-        value = value.replace('*', '\*')
+        value = value.replace('[', r'\[')
+        value = value.replace(']', r'\]')
+        value = value.replace('*', r'\*')
         return value
 
     def _register_mock_call(self, *args, **kwargs):