]> review.fuel-infra Code Review - openstack-build/neutron-build.git/commitdiff
Race condition of L3-agent to add/remove routers
authorLi Ma <skywalker.nick@gmail.com>
Fri, 21 Feb 2014 08:57:25 +0000 (00:57 -0800)
committerLi Ma <skywalker.nick@gmail.com>
Tue, 29 Jul 2014 08:52:18 +0000 (01:52 -0700)
This race condition happens when repeatedly calling
l3-agent-router-add and l3-agent-router-remove
by different neutron-servers at the same time.

The primary key constraint is added for the pair of
(router_id and l3_agent_id).

During migration, verification is done if the current
records violate the PK constraint defined in this bug
fix, and sanitize the data before schema modification.

Due to different dialects of database engines, different
sql statements are executed correspondingly to do
the verification.

Change-Id: Ia541e023b757b2e77c4eec9bb1670632c7a271fa
Closes-Bug: #1230323

neutron/db/l3_agentschedulers_db.py
neutron/db/migration/alembic_migrations/versions/31d7f831a591_add_constraint_for_routerid.py [new file with mode: 0644]
neutron/db/migration/alembic_migrations/versions/HEAD
neutron/scheduler/l3_agent_scheduler.py
neutron/tests/unit/test_l3_schedulers.py

index e9a63d28d42d8fec5c64b582f60acd81c9cab717..4c64b7b86745c8c8ee21dde01dc510167cbc49cd 100644 (file)
@@ -24,7 +24,6 @@ from neutron.common import constants
 from neutron.db import agents_db
 from neutron.db import agentschedulers_db
 from neutron.db import model_base
-from neutron.db import models_v2
 from neutron.extensions import l3agentscheduler
 
 
@@ -40,15 +39,16 @@ L3_AGENTS_SCHEDULER_OPTS = [
 cfg.CONF.register_opts(L3_AGENTS_SCHEDULER_OPTS)
 
 
-class RouterL3AgentBinding(model_base.BASEV2, models_v2.HasId):
+class RouterL3AgentBinding(model_base.BASEV2):
     """Represents binding between neutron routers and L3 agents."""
 
     router_id = sa.Column(sa.String(36),
-                          sa.ForeignKey("routers.id", ondelete='CASCADE'))
+                          sa.ForeignKey("routers.id", ondelete='CASCADE'),
+                          primary_key=True)
     l3_agent = orm.relation(agents_db.Agent)
     l3_agent_id = sa.Column(sa.String(36),
-                            sa.ForeignKey("agents.id",
-                                          ondelete='CASCADE'))
+                            sa.ForeignKey("agents.id", ondelete='CASCADE'),
+                            primary_key=True)
 
 
 class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
diff --git a/neutron/db/migration/alembic_migrations/versions/31d7f831a591_add_constraint_for_routerid.py b/neutron/db/migration/alembic_migrations/versions/31d7f831a591_add_constraint_for_routerid.py
new file mode 100644 (file)
index 0000000..8d79d38
--- /dev/null
@@ -0,0 +1,127 @@
+# Copyright 2014 OpenStack Foundation
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+#
+
+"""add constraint for routerid
+
+Revision ID: 31d7f831a591
+Revises: 37f322991f59
+Create Date: 2014-02-26 06:47:16.494393
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = '31d7f831a591'
+down_revision = '37f322991f59'
+
+from alembic import op
+import sqlalchemy as sa
+
+TABLE_NAME = 'routerl3agentbindings'
+PK_NAME = 'pk_routerl3agentbindings'
+
+fk_names = {'postgresql':
+            {'router_id':
+                'routerl3agentbindings_router_id_fkey',
+             'l3_agent_id':
+                'routerl3agentbindings_l3_agent_id_fkey'},
+            'mysql':
+            {'router_id':
+                'routerl3agentbindings_ibfk_2',
+             'l3_agent_id':
+                'routerl3agentbindings_ibfk_1'}}
+
+
+def upgrade(active_plugins=None, options=None):
+    # In order to sanitize the data during migration,
+    # the current records in the table need to be verified
+    # and all the duplicate records which violate the PK
+    # constraint need to be removed.
+    context = op.get_context()
+    if context.bind.dialect.name == 'postgresql':
+        op.execute('DELETE FROM %(table)s WHERE id in ('
+                   'SELECT %(table)s.id FROM %(table)s LEFT OUTER JOIN '
+                   '(SELECT MIN(id) as id, router_id, l3_agent_id '
+                   ' FROM %(table)s GROUP BY router_id, l3_agent_id) AS temp '
+                   'ON %(table)s.id = temp.id WHERE temp.id is NULL);'
+                   % {'table': TABLE_NAME})
+    else:
+        op.execute('DELETE %(table)s FROM %(table)s LEFT OUTER JOIN '
+                   '(SELECT MIN(id) as id, router_id, l3_agent_id '
+                   ' FROM %(table)s GROUP BY router_id, l3_agent_id) AS temp '
+                   'ON %(table)s.id = temp.id WHERE temp.id is NULL;'
+                   % {'table': TABLE_NAME})
+
+    op.drop_column(TABLE_NAME, 'id')
+
+    op.create_primary_key(
+        name=PK_NAME,
+        table_name=TABLE_NAME,
+        cols=['router_id', 'l3_agent_id']
+    )
+
+
+def downgrade(active_plugins=None, options=None):
+
+    context = op.get_context()
+    dialect = context.bind.dialect.name
+
+    # Drop the existed foreign key constraints
+    # In order to perform primary key changes
+    op.drop_constraint(
+        name=fk_names[dialect]['l3_agent_id'],
+        table_name=TABLE_NAME,
+        type_='foreignkey'
+    )
+    op.drop_constraint(
+        name=fk_names[dialect]['router_id'],
+        table_name=TABLE_NAME,
+        type_='foreignkey'
+    )
+
+    op.drop_constraint(
+        name=PK_NAME,
+        table_name=TABLE_NAME,
+        type_='primary'
+    )
+
+    op.add_column(
+        TABLE_NAME,
+        sa.Column('id', sa.String(32))
+    )
+
+    # Restore the foreign key constraints
+    op.create_foreign_key(
+        name=fk_names[dialect]['router_id'],
+        source=TABLE_NAME,
+        referent='routers',
+        local_cols=['router_id'],
+        remote_cols=['id'],
+        ondelete='CASCADE'
+    )
+
+    op.create_foreign_key(
+        name=fk_names[dialect]['l3_agent_id'],
+        source=TABLE_NAME,
+        referent='agents',
+        local_cols=['l3_agent_id'],
+        remote_cols=['id'],
+        ondelete='CASCADE'
+    )
+
+    op.create_primary_key(
+        name=PK_NAME,
+        table_name=TABLE_NAME,
+        cols=['id']
+    )
index 4bd668eab4994fedb5c9ae8030c251be9d197d81..2478aa43332d6eec4acb012daf56a45690958b2a 100644 (file)
@@ -1 +1 @@
-37f322991f59
\ No newline at end of file
+31d7f831a591
index 8c0beca08bb7a085db4b21cbd747634a608f5496..d6b4dc7d4ddb93e5f5c6fd841188e30a77f710cf 100644 (file)
@@ -16,6 +16,7 @@
 import abc
 import random
 
+from oslo.db import exception as db_exc
 import six
 from sqlalchemy.orm import exc
 from sqlalchemy import sql
@@ -145,15 +146,22 @@ class L3Scheduler(object):
 
     def bind_router(self, context, router_id, chosen_agent):
         """Bind the router to the l3 agent which has been chosen."""
-        with context.session.begin(subtransactions=True):
-            binding = l3_agentschedulers_db.RouterL3AgentBinding()
-            binding.l3_agent = chosen_agent
-            binding.router_id = router_id
-            context.session.add(binding)
-            LOG.debug(_('Router %(router_id)s is scheduled to '
-                        'L3 agent %(agent_id)s'),
-                      {'router_id': router_id,
-                       'agent_id': chosen_agent.id})
+        try:
+            with context.session.begin(subtransactions=True):
+                binding = l3_agentschedulers_db.RouterL3AgentBinding()
+                binding.l3_agent = chosen_agent
+                binding.router_id = router_id
+                context.session.add(binding)
+        except db_exc.DBDuplicateEntry:
+            LOG.debug('Router %(router_id)s has already been scheduled '
+                      'to L3 agent %(agent_id)s.',
+                      {'agent_id': chosen_agent.id,
+                       'router_id': router_id})
+            return
+
+        LOG.debug('Router %(router_id)s is scheduled to L3 agent '
+                  '%(agent_id)s', {'router_id': router_id,
+                                   'agent_id': chosen_agent.id})
 
 
 class ChanceScheduler(L3Scheduler):
index 99bf8d83ead40b445337633a64f7346321fab960..5e619db8d8632ec5cc3b6605b4b1a4c4e01fea40 100644 (file)
@@ -31,6 +31,7 @@ from neutron.db import l3_agentschedulers_db
 from neutron.extensions import l3 as ext_l3
 from neutron import manager
 from neutron.openstack.common import timeutils
+from neutron.scheduler import l3_agent_scheduler
 from neutron.tests.unit import test_db_plugin
 from neutron.tests.unit import test_l3_plugin
 
@@ -93,6 +94,7 @@ class L3SchedulerTestCase(l3_agentschedulers_db.L3AgentSchedulerDbMixin,
         agent_db = self.plugin.get_agents_db(self.adminContext,
                                              filters={'host': [HOST]})
         self.agent_id1 = agent_db[0].id
+        self.agent1 = agent_db[0]
 
         callback.report_state(self.adminContext,
                               agent_state={'agent_state': SECOND_L3_AGENT},
@@ -124,6 +126,39 @@ class L3SchedulerTestCase(l3_agentschedulers_db.L3AgentSchedulerDbMixin,
             router['router']['id'], subnet['subnet']['network_id'])
         self._delete('routers', router['router']['id'])
 
+    def _test_schedule_bind_router(self, agent, router):
+        ctx = self.adminContext
+        session = ctx.session
+        db = l3_agentschedulers_db.RouterL3AgentBinding
+        scheduler = l3_agent_scheduler.ChanceScheduler()
+
+        rid = router['router']['id']
+        scheduler.bind_router(ctx, rid, agent)
+        results = (session.query(db).filter_by(router_id=rid).all())
+        self.assertTrue(len(results) > 0)
+        self.assertIn(agent.id, [bind.l3_agent_id for bind in results])
+
+    def test_bind_new_router(self):
+        router = self._make_router(self.fmt,
+                                   tenant_id=str(uuid.uuid4()),
+                                   name='r1')
+        with mock.patch.object(l3_agent_scheduler.LOG, 'debug') as flog:
+            self._test_schedule_bind_router(self.agent1, router)
+            self.assertEqual(1, flog.call_count)
+            args, kwargs = flog.call_args
+            self.assertIn('is scheduled', args[0])
+
+    def test_bind_existing_router(self):
+        router = self._make_router(self.fmt,
+                                   tenant_id=str(uuid.uuid4()),
+                                   name='r2')
+        self._test_schedule_bind_router(self.agent1, router)
+        with mock.patch.object(l3_agent_scheduler.LOG, 'debug') as flog:
+            self._test_schedule_bind_router(self.agent1, router)
+            self.assertEqual(1, flog.call_count)
+            args, kwargs = flog.call_args
+            self.assertIn('has already been scheduled', args[0])
+
 
 class L3AgentChanceSchedulerTestCase(L3SchedulerTestCase):