from sqlalchemy import sql
from neutron.common import constants
+from neutron.common import rpc as n_rpc
from neutron.common import utils as n_utils
from neutron import context as n_ctx
from neutron.db import agents_db
from neutron.db import model_base
from neutron.extensions import l3agentscheduler
from neutron import manager
-from neutron.openstack.common.gettextutils import _LI, _LW
+from neutron.openstack.common.gettextutils import _LE, _LI, _LW
from neutron.openstack.common import log as logging
from neutron.openstack.common import loopingcall
from neutron.openstack.common import timeutils
RouterL3AgentBinding.router_id).
filter(sa.or_(l3_attrs_db.RouterExtraAttributes.ha == sql.false(),
l3_attrs_db.RouterExtraAttributes.ha == sql.null())))
-
- for binding in down_bindings:
- LOG.warn(_LW("Rescheduling router %(router)s from agent %(agent)s "
- "because the agent did not report to the server in "
- "the last %(dead_time)s seconds."),
- {'router': binding.router_id,
- 'agent': binding.l3_agent_id,
- 'dead_time': agent_dead_limit})
- self.reschedule_router(context, binding.router_id)
+ try:
+ for binding in down_bindings:
+ LOG.warn(_LW(
+ "Rescheduling router %(router)s from agent %(agent)s "
+ "because the agent did not report to the server in "
+ "the last %(dead_time)s seconds."),
+ {'router': binding.router_id,
+ 'agent': binding.l3_agent_id,
+ 'dead_time': agent_dead_limit})
+ try:
+ self.reschedule_router(context, binding.router_id)
+ except (l3agentscheduler.RouterReschedulingFailed,
+ n_rpc.RemoteError):
+ # Catch individual router rescheduling errors here
+ # so one broken one doesn't stop the iteration.
+ LOG.exception(_LE("Failed to reschedule router %s"),
+ binding.router_id)
+ except db_exc.DBError:
+ # Catch DB errors here so a transient DB connectivity issue
+ # doesn't stop the loopingcall.
+ LOG.exception(_LE("Exception encountered during router "
+ "rescheduling."))
def validate_agent_router_combination(self, context, agent, router):
"""Validate if the router can be correctly assigned to the agent.
import mock
from oslo.config import cfg
+from oslo.db import exception as db_exc
from webob import exc
from neutron.api import extensions
from neutron.api.rpc.handlers import l3_rpc
from neutron.api.v2 import attributes
from neutron.common import constants
+from neutron.common import rpc as n_rpc
from neutron import context
from neutron.db import agents_db
from neutron.db import l3_agentschedulers_db
agt_db.admin_state_up = state
self.adminContext.session.commit()
def test_router_rescheduler_catches_rpc_db_and_reschedule_exceptions(self):
    """Check which reschedule_router failures the rescheduler absorbs.

    ``reschedule_router`` is patched with a list of side effects: a
    DBError, a RemoteError, a RouterReschedulingFailed and finally a
    ValueError.  The first three rescheduling runs are expected to
    complete without raising; the ValueError is expected to propagate.
    """
    with self.router():
        rpc_callback = l3_rpc.L3RpcCallback()
        self._register_agent_states()
        # schedule the router to host A
        rpc_callback.sync_routers(self.adminContext, host=L3_HOSTA)

        l3_plugin = manager.NeutronManager.get_service_plugins().get(
            service_constants.L3_ROUTER_NAT)
        # mock consumes one entry per reschedule_router call, in order.
        failures = [
            db_exc.DBError(),
            n_rpc.RemoteError(),
            l3agentscheduler.RouterReschedulingFailed(router_id='f',
                                                      agent_id='f'),
            ValueError('this raises'),
        ]
        patcher = mock.patch.object(l3_plugin, 'reschedule_router',
                                    side_effect=failures)
        patcher.start()

        # these first three (DBError, RemoteError, schedule err) should
        # not raise any errors
        for _unused in range(3):
            self._take_down_agent_and_run_reschedule(L3_HOSTA)

        # ValueError is not caught so it should raise
        self.assertRaises(ValueError,
                          self._take_down_agent_and_run_reschedule,
                          L3_HOSTA)
+
def test_router_rescheduler_iterates_after_reschedule_failure(self):
    """Check that every router gets a reschedule attempt despite failures.

    ``reschedule_router`` is patched to raise RouterReschedulingFailed on
    each call; after one rescheduling run the mock must have recorded a
    call for each of the two routers.
    """
    l3_plugin = manager.NeutronManager.get_service_plugins().get(
        service_constants.L3_ROUTER_NAT)
    rpc_callback = l3_rpc.L3RpcCallback()
    self._register_agent_states()
    with contextlib.nested(self.router(), self.router()) as routers:
        # schedule the routers to host A
        rpc_callback.sync_routers(self.adminContext, host=L3_HOSTA)

        failure = l3agentscheduler.RouterReschedulingFailed(
            router_id='f', agent_id='f')
        reschedule_mock = mock.patch.object(
            l3_plugin, 'reschedule_router', side_effect=failure).start()
        self._take_down_agent_and_run_reschedule(L3_HOSTA)

        # make sure both had a reschedule attempt even though first failed
        expected_calls = [mock.call(mock.ANY, router['router']['id'])
                          for router in routers]
        reschedule_mock.assert_has_calls(expected_calls, any_order=True)
+
def test_router_is_not_rescheduled_from_alive_agent(self):
with self.router():
l3_rpc_cb = l3_rpc.L3RpcCallback()