]> review.fuel-infra Code Review - openstack-build/neutron-build.git/commitdiff
Add proccess monitor to keepalived
authorMike Kolesnik <mkolesni@redhat.com>
Tue, 3 Feb 2015 08:58:47 +0000 (10:58 +0200)
committerMike Kolesnik <mkolesni@redhat.com>
Mon, 9 Mar 2015 13:31:34 +0000 (15:31 +0200)
Adding process monitor to keepalived code, so that keepalived processes
launched by the L3 agent will get monitored the same as other processes
launched by the L3 agent.

Old monitoring code was removed since it's not needed anymore.

Implements: blueprint agent-child-processes-status

Change-Id: I94a889ee07286ab3c6cdab9ab15e5aee6fbd133a

neutron/agent/l3/ha.py
neutron/agent/l3/ha_router.py
neutron/agent/linux/keepalived.py
neutron/tests/functional/agent/linux/test_keepalived.py
neutron/tests/functional/agent/linux/test_process_monitor.py
neutron/tests/functional/agent/test_l3_agent.py

index dc34b93d18e700c59a64593f576ebeeef04b9b69..5758abba09a1065750d34ee44efaae5cc9446a84 100644 (file)
@@ -21,7 +21,6 @@ from neutron.agent.linux import keepalived
 from neutron.common import constants as l3_constants
 from neutron.i18n import _LE
 from neutron.openstack.common import log as logging
-from neutron.openstack.common import periodic_task
 
 LOG = logging.getLogger(__name__)
 
@@ -69,17 +68,8 @@ class AgentMixin(object):
                             ha_port['mac_address'])
         ri.ha_port = ha_port
 
-        ri._init_keepalived_manager()
+        ri._init_keepalived_manager(self.process_monitor)
         ri._add_keepalived_notifiers()
 
     def process_ha_router_removed(self, ri):
         ri.ha_network_removed()
-
-    def get_ha_routers(self):
-        return (router for router in self.router_info.values() if router.is_ha)
-
-    @periodic_task.periodic_task
-    def _ensure_keepalived_alive(self, context):
-        # TODO(amuller): Use external_process.ProcessMonitor
-        for router in self.get_ha_routers():
-            router.keepalived_manager.revive()
index b12813314ebc77ffb87c070b7106fb5c9e9dabda..0147463547dab6154dd5d60c1eaa2d533166941e 100644 (file)
@@ -68,16 +68,13 @@ class HaRouter(router.RouterInfo):
             LOG.debug('Error while reading HA state for %s', self.router_id)
             return None
 
-    def get_keepalived_manager(self):
-        return keepalived.KeepalivedManager(
+    def _init_keepalived_manager(self, process_monitor):
+        self.keepalived_manager = keepalived.KeepalivedManager(
             self.router['id'],
             keepalived.KeepalivedConf(),
             conf_path=self.agent_conf.ha_confs_path,
-            namespace=self.ns_name)
-
-    def _init_keepalived_manager(self):
-        # TODO(Carl) This looks a bit funny, doesn't it?
-        self.keepalived_manager = self.get_keepalived_manager()
+            namespace=self.ns_name,
+            process_monitor=process_monitor)
 
         config = self.keepalived_manager.config
 
@@ -102,7 +99,7 @@ class HaRouter(router.RouterInfo):
         config.add_instance(instance)
 
     def spawn_keepalived(self):
-        self.keepalived_manager.spawn_or_restart()
+        self.keepalived_manager.spawn()
 
     def disable_keepalived(self):
         self.keepalived_manager.disable()
@@ -213,13 +210,8 @@ class HaRouter(router.RouterInfo):
         it manage IPv4 addresses. In order to do that, we must delete
         the address first as it is autoconfigured by the kernel.
         """
-        process = keepalived.KeepalivedManager.get_process(
-            self.agent_conf,
-            self.router_id,
-            self.ns_name,
-            self.agent_conf.ha_confs_path)
-        if process.active:
-            manager = self.get_keepalived_manager()
+        manager = self.keepalived_manager
+        if manager.get_process().active:
             conf = manager.get_conf_on_disk()
             managed_by_keepalived = conf and ipv6_lladdr in conf
             if managed_by_keepalived:
index 0286815558fe1e8e42c2c23f6bd3792345ab4119..c448f41ae3599cba88e98e05ba3bacd3e2595d8e 100644 (file)
@@ -23,7 +23,6 @@ from oslo_config import cfg
 from neutron.agent.linux import external_process
 from neutron.agent.linux import utils
 from neutron.common import exceptions
-from neutron.i18n import _LW
 from neutron.openstack.common import log as logging
 
 VALID_STATES = ['MASTER', 'BACKUP']
@@ -33,6 +32,7 @@ HA_DEFAULT_PRIORITY = 50
 PRIMARY_VIP_RANGE_SIZE = 24
 # TODO(amuller): Use L3 agent constant when new constants module is introduced.
 FIP_LL_SUBNET = '169.254.30.0/23'
+KEEPALIVED_SERVICE_NAME = 'keepalived'
 
 
 LOG = logging.getLogger(__name__)
@@ -365,14 +365,13 @@ class KeepalivedManager(KeepalivedNotifierMixin):
     """
 
     def __init__(self, resource_id, config, conf_path='/tmp',
-                 namespace=None):
+                 namespace=None, process_monitor=None):
         self.resource_id = resource_id
         self.config = config
         self.namespace = namespace
+        self.process_monitor = process_monitor
         self.conf_path = conf_path
-        self.conf = cfg.CONF
         self.process = None
-        self.spawned = False
 
     def _output_config_file(self):
         config_str = self.config.get_config_str()
@@ -393,11 +392,6 @@ class KeepalivedManager(KeepalivedNotifierMixin):
     def spawn(self):
         config_path = self._output_config_file()
 
-        self.process = self.get_process(self.conf,
-                                        self.resource_id,
-                                        self.namespace,
-                                        self.conf_path)
-
         def callback(pid_file):
             cmd = ['keepalived', '-P',
                    '-f', config_path,
@@ -405,41 +399,26 @@ class KeepalivedManager(KeepalivedNotifierMixin):
                    '-r', '%s-vrrp' % pid_file]
             return cmd
 
-        self.process.enable(callback, reload_cfg=True)
+        pm = self.get_process(callback=callback)
+        pm.enable(reload_cfg=True)
 
-        self.spawned = True
-        LOG.debug('Keepalived spawned with config %s', config_path)
+        self.process_monitor.register(uuid=self.resource_id,
+                                      service_name=KEEPALIVED_SERVICE_NAME,
+                                      monitored_process=pm)
 
-    def spawn_or_restart(self):
-        if self.process:
-            self.restart()
-        else:
-            self.spawn()
-
-    def restart(self):
-        if self.process.active:
-            self._output_config_file()
-            self.process.reload_cfg()
-        else:
-            LOG.warn(_LW('A previous instance of keepalived seems to be dead, '
-                         'unable to restart it, a new instance will be '
-                         'spawned'))
-            self.process.disable()
-            self.spawn()
+        LOG.debug('Keepalived spawned with config %s', config_path)
 
     def disable(self):
-        if self.process:
-            self.process.disable(sig='15')
-            self.spawned = False
+        self.process_monitor.unregister(uuid=self.resource_id,
+                                        service_name=KEEPALIVED_SERVICE_NAME)
 
-    def revive(self):
-        if self.spawned and not self.process.active:
-            self.restart()
+        pm = self.get_process()
+        pm.disable(sig='15')
 
-    @classmethod
-    def get_process(cls, conf, resource_id, namespace, conf_path):
+    def get_process(self, callback=None):
         return external_process.ProcessManager(
-            conf,
-            resource_id,
-            namespace,
-            pids_path=conf_path)
+            cfg.CONF,
+            self.resource_id,
+            self.namespace,
+            pids_path=self.conf_path,
+            default_cmd_callback=callback)
index 656905eea4ab7ed21e7e3d76edd0b1ab51864b9d..9e1060ea07ee374df49946e20c558d151adc868f 100644 (file)
@@ -17,6 +17,7 @@ from oslo_config import cfg
 
 from neutron.agent.linux import external_process
 from neutron.agent.linux import keepalived
+from neutron.agent.linux import utils
 from neutron.tests import base
 from neutron.tests.unit.agent.linux import test_keepalived
 
@@ -24,13 +25,21 @@ from neutron.tests.unit.agent.linux import test_keepalived
 class KeepalivedManagerTestCase(base.BaseTestCase,
                                 test_keepalived.KeepalivedConfBaseMixin):
 
-    def test_keepalived_spawn(self):
-        expected_config = self._get_config()
-        manager = keepalived.KeepalivedManager('router1', expected_config,
-                                               conf_path=cfg.CONF.state_path)
-        self.addCleanup(manager.disable)
+    def setUp(self):
+        super(KeepalivedManagerTestCase, self).setUp()
+        cfg.CONF.set_override('check_child_processes_interval', 1, 'AGENT')
+
+        self.expected_config = self._get_config()
+        self.process_monitor = external_process.ProcessMonitor(cfg.CONF,
+                                                               'router')
+        self.manager = keepalived.KeepalivedManager(
+            'router1', self.expected_config, conf_path=cfg.CONF.state_path,
+            process_monitor=self.process_monitor)
+        self.addCleanup(self.manager.get_process().disable)
+        self.addCleanup(self.process_monitor.stop)
 
-        manager.spawn()
+    def test_keepalived_spawn(self):
+        self.manager.spawn()
         process = external_process.ProcessManager(
             cfg.CONF,
             'router1',
@@ -38,5 +47,18 @@ class KeepalivedManagerTestCase(base.BaseTestCase,
             pids_path=cfg.CONF.state_path)
         self.assertTrue(process.active)
 
-        self.assertEqual(expected_config.get_config_str(),
-                         manager.get_conf_on_disk())
+        self.assertEqual(self.expected_config.get_config_str(),
+                         self.manager.get_conf_on_disk())
+
+    def test_keepalived_respawns(self):
+        self.manager.spawn()
+        process = self.manager.get_process()
+        self.assertTrue(process.active)
+
+        process.disable(sig='15')
+
+        utils.wait_until_true(
+            lambda: process.active,
+            timeout=5,
+            sleep=0.01,
+            exception=RuntimeError(_("Keepalived didn't respawn")))
index aca151dd6f7707c21fad0cb469c4778925fcb1d4..609c79cae763f3f287585f380aa147cda4f1e498 100644 (file)
@@ -34,6 +34,7 @@ class BaseTestProcessMonitor(base.BaseTestCase):
         self._process_monitor = None
         self.create_child_processes_manager('respawn')
         self.addCleanup(self.cleanup_spawned_children)
+        self.addCleanup(self._process_monitor.stop)
 
     def create_child_processes_manager(self, action):
         cfg.CONF.set_override('check_child_processes_action', action, 'AGENT')
index 9400916642c9e9471ad4f58ae024d8c067948d7f..f8569370bbb54db47cbc0dff5445117e15984603 100755 (executable)
@@ -428,14 +428,14 @@ class L3AgentTestCase(L3AgentTestFramework):
 
         if enable_ha:
             self._assert_ha_device(router)
-            self.assertTrue(router.keepalived_manager.process.active)
+            self.assertTrue(router.keepalived_manager.get_process().active)
 
         self._delete_router(self.agent, router.router_id)
 
         self._assert_interfaces_deleted_from_ovs()
         self._assert_router_does_not_exist(router)
         if enable_ha:
-            self.assertFalse(router.keepalived_manager.process.active)
+            self.assertFalse(router.keepalived_manager.get_process().active)
 
     def _assert_external_device(self, router):
         external_port = router.get_ex_gw_port()