From: Hong Hui Xiao Date: Wed, 4 Nov 2015 06:44:43 +0000 (-0500) Subject: Kill the vrrp orphan process when (re)spawn keepalived X-Git-Url: https://review.fuel-infra.org/gitweb?a=commitdiff_plain;h=2d1b53bcfa6c4d6fa5bca2ba4da9aaca66245a5b;p=openstack-build%2Fneutron-build.git Kill the vrrp orphan process when (re)spawn keepalived When keepalived crashed unexpectedly, the vrrp process that it associates with will be orphan process. This will make the VIP unable to migrate to the router in the same host. Also, neutron code is not able to respawn the keepalived process, because keepalived thinks itself is still running, according to [1-3]. As a result, neutron will report respawning keepalived all the time. Restart l3-agent will not help. This patch will check and delete the orphan vrrp process if there is any, in the processmonitor of l3 agent. More details can be found in the bug description and comments. [1] https://goo.gl/W3GL9I [2] https://goo.gl/F0Ixfb [3] https://goo.gl/dUqhTo Change-Id: Ia1759ed1365b845d404686a8cd25f882cce35caf Closes-Bug: #1511311 --- diff --git a/neutron/agent/linux/keepalived.py b/neutron/agent/linux/keepalived.py index ba9c74e6b..96b832c7f 100644 --- a/neutron/agent/linux/keepalived.py +++ b/neutron/agent/linux/keepalived.py @@ -383,19 +383,18 @@ class KeepalivedManager(object): def spawn(self): config_path = self._output_config_file() - def callback(pid_file): - cmd = ['keepalived', '-P', - '-f', config_path, - '-p', pid_file, - '-r', '%s-vrrp' % pid_file] - return cmd + keepalived_pm = self.get_process() + vrrp_pm = self._get_vrrp_process( + '%s-vrrp' % keepalived_pm.get_pid_file_name()) - pm = self.get_process(callback=callback) - pm.enable(reload_cfg=True) + keepalived_pm.default_cmd_callback = ( + self._get_keepalived_process_callback(vrrp_pm, config_path)) + + keepalived_pm.enable(reload_cfg=True) self.process_monitor.register(uuid=self.resource_id, service_name=KEEPALIVED_SERVICE_NAME, - monitored_process=pm) + monitored_process=keepalived_pm) LOG.debug('Keepalived spawned with config %s', config_path) @@ -413,3 +412,27 @@ class KeepalivedManager(object): self.namespace, pids_path=self.conf_path, default_cmd_callback=callback) + + def _get_vrrp_process(self, pid_file): + return external_process.ProcessManager( + cfg.CONF, + self.resource_id, + self.namespace, + pid_file=pid_file) + + def _get_keepalived_process_callback(self, vrrp_pm, config_path): + + def callback(pid_file): + # If keepalived process crashed unexpectedly, the vrrp process + # will be orphan and prevent keepalived process to be spawned. + # A check here will let the l3-agent to kill the orphan process + # and spawn keepalived successfully. + if vrrp_pm.active: + vrrp_pm.disable() + cmd = ['keepalived', '-P', + '-f', config_path, + '-p', pid_file, + '-r', '%s-vrrp' % pid_file] + return cmd + + return callback diff --git a/neutron/tests/functional/agent/linux/test_keepalived.py b/neutron/tests/functional/agent/linux/test_keepalived.py index d58f364ca..47c2613cb 100644 --- a/neutron/tests/functional/agent/linux/test_keepalived.py +++ b/neutron/tests/functional/agent/linux/test_keepalived.py @@ -13,12 +13,8 @@ # License for the specific language governing permissions and limitations # under the License. -from distutils import version -import re - from oslo_config import cfg from oslo_log import log as logging -import testtools from neutron._i18n import _ from neutron.agent.linux import external_process @@ -31,18 +27,6 @@ from neutron.tests.unit.agent.linux import test_keepalived LOG = logging.getLogger(__name__) -def keepalived_version_not_supported(): - try: - cmd = ['keepalived', '--version'] - out = utils.execute(cmd, return_stderr=True) - ver = re.search(r"Keepalived v(\d+\.\d+\.\d+)", out[1]).group(1) - return version.LooseVersion(ver) >= version.LooseVersion("1.2.11") - except (OSError, RuntimeError, IndexError, ValueError) as e: - LOG.debug("Exception while checking keepalived version. " - "Exception: %s", e) - return False - - class KeepalivedManagerTestCase(base.BaseTestCase, test_keepalived.KeepalivedConfBaseMixin): @@ -70,8 +54,7 @@ class KeepalivedManagerTestCase(base.BaseTestCase, self.assertEqual(self.expected_config.get_config_str(), self.manager.get_conf_on_disk()) - @testtools.skipIf(keepalived_version_not_supported(), 'bug/1511311') - def test_keepalived_respawns(self): + def _test_keepalived_respawns(self, normal_exit=True): self.manager.spawn() process = self.manager.get_process() pid = process.pid @@ -81,11 +64,19 @@ class KeepalivedManagerTestCase(base.BaseTestCase, sleep=0.01, exception=RuntimeError(_("Keepalived didn't spawn"))) - # force process crash, and see that when it comes back - # it's indeed a different process - utils.execute(['kill', '-9', pid], run_as_root=True) + exit_code = '-15' if normal_exit else '-9' + + # Exit the process, and see that when it comes back + # It's indeed a different process + utils.execute(['kill', exit_code, pid], run_as_root=True) utils.wait_until_true( lambda: process.active and pid != process.pid, timeout=5, sleep=0.01, exception=RuntimeError(_("Keepalived didn't respawn"))) + + def test_keepalived_respawns(self): + self._test_keepalived_respawns() + + def test_keepalived_respawn_with_unexpected_exit(self): + self._test_keepalived_respawns(False)