# vnic_type = mlnx_direct
# (StrOpt) Eswitch daemon end point connection url
-# daemon_endpoint = 'tcp://127.0.0.1:5001'
+# daemon_endpoint = 'tcp://127.0.0.1:60001'
# The number of milliseconds the agent will wait for
# response on request to daemon
# request_timeout = 3000
+# The number of retries the agent will send request
+# to daemon before giving up
+# retries = 3
+
+# The backoff rate multiplier for waiting period between retries
+# on request to daemon, i.e. value of 2 will double
+# the request timeout each retry
+# backoff_rate = 2
[agent]
# Agent's polling interval in seconds
# If treat devices fails - must resync with plugin
sync = self.process_network_ports(port_info)
ports = port_info['current']
+ except exceptions.RequestTimeout:
+ LOG.exception(_("Request timeout in agent event loop "
+ "eSwitchD is not responding - exiting..."))
+ raise SystemExit(1)
except Exception:
LOG.exception(_("Error in agent event loop"))
sync = True
from neutron.openstack.common import jsonutils
from neutron.openstack.common import log as logging
+from neutron.plugins.mlnx.common.comm_utils import RetryDecorator
from neutron.plugins.mlnx.common import exceptions
LOG = logging.getLogger(__name__)
self.poller.register(self._conn, zmq.POLLIN)
return self.__conn
+ @RetryDecorator(exceptions.RequestTimeout)
def send_msg(self, msg):
self._conn.send(msg)
self._conn.close()
self.poller.unregister(self._conn)
self.__conn = None
- raise exceptions.MlnxException(_("eSwitchD: Request timeout"))
+ raise exceptions.RequestTimeout()
def parse_response_msg(self, recv_msg):
msg = jsonutils.loads(recv_msg)
else:
error_msg = _("Unknown operation status %s") % msg['status']
LOG.error(error_msg)
- raise exceptions.MlnxException(error_msg)
+ raise exceptions.OperationFailed(err_msg=error_msg)
def get_attached_vnics(self):
LOG.debug(_("get_attached_vnics"))
--- /dev/null
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+#
+# Copyright 2013 Mellanox Technologies, Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+
+from oslo.config import cfg
+
+from neutron.openstack.common import log as logging
+from neutron.plugins.mlnx.common import config # noqa
+
+LOG = logging.getLogger(__name__)
+
+
+class RetryDecorator(object):
+ """Retry decorator reruns a method 'retries' times if an exception occurs.
+
+ Decorator for retrying a method if exceptionToCheck exception occurs
+ If method raises exception, retries 'retries' times with increasing
+ back off period between calls with 'interval' multiplier
+
+ :param exceptionToCheck: the exception to check
+ :param interval: initial delay between retries in seconds
+ :param retries: number of times to try before giving up
+ :raises: exceptionToCheck
+ """
+ sleep_fn = time.sleep
+
+ def __init__(self, exceptionToCheck,
+ interval=cfg.CONF.ESWITCH.request_timeout / 1000,
+ retries=cfg.CONF.ESWITCH.retries,
+ backoff_rate=cfg.CONF.ESWITCH.backoff_rate):
+ self.exc = exceptionToCheck
+ self.interval = interval
+ self.retries = retries
+ self.backoff_rate = backoff_rate
+
+ def __call__(self, original_func):
+ def decorated(*args, **kwargs):
+ sleep_interval = self.interval
+ num_of_iter = self.retries
+ while num_of_iter > 0:
+ try:
+ return original_func(*args, **kwargs)
+ except self.exc:
+ LOG.debug(_("Request timeout - call again after "
+ "%s seconds"), sleep_interval)
+ RetryDecorator.sleep_fn(sleep_interval)
+ num_of_iter -= 1
+ sleep_interval *= self.backoff_rate
+
+ return original_func(*args, **kwargs)
+ return decorated
cfg.IntOpt('request_timeout', default=3000,
help=_("The number of milliseconds the agent will wait for "
"response on request to daemon.")),
+ cfg.IntOpt('retries', default=3,
+ help=_("The number of retries the agent will send request "
+ "to daemon before giving up")),
+ cfg.IntOpt('backoff_rate', default=2,
+ help=_("backoff rate multiplier for waiting period between "
+ "retries for request to daemon, i.e. value of 2 will "
+ " double the request timeout each retry")),
]
agent_opts = [
class MlnxException(qexc.NeutronException):
message = _("Mlnx Exception: %(err_msg)s")
+
+
+class RequestTimeout(qexc.NeutronException):
+ message = _("Request Timeout: no response from eSwitchD")
+
+
+class OperationFailed(qexc.NeutronException):
+ message = _("Operation Failed: %(err_msg)s")
--- /dev/null
+# Copyright (c) 2013 OpenStack Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import mock
+from oslo.config import cfg
+
+from neutron.plugins.mlnx.common.comm_utils import RetryDecorator
+from neutron.plugins.mlnx.common import config # noqa
+from neutron.plugins.mlnx.common import exceptions
+from neutron.tests import base
+
+
+class WrongException(Exception):
+ pass
+
+
+class TestRetryDecorator(base.BaseTestCase):
+ def setUp(self):
+ super(TestRetryDecorator, self).setUp()
+ self.sleep_fn_p = mock.patch.object(RetryDecorator, 'sleep_fn')
+ self.sleep_fn = self.sleep_fn_p.start()
+ self.addCleanup(self.sleep_fn_p.stop)
+
+ def test_no_retry_required(self):
+ self.counter = 0
+
+ @RetryDecorator(exceptions.RequestTimeout, interval=2,
+ retries=3, backoff_rate=2)
+ def succeeds():
+ self.counter += 1
+ return 'success'
+
+ ret = succeeds()
+ self.assertFalse(self.sleep_fn.called)
+ self.assertEqual(ret, 'success')
+ self.assertEqual(self.counter, 1)
+
+ def test_retry_zero_times(self):
+ self.counter = 0
+ interval = 2
+ backoff_rate = 2
+ retries = 0
+
+ @RetryDecorator(exceptions.RequestTimeout, interval,
+ retries, backoff_rate)
+ def always_fails():
+ self.counter += 1
+ raise exceptions.RequestTimeout()
+
+ self.assertRaises(exceptions.RequestTimeout, always_fails)
+ self.assertEqual(self.counter, 1)
+ self.assertFalse(self.sleep_fn.called)
+
+ def test_retries_once(self):
+ self.counter = 0
+ interval = 2
+ backoff_rate = 2
+ retries = 3
+
+ @RetryDecorator(exceptions.RequestTimeout, interval,
+ retries, backoff_rate)
+ def fails_once():
+ self.counter += 1
+ if self.counter < 2:
+ raise exceptions.RequestTimeout()
+ else:
+ return 'success'
+
+ ret = fails_once()
+ self.assertEqual(ret, 'success')
+ self.assertEqual(self.counter, 2)
+ self.assertEqual(self.sleep_fn.call_count, 1)
+ self.sleep_fn.assert_called_with(interval)
+
+ def test_limit_is_reached(self):
+ self.counter = 0
+ retries = 3
+ interval = 2
+ backoff_rate = 4
+
+ @RetryDecorator(exceptions.RequestTimeout, interval,
+ retries, backoff_rate)
+ def always_fails():
+ self.counter += 1
+ raise exceptions.RequestTimeout()
+
+ self.assertRaises(exceptions.RequestTimeout, always_fails)
+ self.assertEqual(self.counter, retries + 1)
+ self.assertEqual(self.sleep_fn.call_count, retries)
+
+ expected_sleep_fn_arg = []
+ for i in range(retries):
+ expected_sleep_fn_arg.append(interval)
+ interval *= backoff_rate
+
+ self.sleep_fn.assert_has_calls(map(mock.call, expected_sleep_fn_arg))
+
+ def test_limit_is_reached_with_conf(self):
+ self.counter = 0
+
+ @RetryDecorator(exceptions.RequestTimeout)
+ def always_fails():
+ self.counter += 1
+ raise exceptions.RequestTimeout()
+
+ retry = cfg.CONF.ESWITCH.retries
+ interval = cfg.CONF.ESWITCH.request_timeout / 1000
+ delay_rate = cfg.CONF.ESWITCH.backoff_rate
+
+ expected_sleep_fn_arg = []
+ for i in range(retry):
+ expected_sleep_fn_arg.append(interval)
+ interval *= delay_rate
+
+ self.assertRaises(exceptions.RequestTimeout, always_fails)
+ self.assertEqual(self.counter, retry + 1)
+ self.assertEqual(self.sleep_fn.call_count, retry)
+ self.sleep_fn.assert_has_calls(map(mock.call, expected_sleep_fn_arg))
+
+ def test_wrong_exception_no_retry(self):
+
+ @RetryDecorator(exceptions.RequestTimeout)
+ def raise_unexpected_error():
+ raise WrongException("wrong exception")
+
+ self.assertRaises(WrongException, raise_unexpected_error)
+ self.assertFalse(self.sleep_fn.called)