review.fuel-infra Code Review - packages/trusty/rabbitmq-server.git/commitdiff
Backport infinite loop detection 12/15512/3
author Alexey Lebedeff <alebedev@mirantis.com>
Thu, 24 Dec 2015 11:21:48 +0000 (14:21 +0300)
committer Alexey Lebedeff <alebedev@mirantis.com>
Thu, 24 Dec 2015 11:32:16 +0000 (14:32 +0300)
Upstream fix https://github.com/rabbitmq/rabbitmq-common/pull/26 (patch
modified to reflect current state of upstream code).

The sudden death of a cluster node could leave a queue process stuck,
which causes redeclare attempts to hang. With this patch, such a
condition is detected: the AMQP channel is closed and an error is
logged. It may also help us discover the underlying bug by localizing
it in time.

For the referenced partial bugs, it will allow us to confirm or reject
the hypothesis that they are related.

Change-Id: I09df5c5f2333cc462475798260cdfa9f4f5de654
Partial-Bug: #1515223
Partial-Bug: #1523622

debian/changelog
debian/patches/detect-stuck-queue-on-declare.diff [new file with mode: 0644]
debian/patches/series

index 2ee9d6178107228f490adb5d8f2d9edc19ee04e2..cc4dff8d5d85e23e61bfa7ffff472e3bc52de13a 100644 (file)
@@ -1,3 +1,9 @@
+rabbitmq-server (3.5.6-1~u14.04+mos4) mos8.0; urgency=medium
+
+  * Backport https://github.com/rabbitmq/rabbitmq-common/pull/26
+
+ -- Alexey Lebedeff <alebedev@mirantis.com>  Thu, 24 Dec 2015 14:28:56 +0300
+
 rabbitmq-server (3.5.6-1~u14.04+mos3) mos8.0; urgency=medium
 
   * Backport https://github.com/rabbitmq/rabbitmq-management/pull/84
diff --git a/debian/patches/detect-stuck-queue-on-declare.diff b/debian/patches/detect-stuck-queue-on-declare.diff
new file mode 100644 (file)
index 0000000..00c43f3
--- /dev/null
@@ -0,0 +1,35 @@
+Description: Detect infinite loop in AMQP channel code
+    The sudden death of a cluster node could leave a queue process stuck, which causes
+    redeclare attempts to hang. With this patch, such a condition is detected: the AMQP channel
+    is closed and an error is logged. It may also help us discover the underlying bug by
+    localizing the event in time.
+Author: Alexey Lebedeff <alebedev@mirantis.com>
+Origin: upstream, https://github.com/rabbitmq/rabbitmq-common/pull/26
+Bug: https://github.com/rabbitmq/rabbitmq-server/issues/349
+---
+This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
+--- a/src/rabbit_amqqueue.erl
++++ b/src/rabbit_amqqueue.erl
+@@ -385,6 +385,11 @@ not_found_or_absent_dirty(Name) ->
+     end.
+ with(Name, F, E) ->
++    with(Name, F, E, 2000).
++
++with(Name, _F, E, 0) ->
++    E(not_found_or_absent_dirty(Name));
++with(Name, F, E, RetriesLeft) ->
+     case lookup(Name) of
+         {ok, Q = #amqqueue{state = crashed}} ->
+             E({absent, Q, crashed});
+@@ -397,8 +402,8 @@ with(Name, F, E) ->
+             %% the retry loop.
+             rabbit_misc:with_exit_handler(
+               fun () -> false = rabbit_mnesia:is_process_alive(QPid),
+-                        timer:sleep(25),
+-                        with(Name, F, E)
++                        timer:sleep(30),
++                        with(Name, F, E, RetriesLeft - 1)
+               end, fun () -> F(Q) end);
+         {error, not_found} ->
+             E(not_found_or_absent_dirty(Name))
index f735084e098ec72d51cbbfdb021e0a3c711e421e..651a942ba8039c76510c9a0e7322ab203c29820a 100644 (file)
@@ -1,2 +1,3 @@
 fix-pmon-demonitor-function.diff
 fix-management-startup-after-split.diff
+detect-stuck-queue-on-declare.diff