]> review.fuel-infra Code Review - packages/trusty/rabbitmq-server.git/commitdiff
Autotune erlang thread pool size 43/19143/4 89/19189/1 mos-9.0
authorAlexey Lebedeff <alebedev@mirantis.com>
Mon, 4 Apr 2016 15:16:15 +0000 (18:16 +0300)
committerAlexey Lebedeff <alebedev@mirantis.com>
Mon, 4 Apr 2016 17:29:42 +0000 (20:29 +0300)
Backport from upstream https://github.com/binarin/rabbitmq-server/tree/rabbitmq-server-151

Other patches are moved around and added to streamline centos and ubuntu packaging.

Change-Id: I669523ee1db5e1d30af247dd94e36cd31613c4a9
Closes-Bug: 1565829

debian/changelog
debian/patches/erlang-thread-pool-autotune.patch [new file with mode: 0644]
debian/patches/native-code-path.patch [new file with mode: 0644]
debian/patches/rabbitmq-probe-ephemeral-port.patch [new file with mode: 0644]
debian/patches/series [new file with mode: 0644]
debian/patches/zero-deps-systemd-1.patch [new file with mode: 0644]
rabbitmq-server/scripts/rabbitmq-server
tests/runtests.sh

index eb7e9efeb1e80c9c0c32a022cb23040c3c214b11..5dd36934569640984ea0d2807f1b9369d5037be5 100644 (file)
@@ -1,3 +1,9 @@
+rabbitmq-server (3.6.1-1~u14.04+mos3) mos9.0; urgency=medium
+
+  * Autodetect erlang async thread pool size.
+
+ -- Alexey Lebedeff <binarin@binarin.ru>  Mon, 04 Apr 2016 17:50:00 +0300
+
 rabbitmq-server (3.6.1-1~u14.04+mos2) mos8.0; urgency=medium
 
   * Use HiPE compilation.
diff --git a/debian/patches/erlang-thread-pool-autotune.patch b/debian/patches/erlang-thread-pool-autotune.patch
new file mode 100644 (file)
index 0000000..5a1214d
--- /dev/null
@@ -0,0 +1,85 @@
+Description: Autotune erlang thread pool size
+ Useful for hosts with big number of CPU cores
+Origin: upstream, https://github.com/binarin/rabbitmq-server/tree/rabbitmq-server-151
+Bug: https://github.com/binarin/rabbitmq-server/tree/rabbitmq-server-151
+Applied-Upstream: 3.6.2
+---
+This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
+--- a/deps/rabbit_common/src/rabbit_misc.erl
++++ b/deps/rabbit_common/src/rabbit_misc.erl
+@@ -73,6 +73,7 @@
+ -export([get_env/3]).
+ -export([get_channel_operation_timeout/0]).
+ -export([random/1]).
++-export([report_default_thread_pool_size/0]).
+ %% Horrible macro to use in guards
+ -define(IS_BENIGN_EXIT(R),
+@@ -263,6 +264,7 @@
+ -spec(get_env/3 :: (atom(), atom(), term())  -> term()).
+ -spec(get_channel_operation_timeout/0 :: () -> non_neg_integer()).
+ -spec(random/1 :: (non_neg_integer()) -> non_neg_integer()).
++-spec(report_default_thread_pool_size/0 :: () -> 'ok').
+ -endif.
+@@ -1160,6 +1162,24 @@ random(N) ->
+     end,
+     random:uniform(N).
++guess_number_of_cpu_cores() ->
++    case erlang:system_info(logical_processors_available) of
++        unknown -> % Happens on Mac OS X.
++            erlang:system_info(schedulers);
++        N -> N
++    end.
++
++%% Discussion of choosen values is at
++%% https://github.com/rabbitmq/rabbitmq-server/issues/151
++guess_default_thread_pool_size() ->
++    PoolSize = 16 * guess_number_of_cpu_cores(),
++    min(1024, max(64, PoolSize)).
++
++report_default_thread_pool_size() ->
++    io:format("~b", [guess_default_thread_pool_size()]),
++    erlang:halt(0),
++    ok.
++
+ %% -------------------------------------------------------------------------
+ %% Begin copypasta from gen_server2.erl
+--- a/scripts/rabbitmq-defaults
++++ b/scripts/rabbitmq-defaults
+@@ -40,6 +40,5 @@ MNESIA_BASE=${SYS_PREFIX}/var/lib/rabbit
+ ENABLED_PLUGINS_FILE=${SYS_PREFIX}/etc/rabbitmq/enabled_plugins
+ PLUGINS_DIR="${RABBITMQ_HOME}/plugins"
+-IO_THREAD_POOL_SIZE=64
+ CONF_ENV_FILE=${SYS_PREFIX}/etc/rabbitmq/rabbitmq-env.conf
+--- a/scripts/rabbitmq-server
++++ b/scripts/rabbitmq-server
+@@ -117,7 +117,23 @@ fi
+ # there is no other way of preventing their expansion.
+ set -f
++# Lazy initialization of threed pool size - if it wasn't set
++# explicitly. This parameter is only needed when server is starting,
++# so it makes no sense to do this calculations in rabbitmq-env or
++# rabbitmq-defaults scripts.
++ensure_thread_pool_size() {
++    if [ -z "${RABBITMQ_IO_THREAD_POOL_SIZE}" ]; then
++        RABBITMQ_IO_THREAD_POOL_SIZE=$(
++            ${ERL_DIR}erl -pa "$RABBITMQ_EBIN_ROOT" \
++                      -boot "${CLEAN_BOOT_FILE}" \
++                      -noinput \
++                      -s rabbit_misc report_default_thread_pool_size
++        )
++    fi
++}
++
+ start_rabbitmq_server() {
++    ensure_thread_pool_size
+     RABBITMQ_CONFIG_FILE=$RABBITMQ_CONFIG_FILE \
+     exec ${ERL_DIR}erl \
+     -pa /var/lib/rabbitmq/native-code \
diff --git a/debian/patches/native-code-path.patch b/debian/patches/native-code-path.patch
new file mode 100644 (file)
index 0000000..4044749
--- /dev/null
@@ -0,0 +1,12 @@
+Только в rabbitmq-server: .pc
+diff -r -u rabbitmq-server-3.6.1/scripts/rabbitmq-server rabbitmq-server/scripts/rabbitmq-server
+--- rabbitmq-server-3.6.1/scripts/rabbitmq-server      2016-02-09 15:55:29.000000000 +0300
++++ rabbitmq-server/scripts/rabbitmq-server    2016-04-04 20:18:01.000000000 +0300
+@@ -115,6 +115,7 @@
+ start_rabbitmq_server() {
+     RABBITMQ_CONFIG_FILE=$RABBITMQ_CONFIG_FILE \
+     exec ${ERL_DIR}erl \
++    -pa /var/lib/rabbitmq/native-code \
+         -pa ${RABBITMQ_EBIN_ROOT} \
+         ${RABBITMQ_START_RABBIT} \
+         ${RABBITMQ_NAME_TYPE} ${RABBITMQ_NODENAME} \
diff --git a/debian/patches/rabbitmq-probe-ephemeral-port.patch b/debian/patches/rabbitmq-probe-ephemeral-port.patch
new file mode 100644 (file)
index 0000000..4c031a9
--- /dev/null
@@ -0,0 +1,13 @@
+diff -ru rabbitmq-server-3.6.0.orig/deps/rabbit_common/src/rabbit_networking.erl rabbitmq-server-3.6.0/deps/rabbit_common/src/rabbit_networking.erl
+--- rabbitmq-server-3.6.0.orig/deps/rabbit_common/src/rabbit_networking.erl    2015-12-14 13:41:01.000000000 +0300
++++ rabbitmq-server-3.6.0/deps/rabbit_common/src/rabbit_networking.erl 2016-03-01 14:51:14.203046564 +0300
+@@ -50,7 +50,7 @@
+ -include("rabbit.hrl").
+ -include_lib("kernel/include/inet.hrl").
+--define(FIRST_TEST_BIND_PORT, 10000).
++-define(FIRST_TEST_BIND_PORT, 32768).
+ %% POODLE
+ -define(BAD_SSL_PROTOCOL_VERSIONS, [sslv3]).
+Только в rabbitmq-server-3.6.0/deps/rabbit_common/src: rabbit_networking.erl.orig
diff --git a/debian/patches/series b/debian/patches/series
new file mode 100644 (file)
index 0000000..fd33cc5
--- /dev/null
@@ -0,0 +1,4 @@
+rabbitmq-probe-ephemeral-port.patch
+native-code-path.patch
+zero-deps-systemd-1.patch
+erlang-thread-pool-autotune.patch
diff --git a/debian/patches/zero-deps-systemd-1.patch b/debian/patches/zero-deps-systemd-1.patch
new file mode 100644 (file)
index 0000000..8a88842
--- /dev/null
@@ -0,0 +1,216 @@
+Только в rabbitmq-server-3.6.1/docs: rabbitmq-server.service.example
+--- a/scripts/rabbitmq-server
++++ b/scripts/rabbitmq-server
+@@ -47,7 +47,7 @@ case "$(uname -s)" in
+                    exit $EX_CANTCREAT
+                fi
+                if ! echo $$ > ${RABBITMQ_PID_FILE}; then
+-                   # Bettern diagnostics - otherwise the only report in logs is about failed 'echo'
++                   # Better diagnostics - otherwise the only report in logs is about failed 'echo'
+                    # command, but without any other details: neither what script has failed nor what
+                    # file output was redirected to.
+                    echo "Failed to write pid file: ${RABBITMQ_PID_FILE}"
+@@ -58,8 +58,13 @@ esac
+ RABBITMQ_EBIN_ROOT="${RABBITMQ_HOME}/ebin"
++[ "$NOTIFY_SOCKET" ] && RUNNING_UNDER_SYSTEMD=true
++
+ set +e
++# NOTIFY_SOCKET is needed here to prevent epmd from impersonating the
++# success of our startup sequence to systemd.
++NOTIFY_SOCKET= \
+ RABBITMQ_CONFIG_FILE=$RABBITMQ_CONFIG_FILE \
+ RABBITMQ_DIST_PORT=$RABBITMQ_DIST_PORT \
+     ${ERL_DIR}erl -pa "$RABBITMQ_EBIN_ROOT" \
+@@ -152,7 +157,20 @@ stop_rabbitmq_server() {
+     fi
+ }
+-if [ 'x' = "x$RABBITMQ_ALLOW_INPUT" -a -z "$detached" ]; then
++if [ "$RABBITMQ_ALLOW_INPUT" -o "$RUNNING_UNDER_SYSTEMD" -o "$detached" ]; then
++    # Run erlang VM directly, completely replacing current shell
++    # process - so the pid file written in the code above will be
++    # valid (unless detached, which is also handled in the code
++    # above).
++    #
++    # And also this is the correct mode to run the broker under
++    # systemd - there is no need in a proxy process that converts
++    # signals to graceful shutdown command, the unit file should already
++    # contain instructions for graceful shutdown. Also by removing
++    # this additional process we could simply use value returned by
++    # `os:getpid/0` for a systemd ready notification.
++    start_rabbitmq_server "$@"
++else
+     # When RabbitMQ runs in the foreground but the Erlang shell is
+     # disabled, we setup signal handlers to stop RabbitMQ properly. This
+     # is at least useful in the case of Docker.
+@@ -161,7 +179,7 @@ if [ 'x' = "x$RABBITMQ_ALLOW_INPUT" -a -
+     RABBITMQ_SERVER_START_ARGS="${RABBITMQ_SERVER_START_ARGS} +B i"
+     # Signal handlers. They all stop RabbitMQ properly (using
+-    # rabbitmqctl stop). Depending on the signal, this script will exwit
++    # rabbitmqctl stop). Depending on the signal, this script will exit
+     # with a non-zero error code:
+     #   SIGHUP SIGTERM SIGTSTP
+     #     They are considered a normal process termination, so the script
+@@ -177,6 +195,4 @@ if [ 'x' = "x$RABBITMQ_ALLOW_INPUT" -a -
+     # Block until RabbitMQ exits or a signal is caught.
+     # Waits for last command (which is start_rabbitmq_server)
+     wait $!
+-else
+-    start_rabbitmq_server "$@"
+ fi
+--- a/src/rabbit.erl
++++ b/src/rabbit.erl
+@@ -284,16 +284,120 @@ broker_start() ->
+     Plugins = rabbit_plugins:setup(),
+     ToBeLoaded = Plugins ++ ?APPS,
+     start_apps(ToBeLoaded),
+-    case os:type() of
+-        {win32, _} -> ok;
+-        _ -> case code:load_file(sd_notify) of
+-                 {module, sd_notify} -> SDNotify = sd_notify,
+-                                        SDNotify:sd_notify(0, "READY=1");
+-                 {error, _} -> os:cmd("systemd-notify --ready")
+-             end
+-    end,
++    maybe_sd_notify(),
+     ok = log_broker_started(rabbit_plugins:active()).
++%% Try to send systemd ready notification if it makes sense in the
++%% current environment. standard_error is used intentionally in all
++%% logging statements, so all this messages will end in systemd
++%% journal.
++maybe_sd_notify() ->
++    case sd_notify_ready() of
++        false ->
++            io:format(standard_error, "systemd READY notification failed, beware of timeouts~n", []);
++        _ ->
++            ok
++    end.
++
++sd_notify_ready() ->
++    case {os:type(), os:getenv("NOTIFY_SOCKET")} of
++        {{win32, _}, _} ->
++            true;
++        {_, [_|_]} -> %% Non-empty NOTIFY_SOCKET, give it a try
++            sd_notify_legacy() orelse sd_notify_socat();
++        _ ->
++            true
++    end.
++
++sd_notify_data() ->
++    "READY=1\nSTATUS=Initialized\nMAINPID=" ++ os:getpid() ++ "\n".
++
++sd_notify_legacy() ->
++    case code:load_file(sd_notify) of
++        {module, sd_notify} ->
++            SDNotify = sd_notify,
++            SDNotify:sd_notify(0, sd_notify_data()),
++            true;
++        {error, _} ->
++            false
++    end.
++
++%% socat(1) is the most portable way the sd_notify could be
++%% implemented in erlang, without introducing some NIF. Currently the
++%% following issues prevent us from implementing it in a more
++%% reasonable way:
++%% - systemd-notify(1) is unstable for non-root users
++%% - erlang doesn't support unix domain sockets.
++%%
++%% Some details on how we ended with such a solution:
++%%   https://github.com/rabbitmq/rabbitmq-server/issues/664
++sd_notify_socat() ->
++    case sd_current_unit() of
++        {ok, Unit} ->
++            io:format(standard_error, "systemd unit for activation check: \"~s\"~n", [Unit]),
++            sd_notify_socat(Unit);
++        _ ->
++            false
++    end.
++
++socat_socket_arg("@" ++ AbstractUnixSocket) ->
++    "abstract-sendto:" ++ AbstractUnixSocket;
++socat_socket_arg(UnixSocket) ->
++    "unix-sendto:" ++ UnixSocket.
++
++sd_open_port() ->
++    open_port(
++      {spawn_executable, os:find_executable("socat")},
++      [{args, [socat_socket_arg(os:getenv("NOTIFY_SOCKET")), "STDIO"]},
++       use_stdio, out]).
++
++sd_notify_socat(Unit) ->
++    case sd_open_port() of
++        {'EXIT', Exit} ->
++            io:format(standard_error, "Failed to start socat ~p~n", [Exit]),
++            false;
++        Port ->
++            Port ! {self(), {command, sd_notify_data()}},
++            Result = sd_wait_activation(Port, Unit),
++            port_close(Port),
++            Result
++    end.
++
++sd_current_unit() ->
++    case catch re:run(os:cmd("systemctl status " ++ os:getpid()), "([-.@0-9a-zA-Z]+)", [unicode, {capture, all_but_first, list}]) of
++        {'EXIT', _} ->
++            error;
++        {match, [Unit]} ->
++            {ok, Unit};
++        _ ->
++            error
++    end.
++
++sd_wait_activation(Port, Unit) ->
++    case os:find_executable("systemctl") of
++        false ->
++            io:format(standard_error, "'systemctl' unavailable, falling back to sleep~n", []),
++            timer:sleep(5000),
++            true;
++        _ ->
++            sd_wait_activation(Port, Unit, 10)
++    end.
++
++sd_wait_activation(_, _, 0) ->
++    io:format(standard_error, "Service still in 'activating' state, bailing out~n", []),
++    false;
++sd_wait_activation(Port, Unit, AttemptsLeft) ->
++    case os:cmd("systemctl show --property=ActiveState " ++ Unit) of
++        "ActiveState=activating\n" ->
++            timer:sleep(1000),
++            sd_wait_activation(Port, Unit, AttemptsLeft - 1);
++        "ActiveState=" ++ _ ->
++            true;
++        _ = Err->
++            io:format(standard_error, "Unexpected status from systemd ~p~n", [Err]),
++            false
++    end.
++
+ start_it(StartFun) ->
+     Marker = spawn_link(fun() -> receive stop -> ok end end),
+     case catch register(rabbit_boot, Marker) of
+@@ -332,6 +436,10 @@ stop_and_halt() ->
+         stop()
+     after
+         rabbit_log:info("Halting Erlang VM~n", []),
++        %% Also duplicate this information to stderr, so console where
++        %% foreground broker was running (or systemd journal) will
++        %% contain information about graceful termination.
++        io:format(standard_error, "Gracefully halting Erlang VM~n", []),
+         init:stop()
+     end,
+     ok.
+@@ -693,7 +801,8 @@ print_banner() ->
+               "~n  ##########  Logs: ~s"
+               "~n  ######  ##        ~s"
+               "~n  ##########"
+-              "~n              Starting broker...",
++              "~n              Starting broker..."
++              "~n",
+               [Product, Version, ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE,
+                log_location(kernel), log_location(sasl)]).
index 65387788d063922ff97e6cf4c3fc10730d13f7d3..548a085434ef759d51c4ba5acccb9da39791e115 100755 (executable)
@@ -115,7 +115,6 @@ set -f
 start_rabbitmq_server() {
     RABBITMQ_CONFIG_FILE=$RABBITMQ_CONFIG_FILE \
     exec ${ERL_DIR}erl \
-    -pa /var/lib/rabbitmq/native-code \
         -pa ${RABBITMQ_EBIN_ROOT} \
         ${RABBITMQ_START_RABBIT} \
         ${RABBITMQ_NAME_TYPE} ${RABBITMQ_NODENAME} \
index 08a5d169912f535316f1f22bf274a9086a7a1a85..5178de139d326aedd33087e0c1f1ebf39bd63cb7 100755 (executable)
@@ -134,6 +134,41 @@ report-hipe-status() {
     run-ctl eval "lists:module_info(native)."
 }
 
+# rabbit may fail to notify systemd about successfull startup,
+# which'll result in timeouts and failures.
+test-repeated-restart() {
+    local try_no
+
+    stop-service rabbitmq-server
+    for try_no in $(seq 1 20); do
+        start-service rabbitmq-server
+        stop-service rabbitmq-server
+    done
+}
+
+# Logrotate should be able to successfully signal rabbit about a need
+# to reopen its logs.
+test-logrotate-sanity() {
+    logrotate -f /etc/logrotate.d/rabbitmq-server
+}
+
+validate-erlang-thread-pool-size() {
+    local pool_size_arg
+    start-service rabbitmq-server
+    pool_size_arg=$(ps ax | perl -nE '/-s rabbit boot/ && /^\s*(\d+).*?-A (\d+)/ && say "$2"' | head -n 1)
+    num_cores=$(erl -boot start_clean -noinput -eval 'io:format("~b", [erlang:system_info(logical_processors_available)]), erlang:halt(0).')
+    expected=$(($num_cores * 16))
+    if [[ $expected -lt 64 ]]; then
+        expected=64
+    elif [[ $expected -gt 1024 ]]; then
+        expected=1024
+    fi
+    if [[ $pool_size_arg -ne $expected ]]; then
+        echo "Async pool $pool_size_arg, but expected $expected"
+        exit 1
+    fi
+}
+
 case $PACKAGE in
     rabbitmq-server)
         install-helper-packages
@@ -142,9 +177,12 @@ case $PACKAGE in
         configure-rabbitmq-server
         enable-management-plugin
         start-rabbitmq-server
+        test-logrotate-sanity
         rabbitmq-health-check
         rabbitmq-management-aliveness-test
         report-hipe-status
+        validate-erlang-thread-pool-size
+        test-repeated-restart
         ;;
     *)
         echo "test not defined, skipping...."