From: Alexey Lebedeff Date: Mon, 4 Apr 2016 15:16:15 +0000 (+0300) Subject: Autotune erlang thread pool size X-Git-Tag: mos-9.0^0 X-Git-Url: https://review.fuel-infra.org/gitweb?a=commitdiff_plain;h=refs%2Fchanges%2F43%2F19143%2F4;p=packages%2Ftrusty%2Frabbitmq-server.git Autotune erlang thread pool size Backport from upstream https://github.com/binarin/rabbitmq-server/tree/rabbitmq-server-151 Other patches are moved around and added to streamline centos and ubuntu packaging. Change-Id: I669523ee1db5e1d30af247dd94e36cd31613c4a9 Closes-Bug: 1565829 --- diff --git a/debian/changelog b/debian/changelog index eb7e9ef..5dd3693 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +rabbitmq-server (3.6.1-1~u14.04+mos3) mos9.0; urgency=medium + + * Autodetect erlang async thread pool size. + + -- Alexey Lebedeff Mon, 04 Apr 2016 17:50:00 +0300 + rabbitmq-server (3.6.1-1~u14.04+mos2) mos8.0; urgency=medium * Use HiPE compilation. diff --git a/debian/patches/erlang-thread-pool-autotune.patch b/debian/patches/erlang-thread-pool-autotune.patch new file mode 100644 index 0000000..5a1214d --- /dev/null +++ b/debian/patches/erlang-thread-pool-autotune.patch @@ -0,0 +1,85 @@ +Description: Autotune erlang thread pool size + Useful for hosts with big number of CPU cores +Origin: upstream, https://github.com/binarin/rabbitmq-server/tree/rabbitmq-server-151 +Bug: https://github.com/binarin/rabbitmq-server/tree/rabbitmq-server-151 +Applied-Upstream: 3.6.2 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +--- a/deps/rabbit_common/src/rabbit_misc.erl ++++ b/deps/rabbit_common/src/rabbit_misc.erl +@@ -73,6 +73,7 @@ + -export([get_env/3]). + -export([get_channel_operation_timeout/0]). + -export([random/1]). ++-export([report_default_thread_pool_size/0]). + + %% Horrible macro to use in guards + -define(IS_BENIGN_EXIT(R), +@@ -263,6 +264,7 @@ + -spec(get_env/3 :: (atom(), atom(), term()) -> term()). + -spec(get_channel_operation_timeout/0 :: () -> non_neg_integer()). + -spec(random/1 :: (non_neg_integer()) -> non_neg_integer()). ++-spec(report_default_thread_pool_size/0 :: () -> 'ok'). + + -endif. + +@@ -1160,6 +1162,24 @@ random(N) -> + end, + random:uniform(N). + ++guess_number_of_cpu_cores() -> ++ case erlang:system_info(logical_processors_available) of ++ unknown -> % Happens on Mac OS X. ++ erlang:system_info(schedulers); ++ N -> N ++ end. ++ ++%% Discussion of choosen values is at ++%% https://github.com/rabbitmq/rabbitmq-server/issues/151 ++guess_default_thread_pool_size() -> ++ PoolSize = 16 * guess_number_of_cpu_cores(), ++ min(1024, max(64, PoolSize)). ++ ++report_default_thread_pool_size() -> ++ io:format("~b", [guess_default_thread_pool_size()]), ++ erlang:halt(0), ++ ok. ++ + %% ------------------------------------------------------------------------- + %% Begin copypasta from gen_server2.erl + +--- a/scripts/rabbitmq-defaults ++++ b/scripts/rabbitmq-defaults +@@ -40,6 +40,5 @@ MNESIA_BASE=${SYS_PREFIX}/var/lib/rabbit + ENABLED_PLUGINS_FILE=${SYS_PREFIX}/etc/rabbitmq/enabled_plugins + + PLUGINS_DIR="${RABBITMQ_HOME}/plugins" +-IO_THREAD_POOL_SIZE=64 + + CONF_ENV_FILE=${SYS_PREFIX}/etc/rabbitmq/rabbitmq-env.conf +--- a/scripts/rabbitmq-server ++++ b/scripts/rabbitmq-server +@@ -117,7 +117,23 @@ fi + # there is no other way of preventing their expansion. + set -f + ++# Lazy initialization of threed pool size - if it wasn't set ++# explicitly. This parameter is only needed when server is starting, ++# so it makes no sense to do this calculations in rabbitmq-env or ++# rabbitmq-defaults scripts. ++ensure_thread_pool_size() { ++ if [ -z "${RABBITMQ_IO_THREAD_POOL_SIZE}" ]; then ++ RABBITMQ_IO_THREAD_POOL_SIZE=$( ++ ${ERL_DIR}erl -pa "$RABBITMQ_EBIN_ROOT" \ ++ -boot "${CLEAN_BOOT_FILE}" \ ++ -noinput \ ++ -s rabbit_misc report_default_thread_pool_size ++ ) ++ fi ++} ++ + start_rabbitmq_server() { ++ ensure_thread_pool_size + RABBITMQ_CONFIG_FILE=$RABBITMQ_CONFIG_FILE \ + exec ${ERL_DIR}erl \ + -pa /var/lib/rabbitmq/native-code \ diff --git a/debian/patches/native-code-path.patch b/debian/patches/native-code-path.patch new file mode 100644 index 0000000..4044749 --- /dev/null +++ b/debian/patches/native-code-path.patch @@ -0,0 +1,12 @@ +Только в rabbitmq-server: .pc +diff -r -u rabbitmq-server-3.6.1/scripts/rabbitmq-server rabbitmq-server/scripts/rabbitmq-server +--- rabbitmq-server-3.6.1/scripts/rabbitmq-server 2016-02-09 15:55:29.000000000 +0300 ++++ rabbitmq-server/scripts/rabbitmq-server 2016-04-04 20:18:01.000000000 +0300 +@@ -115,6 +115,7 @@ + start_rabbitmq_server() { + RABBITMQ_CONFIG_FILE=$RABBITMQ_CONFIG_FILE \ + exec ${ERL_DIR}erl \ ++ -pa /var/lib/rabbitmq/native-code \ + -pa ${RABBITMQ_EBIN_ROOT} \ + ${RABBITMQ_START_RABBIT} \ + ${RABBITMQ_NAME_TYPE} ${RABBITMQ_NODENAME} \ diff --git a/debian/patches/rabbitmq-probe-ephemeral-port.patch b/debian/patches/rabbitmq-probe-ephemeral-port.patch new file mode 100644 index 0000000..4c031a9 --- /dev/null +++ b/debian/patches/rabbitmq-probe-ephemeral-port.patch @@ -0,0 +1,13 @@ +diff -ru rabbitmq-server-3.6.0.orig/deps/rabbit_common/src/rabbit_networking.erl rabbitmq-server-3.6.0/deps/rabbit_common/src/rabbit_networking.erl +--- rabbitmq-server-3.6.0.orig/deps/rabbit_common/src/rabbit_networking.erl 2015-12-14 13:41:01.000000000 +0300 ++++ rabbitmq-server-3.6.0/deps/rabbit_common/src/rabbit_networking.erl 2016-03-01 14:51:14.203046564 +0300 +@@ -50,7 +50,7 @@ + -include("rabbit.hrl"). + -include_lib("kernel/include/inet.hrl"). + +--define(FIRST_TEST_BIND_PORT, 10000). ++-define(FIRST_TEST_BIND_PORT, 32768). + + %% POODLE + -define(BAD_SSL_PROTOCOL_VERSIONS, [sslv3]). +Только в rabbitmq-server-3.6.0/deps/rabbit_common/src: rabbit_networking.erl.orig diff --git a/debian/patches/series b/debian/patches/series new file mode 100644 index 0000000..fd33cc5 --- /dev/null +++ b/debian/patches/series @@ -0,0 +1,4 @@ +rabbitmq-probe-ephemeral-port.patch +native-code-path.patch +zero-deps-systemd-1.patch +erlang-thread-pool-autotune.patch diff --git a/debian/patches/zero-deps-systemd-1.patch b/debian/patches/zero-deps-systemd-1.patch new file mode 100644 index 0000000..8a88842 --- /dev/null +++ b/debian/patches/zero-deps-systemd-1.patch @@ -0,0 +1,216 @@ +Только в rabbitmq-server-3.6.1/docs: rabbitmq-server.service.example +--- a/scripts/rabbitmq-server ++++ b/scripts/rabbitmq-server +@@ -47,7 +47,7 @@ case "$(uname -s)" in + exit $EX_CANTCREAT + fi + if ! echo $$ > ${RABBITMQ_PID_FILE}; then +- # Bettern diagnostics - otherwise the only report in logs is about failed 'echo' ++ # Better diagnostics - otherwise the only report in logs is about failed 'echo' + # command, but without any other details: neither what script has failed nor what + # file output was redirected to. + echo "Failed to write pid file: ${RABBITMQ_PID_FILE}" +@@ -58,8 +58,13 @@ esac + + RABBITMQ_EBIN_ROOT="${RABBITMQ_HOME}/ebin" + ++[ "$NOTIFY_SOCKET" ] && RUNNING_UNDER_SYSTEMD=true ++ + set +e + ++# NOTIFY_SOCKET is needed here to prevent epmd from impersonating the ++# success of our startup sequence to systemd. ++NOTIFY_SOCKET= \ + RABBITMQ_CONFIG_FILE=$RABBITMQ_CONFIG_FILE \ + RABBITMQ_DIST_PORT=$RABBITMQ_DIST_PORT \ + ${ERL_DIR}erl -pa "$RABBITMQ_EBIN_ROOT" \ +@@ -152,7 +157,20 @@ stop_rabbitmq_server() { + fi + } + +-if [ 'x' = "x$RABBITMQ_ALLOW_INPUT" -a -z "$detached" ]; then ++if [ "$RABBITMQ_ALLOW_INPUT" -o "$RUNNING_UNDER_SYSTEMD" -o "$detached" ]; then ++ # Run erlang VM directly, completely replacing current shell ++ # process - so the pid file written in the code above will be ++ # valid (unless detached, which is also handled in the code ++ # above). ++ # ++ # And also this is the correct mode to run the broker under ++ # systemd - there is no need in a proxy process that converts ++ # signals to graceful shutdown command, the unit file should already ++ # contain instructions for graceful shutdown. Also by removing ++ # this additional process we could simply use value returned by ++ # `os:getpid/0` for a systemd ready notification. ++ start_rabbitmq_server "$@" ++else + # When RabbitMQ runs in the foreground but the Erlang shell is + # disabled, we setup signal handlers to stop RabbitMQ properly. This + # is at least useful in the case of Docker. +@@ -161,7 +179,7 @@ if [ 'x' = "x$RABBITMQ_ALLOW_INPUT" -a - + RABBITMQ_SERVER_START_ARGS="${RABBITMQ_SERVER_START_ARGS} +B i" + + # Signal handlers. They all stop RabbitMQ properly (using +- # rabbitmqctl stop). Depending on the signal, this script will exwit ++ # rabbitmqctl stop). Depending on the signal, this script will exit + # with a non-zero error code: + # SIGHUP SIGTERM SIGTSTP + # They are considered a normal process termination, so the script +@@ -177,6 +195,4 @@ if [ 'x' = "x$RABBITMQ_ALLOW_INPUT" -a - + # Block until RabbitMQ exits or a signal is caught. + # Waits for last command (which is start_rabbitmq_server) + wait $! +-else +- start_rabbitmq_server "$@" + fi +--- a/src/rabbit.erl ++++ b/src/rabbit.erl +@@ -284,16 +284,120 @@ broker_start() -> + Plugins = rabbit_plugins:setup(), + ToBeLoaded = Plugins ++ ?APPS, + start_apps(ToBeLoaded), +- case os:type() of +- {win32, _} -> ok; +- _ -> case code:load_file(sd_notify) of +- {module, sd_notify} -> SDNotify = sd_notify, +- SDNotify:sd_notify(0, "READY=1"); +- {error, _} -> os:cmd("systemd-notify --ready") +- end +- end, ++ maybe_sd_notify(), + ok = log_broker_started(rabbit_plugins:active()). + ++%% Try to send systemd ready notification if it makes sense in the ++%% current environment. standard_error is used intentionally in all ++%% logging statements, so all this messages will end in systemd ++%% journal. ++maybe_sd_notify() -> ++ case sd_notify_ready() of ++ false -> ++ io:format(standard_error, "systemd READY notification failed, beware of timeouts~n", []); ++ _ -> ++ ok ++ end. ++ ++sd_notify_ready() -> ++ case {os:type(), os:getenv("NOTIFY_SOCKET")} of ++ {{win32, _}, _} -> ++ true; ++ {_, [_|_]} -> %% Non-empty NOTIFY_SOCKET, give it a try ++ sd_notify_legacy() orelse sd_notify_socat(); ++ _ -> ++ true ++ end. ++ ++sd_notify_data() -> ++ "READY=1\nSTATUS=Initialized\nMAINPID=" ++ os:getpid() ++ "\n". ++ ++sd_notify_legacy() -> ++ case code:load_file(sd_notify) of ++ {module, sd_notify} -> ++ SDNotify = sd_notify, ++ SDNotify:sd_notify(0, sd_notify_data()), ++ true; ++ {error, _} -> ++ false ++ end. ++ ++%% socat(1) is the most portable way the sd_notify could be ++%% implemented in erlang, without introducing some NIF. Currently the ++%% following issues prevent us from implementing it in a more ++%% reasonable way: ++%% - systemd-notify(1) is unstable for non-root users ++%% - erlang doesn't support unix domain sockets. ++%% ++%% Some details on how we ended with such a solution: ++%% https://github.com/rabbitmq/rabbitmq-server/issues/664 ++sd_notify_socat() -> ++ case sd_current_unit() of ++ {ok, Unit} -> ++ io:format(standard_error, "systemd unit for activation check: \"~s\"~n", [Unit]), ++ sd_notify_socat(Unit); ++ _ -> ++ false ++ end. ++ ++socat_socket_arg("@" ++ AbstractUnixSocket) -> ++ "abstract-sendto:" ++ AbstractUnixSocket; ++socat_socket_arg(UnixSocket) -> ++ "unix-sendto:" ++ UnixSocket. ++ ++sd_open_port() -> ++ open_port( ++ {spawn_executable, os:find_executable("socat")}, ++ [{args, [socat_socket_arg(os:getenv("NOTIFY_SOCKET")), "STDIO"]}, ++ use_stdio, out]). ++ ++sd_notify_socat(Unit) -> ++ case sd_open_port() of ++ {'EXIT', Exit} -> ++ io:format(standard_error, "Failed to start socat ~p~n", [Exit]), ++ false; ++ Port -> ++ Port ! {self(), {command, sd_notify_data()}}, ++ Result = sd_wait_activation(Port, Unit), ++ port_close(Port), ++ Result ++ end. ++ ++sd_current_unit() -> ++ case catch re:run(os:cmd("systemctl status " ++ os:getpid()), "([-.@0-9a-zA-Z]+)", [unicode, {capture, all_but_first, list}]) of ++ {'EXIT', _} -> ++ error; ++ {match, [Unit]} -> ++ {ok, Unit}; ++ _ -> ++ error ++ end. ++ ++sd_wait_activation(Port, Unit) -> ++ case os:find_executable("systemctl") of ++ false -> ++ io:format(standard_error, "'systemctl' unavailable, falling back to sleep~n", []), ++ timer:sleep(5000), ++ true; ++ _ -> ++ sd_wait_activation(Port, Unit, 10) ++ end. ++ ++sd_wait_activation(_, _, 0) -> ++ io:format(standard_error, "Service still in 'activating' state, bailing out~n", []), ++ false; ++sd_wait_activation(Port, Unit, AttemptsLeft) -> ++ case os:cmd("systemctl show --property=ActiveState " ++ Unit) of ++ "ActiveState=activating\n" -> ++ timer:sleep(1000), ++ sd_wait_activation(Port, Unit, AttemptsLeft - 1); ++ "ActiveState=" ++ _ -> ++ true; ++ _ = Err-> ++ io:format(standard_error, "Unexpected status from systemd ~p~n", [Err]), ++ false ++ end. ++ + start_it(StartFun) -> + Marker = spawn_link(fun() -> receive stop -> ok end end), + case catch register(rabbit_boot, Marker) of +@@ -332,6 +436,10 @@ stop_and_halt() -> + stop() + after + rabbit_log:info("Halting Erlang VM~n", []), ++ %% Also duplicate this information to stderr, so console where ++ %% foreground broker was running (or systemd journal) will ++ %% contain information about graceful termination. ++ io:format(standard_error, "Gracefully halting Erlang VM~n", []), + init:stop() + end, + ok. +@@ -693,7 +801,8 @@ print_banner() -> + "~n ########## Logs: ~s" + "~n ###### ## ~s" + "~n ##########" +- "~n Starting broker...", ++ "~n Starting broker..." ++ "~n", + [Product, Version, ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE, + log_location(kernel), log_location(sasl)]). + diff --git a/rabbitmq-server/scripts/rabbitmq-server b/rabbitmq-server/scripts/rabbitmq-server index 6538778..548a085 100755 --- a/rabbitmq-server/scripts/rabbitmq-server +++ b/rabbitmq-server/scripts/rabbitmq-server @@ -115,7 +115,6 @@ set -f start_rabbitmq_server() { RABBITMQ_CONFIG_FILE=$RABBITMQ_CONFIG_FILE \ exec ${ERL_DIR}erl \ - -pa /var/lib/rabbitmq/native-code \ -pa ${RABBITMQ_EBIN_ROOT} \ ${RABBITMQ_START_RABBIT} \ ${RABBITMQ_NAME_TYPE} ${RABBITMQ_NODENAME} \ diff --git a/tests/runtests.sh b/tests/runtests.sh index 08a5d16..5178de1 100755 --- a/tests/runtests.sh +++ b/tests/runtests.sh @@ -134,6 +134,41 @@ report-hipe-status() { run-ctl eval "lists:module_info(native)." } +# rabbit may fail to notify systemd about successfull startup, +# which'll result in timeouts and failures. +test-repeated-restart() { + local try_no + + stop-service rabbitmq-server + for try_no in $(seq 1 20); do + start-service rabbitmq-server + stop-service rabbitmq-server + done +} + +# Logrotate should be able to successfully signal rabbit about a need +# to reopen its logs. +test-logrotate-sanity() { + logrotate -f /etc/logrotate.d/rabbitmq-server +} + +validate-erlang-thread-pool-size() { + local pool_size_arg + start-service rabbitmq-server + pool_size_arg=$(ps ax | perl -nE '/-s rabbit boot/ && /^\s*(\d+).*?-A (\d+)/ && say "$2"' | head -n 1) + num_cores=$(erl -boot start_clean -noinput -eval 'io:format("~b", [erlang:system_info(logical_processors_available)]), erlang:halt(0).') + expected=$(($num_cores * 16)) + if [[ $expected -lt 64 ]]; then + expected=64 + elif [[ $expected -gt 1024 ]]; then + expected=1024 + fi + if [[ $pool_size_arg -ne $expected ]]; then + echo "Async pool $pool_size_arg, but expected $expected" + exit 1 + fi +} + case $PACKAGE in rabbitmq-server) install-helper-packages @@ -142,9 +177,12 @@ case $PACKAGE in configure-rabbitmq-server enable-management-plugin start-rabbitmq-server + test-logrotate-sanity rabbitmq-health-check rabbitmq-management-aliveness-test report-hipe-status + validate-erlang-thread-pool-size + test-repeated-restart ;; *) echo "test not defined, skipping...."