From a0b12a27a4fe62876ca74e02dce3b9041561ed9b Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Thu, 9 Nov 2023 13:01:39 +0100 Subject: [PATCH 01/18] Wait for DNS in cets_discovery optionally --- src/cets_discovery.erl | 57 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/src/cets_discovery.erl b/src/cets_discovery.erl index f39ab5cf..603366c2 100644 --- a/src/cets_discovery.erl +++ b/src/cets_discovery.erl @@ -90,11 +90,16 @@ join_status := not_running | running, should_retry_join := boolean(), timer_ref := reference() | undefined, - pending_wait_for_ready := [gen_server:from()] + pending_wait_for_ready := [gen_server:from()], + dns_status := dns_status() }. +-type dns_status() :: ready | waiting. +-type dns_inet_family() :: inet | inet6. %% Backend could define its own options --type opts() :: #{name := atom(), _ := _}. +-type opts() :: #{ + name := atom(), wait_for_dns := boolean(), dns_inet_family := dns_inet_family(), _ := _ +}. -type start_result() :: {ok, pid()} | {error, term()}. -type server() :: pid() | atom(). -type system_info() :: map(). @@ -159,6 +164,7 @@ init(Opts) -> BackendState = Mod:init(Opts), %% Changes phase from initial to regular (affects the check interval) erlang:send_after(timer:minutes(5), self(), enter_regular_phase), + DNSStatus = maybe_wait_for_dns_async(Opts), {ok, #{ phase => initial, results => [], @@ -174,7 +180,8 @@ init(Opts) -> join_status => not_running, should_retry_join => false, timer_ref => undefined, - pending_wait_for_ready => [] + pending_wait_for_ready => [], + dns_status => DNSStatus }}. -spec handle_call(term(), from(), state()) -> {reply, term(), state()} | {noreply, state()}. @@ -228,6 +235,9 @@ handle_info({ping_result, Node, Result}, State) -> {noreply, handle_ping_result(Node, Result, State)}; handle_info(enter_regular_phase, State) -> {noreply, State#{phase := regular}}; +handle_info(dns_is_ready, State) -> + self() ! 
check, + {noreply, State#{dns_status := ready}}; handle_info(Msg, State) -> ?LOG_ERROR(#{what => unexpected_info, msg => Msg}), {noreply, State}. @@ -239,6 +249,8 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. -spec handle_check(state()) -> state(). +handle_check(State = #{dns_status := waiting}) -> + State; handle_check(State = #{tables := []}) -> %% No tables to track, skip State; @@ -454,3 +466,42 @@ has_join_result_for(Node, Table, #{results := Results}) -> -spec handle_system_info(state()) -> system_info(). handle_system_info(State) -> State#{verify_ready => verify_ready(State)}. + +-spec wait_for_dns(dns_inet_family()) -> ok. +wait_for_dns(Family) -> + {node, _Name, Host} = dist_util:split_node(node()), + wait_for_dns(Family, Host, 1). + +wait_for_dns(Family, Host, N) -> + case inet_res:gethostbyname(Host, Family) of + {ok, _} -> + ok; + {error, Reason} -> + case N rem 50 of + 0 -> + ?LOG_WARNING(#{ + what => wait_for_dns, + node => node(), + host => Host, + reason => Reason + }); + _ -> + ok + end, + timer:sleep(100), + wait_for_dns(Family, Host, N + 1) + end. + +-spec maybe_wait_for_dns_async(opts()) -> dns_status(). +maybe_wait_for_dns_async(Opts = #{wait_for_dns := true}) -> + Me = self(), + %% or inet6 + Family = maps:get(dns_inet_family, Opts, inet), + spawn_link(fun() -> + wait_for_dns(Family), + Me ! dns_is_ready + end), + waiting; +maybe_wait_for_dns_async(#{}) -> + %% Skip waiting + ready. 
From 94ac2ddb30a7830d2390433edbc59966c6e98122 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Thu, 9 Nov 2023 13:11:11 +0100 Subject: [PATCH 02/18] Improve net_adm:ping/1 Do not call net_kernel to connect to a node, if its name is not resolvable Do not disconnect from a node if ping fails --- src/cets_discovery.erl | 2 +- src/cets_ping.erl | 44 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 src/cets_ping.erl diff --git a/src/cets_discovery.erl b/src/cets_discovery.erl index 603366c2..c97c2632 100644 --- a/src/cets_discovery.erl +++ b/src/cets_discovery.erl @@ -327,7 +327,7 @@ ping_not_connected_nodes(Nodes) -> Self = self(), NotConNodes = Nodes -- [node() | nodes()], [ - spawn(fun() -> Self ! {ping_result, Node, net_adm:ping(Node)} end) + spawn(fun() -> Self ! {ping_result, Node, cets_ping:ping(Node)} end) || Node <- lists:sort(NotConNodes) ], ok. diff --git a/src/cets_ping.erl b/src/cets_ping.erl new file mode 100644 index 00000000..bed41489 --- /dev/null +++ b/src/cets_ping.erl @@ -0,0 +1,44 @@ +-module(cets_ping). +-export([ping/1]). +ping(Node) when is_atom(Node) -> + %% It is important to understand, that initial setup for dist connections + %% is done by the single net_kernel process. + %% It calls net_kernel:setup, which calls inet_tcp_dist, which calls + %% erl_epmd:address_please/3, which does a DNS request. + %% If DNS is slow - net_kernel process would become busy. + %% But if we have a lot of offline nodes in the CETS discovery table, + %% we would try to call net_kernel for each node (even if we probably would receive + %% {error, nxdomain} from erl_epmd:address_please/3). + %% So, we first do nslookup here and only after that we try to connect. 
+ case lists:member(Node, nodes()) of + true -> + pong; + false -> + case dist_util:split_node(Node) of + {node, _Name, Host} -> + case {inet:getaddr(Host, inet6), inet:getaddr(Host, inet)} of + {{error, _}, {error, _}} -> + pang; + _ -> + ping_without_disconnect(Node) + end; + _ -> + pang + end + end. + +%% net_adm:ping/1 but without disconnect_node +%% (because disconnect_node could introduce more chaos and it is not atomic) +ping_without_disconnect(Node) -> + Msg = {is_auth, node()}, + Dst = {net_kernel, Node}, + try gen:call(Dst, '$gen_call', Msg, infinity) of + {ok, yes} -> + pong; + _ -> + % erlang:disconnect_node(Node), + pang + catch + _:_ -> + pang + end. From 68cff831e45101ed229f3af225fed3f1b8dad2e2 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Fri, 10 Nov 2023 11:04:02 +0100 Subject: [PATCH 03/18] Rewrite ping logic - use connect_node there instead Add ping_pairs function ping_pairs does not use net_adm:ping anymore --- src/cets_join.erl | 11 +---------- src/cets_ping.erl | 50 +++++++++++++++++++++++++++++++++++------------ 2 files changed, 38 insertions(+), 23 deletions(-) diff --git a/src/cets_join.erl b/src/cets_join.erl index 28a01394..ad449d6d 100644 --- a/src/cets_join.erl +++ b/src/cets_join.erl @@ -207,16 +207,7 @@ check_could_reach_each_other(Info, LocPids, RemPids) -> {min(LocNode, RemNode), max(LocNode, RemNode)} || LocNode <- LocNodes, RemNode <- RemNodes, LocNode =/= RemNode ]), - Results = - [ - {Node1, Node2, - cets_long:run_tracked( - #{task => ping_node, node1 => Node1, node2 => Node2}, fun() -> - rpc:call(Node1, net_adm, ping, [Node2], 10000) - end - )} - || {Node1, Node2} <- Pairs - ], + Results = cets_ping:ping_pairs(Pairs), NotConnected = [X || {_Node1, _Node2, Res} = X <- Results, Res =/= pong], case NotConnected of [] -> diff --git a/src/cets_ping.erl b/src/cets_ping.erl index bed41489..6fb86608 100644 --- a/src/cets_ping.erl +++ b/src/cets_ping.erl @@ -1,5 +1,5 @@ -module(cets_ping). --export([ping/1]). 
+-export([ping/1, ping_pairs/1]). ping(Node) when is_atom(Node) -> %% It is important to understand, that initial setup for dist connections %% is done by the single net_kernel process. @@ -20,25 +20,49 @@ ping(Node) when is_atom(Node) -> {{error, _}, {error, _}} -> pang; _ -> - ping_without_disconnect(Node) + connect_ping(Node) end; _ -> pang end end. -%% net_adm:ping/1 but without disconnect_node -%% (because disconnect_node could introduce more chaos and it is not atomic) -ping_without_disconnect(Node) -> - Msg = {is_auth, node()}, - Dst = {net_kernel, Node}, - try gen:call(Dst, '$gen_call', Msg, infinity) of - {ok, yes} -> +connect_ping(Node) -> + %% We could use net_adm:ping/1 but it does: + %% - disconnect node on pang - we don't want that + %% (because it could disconnect already connected node because of race conditions) + %% - it calls net_kernel's gen_server of the remote server, + %% but it could be busy doing something, + %% which means slower response time. + case net_kernel:connect_node(Node) of + true -> pong; _ -> - % erlang:disconnect_node(Node), - pang - catch - _:_ -> pang end. + +-spec ping_pairs([{node(), node()}]) -> [{node(), node(), pong | Reason :: term()}]. +ping_pairs(Pairs) -> + %% We could use rpc:multicall(Nodes, cets_ping, ping, Args). + %% But it means more chance of nodes trying to contact each other. + ping_pairs_stop_on_pang(Pairs). + +ping_pairs_stop_on_pang([{Node1, Node2} | Pairs]) -> + F = fun() -> rpc:call(Node1, cets_ping, ping, [Node2], 10000) end, + Info = #{task => ping_node, node1 => Node1, node2 => Node2}, + Res = cets_long:run_tracked(Info, F), + case Res of + pong -> + [{Node1, Node2, pong} | ping_pairs_stop_on_pang(Pairs)]; + Other -> + %% We do not need to ping the rest of nodes - + %% one node returning pang is enough to cancel join. 
+ %% We could exit earlier and safe some time + %% (connect_node to the dead node could be time consuming) + [{Node1, Node2, Other} | fail_pairs(Pairs, skipped)] + end; +ping_pairs_stop_on_pang([]) -> + []. + +fail_pairs(Pairs, Reason) -> + [{Node1, Node2, Reason} || {Node1, Node2} <- Pairs]. From 4b31db045a72424df73d56dcd202250b3d9f4e91 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Fri, 10 Nov 2023 15:16:24 +0100 Subject: [PATCH 04/18] Report if it took long time to get the node name resolvable --- src/cets_discovery.erl | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/cets_discovery.erl b/src/cets_discovery.erl index c97c2632..92330a29 100644 --- a/src/cets_discovery.erl +++ b/src/cets_discovery.erl @@ -470,12 +470,27 @@ handle_system_info(State) -> -spec wait_for_dns(dns_inet_family()) -> ok. wait_for_dns(Family) -> {node, _Name, Host} = dist_util:split_node(node()), - wait_for_dns(Family, Host, 1). + {ok, N} = wait_for_dns(Family, Host, 1), + case N > 50 of + true -> + %% Report if it took long time to get the node name resolvable + ?LOG_WARNING(#{ + what => dns_is_ready, + text => <<"Successfully resolved our node name after a long waiting">>, + node => node(), + host => Host, + tries => N + }); + false -> + ok + end, + ok. 
+ wait_for_dns(Family, Host, N) -> case inet_res:gethostbyname(Host, Family) of {ok, _} -> - ok; + {ok, N}; {error, Reason} -> case N rem 50 of 0 -> @@ -483,7 +498,8 @@ wait_for_dns(Family, Host, N) -> what => wait_for_dns, node => node(), host => Host, - reason => Reason + reason => Reason, + tries => N }); _ -> ok From 35fd6635ffb5e98d2a435945cbd0800d13064f66 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Fri, 10 Nov 2023 15:26:15 +0100 Subject: [PATCH 05/18] Do check_could_reach_each_other before check_fully_connected Do pings before sending any messages to new nodes --- src/cets_discovery.erl | 5 ++--- src/cets_join.erl | 4 ++-- test/cets_SUITE.erl | 3 +-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/cets_discovery.erl b/src/cets_discovery.erl index 92330a29..31bedae2 100644 --- a/src/cets_discovery.erl +++ b/src/cets_discovery.erl @@ -481,11 +481,10 @@ wait_for_dns(Family) -> host => Host, tries => N }); - false -> - ok + false -> + ok end, ok. - wait_for_dns(Family, Host, N) -> case inet_res:gethostbyname(Host, Family) of diff --git a/src/cets_join.erl b/src/cets_join.erl index ad449d6d..95b63387 100644 --- a/src/cets_join.erl +++ b/src/cets_join.erl @@ -195,9 +195,9 @@ get_pids(Pid) -> check_pids(Info, LocPids, RemPids, JoinOpts) -> check_do_not_overlap(Info, LocPids, RemPids), checkpoint(before_check_fully_connected, JoinOpts), + check_could_reach_each_other(Info, LocPids, RemPids), check_fully_connected(Info, LocPids), - check_fully_connected(Info, RemPids), - check_could_reach_each_other(Info, LocPids, RemPids). + check_fully_connected(Info, RemPids). -spec check_could_reach_each_other(cets_long:log_info(), cets:servers(), cets:servers()) -> ok. 
check_could_reach_each_other(Info, LocPids, RemPids) -> diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 2ea8cf76..c527354f 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -2087,8 +2087,7 @@ joining_not_fully_connected_node_is_not_allowed(Config) -> %% Pid5 and Pid3 could contact each other. %% Pid3 could contact Pid1 (they are joined). %% But Pid5 cannot contact Pid1. - {error, - {task_failed, {{nodedown, Node1}, {gen_server, call, [_, other_servers, infinity]}}, _}} = + {error, {task_failed, check_could_reach_each_other_failed, _}} = rpc(Peer5, cets_join, join, [lock_name(Config), #{}, Pid5, Pid3]), %% Still connected cets:insert(Pid1, {r1}), From 539f9036d99dba2c78ee20389b0673b193c26614 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 14 Nov 2023 10:07:01 +0100 Subject: [PATCH 06/18] Report transaction retry on info level, not error --- src/cets_join.erl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cets_join.erl b/src/cets_join.erl index 95b63387..b63974c5 100644 --- a/src/cets_join.erl +++ b/src/cets_join.erl @@ -84,10 +84,12 @@ join_loop(LockKey, Info, LocalPid, RemotePid, Start, JoinOpts) -> %% Just lock all nodes, no magic here :) Nodes = [node() | nodes()], Retries = 1, + %% global could abort the transaction when one of the nodes goes down. + %% It could usually abort it during startup or update. 
case global:trans(LockRequest, F, Nodes, Retries) of aborted -> checkpoint(before_retry, JoinOpts), - ?LOG_ERROR(Info#{what => join_retry, reason => lock_aborted}), + ?LOG_INFO(Info#{what => join_retry, reason => lock_aborted}), join_loop(LockKey, Info, LocalPid, RemotePid, Start, JoinOpts); Result -> Result From 68224d80d605e2d6064bfe5b1670dd6680763da3 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 14 Nov 2023 10:28:45 +0100 Subject: [PATCH 07/18] Log nodeup/nodedown events with warning level --- src/cets_discovery.erl | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/src/cets_discovery.erl b/src/cets_discovery.erl index 31bedae2..3032d0bd 100644 --- a/src/cets_discovery.erl +++ b/src/cets_discovery.erl @@ -91,10 +91,12 @@ should_retry_join := boolean(), timer_ref := reference() | undefined, pending_wait_for_ready := [gen_server:from()], - dns_status := dns_status() + dns_status := dns_status(), + nodeup_timestamps := #{node() => milliseconds()} }. -type dns_status() :: ready | waiting. -type dns_inet_family() :: inet | inet6. +-type milliseconds() :: integer(). %% Backend could define its own options -type opts() :: #{ @@ -165,7 +167,7 @@ init(Opts) -> %% Changes phase from initial to regular (affects the check interval) erlang:send_after(timer:minutes(5), self(), enter_regular_phase), DNSStatus = maybe_wait_for_dns_async(Opts), - {ok, #{ + State = #{ phase => initial, results => [], nodes => [], @@ -181,8 +183,13 @@ init(Opts) -> should_retry_join => false, timer_ref => undefined, pending_wait_for_ready => [], - dns_status => DNSStatus - }}. + dns_status => DNSStatus, + nodeup_timestamps => #{} + }, + %% Set initial timestamps because we would not receive nodeup events for + %% already connected nodes + State2 = lists:foldl(fun remember_nodeup_timestamp/2, State, nodes()), + {ok, State2}. -spec handle_call(term(), from(), state()) -> {reply, term(), state()} | {noreply, state()}. 
handle_call(get_tables, _From, State = #{tables := Tables}) -> @@ -223,12 +230,15 @@ handle_info(check, State) -> handle_info({handle_check_result, Result, BackendState}, State) -> {noreply, handle_get_nodes_result(Result, BackendState, State)}; handle_info({nodeup, Node}, State) -> + ?LOG_WARNING(#{what => nodeup, remote_node => Node}), State2 = remove_node_from_unavailable_list(Node, State), - {noreply, try_joining(State2)}; -handle_info({nodedown, _Node}, State) -> + {noreply, remember_nodeup_timestamp(Node, try_joining(State2))}; +handle_info({nodedown, Node}, State) -> + {NodeUpTime, State2} = remove_nodeup_timestamp(Node, State), + ?LOG_WARNING(#{what => nodedown, remote_node => Node, connected_for_milliseconds => NodeUpTime}), %% Do another check to update unavailable_nodes list self() ! check, - {noreply, State}; + {noreply, State2}; handle_info({joining_finished, Results}, State) -> {noreply, handle_joining_finished(Results, State)}; handle_info({ping_result, Node, Result}, State) -> @@ -520,3 +530,20 @@ maybe_wait_for_dns_async(Opts = #{wait_for_dns := true}) -> maybe_wait_for_dns_async(#{}) -> %% Skip waiting ready. + +remember_nodeup_timestamp(Node, State = #{nodeup_timestamps := Map}) -> + Time = erlang:system_time(millisecond), + Map2 = Map#{Node => Time}, + State#{nodeup_timestamps := Map2}. + +remove_nodeup_timestamp(Node, State = #{nodeup_timestamps := Map}) -> + StartTime = maps:get(Node, Map, unknown), + NodeUpTime = calculate_uptime(StartTime), + Map2 = maps:remove(Node, State), + {NodeUpTime, Map2}. + +calculate_uptime(unknown) -> + unknown; +calculate_uptime(StartTime) -> + Time = erlang:system_time(millisecond), + Time - StartTime. 
From b9f91bfcff5bd5b00942d534565b817de9fc0029 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 14 Nov 2023 11:33:40 +0100 Subject: [PATCH 08/18] Report downtime duration --- src/cets_discovery.erl | 45 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/src/cets_discovery.erl b/src/cets_discovery.erl index 3032d0bd..12ea3233 100644 --- a/src/cets_discovery.erl +++ b/src/cets_discovery.erl @@ -92,7 +92,8 @@ timer_ref := reference() | undefined, pending_wait_for_ready := [gen_server:from()], dns_status := dns_status(), - nodeup_timestamps := #{node() => milliseconds()} + nodeup_timestamps := #{node() => milliseconds()}, + nodedown_timestamps := #{node() => milliseconds()} }. -type dns_status() :: ready | waiting. -type dns_inet_family() :: inet | inet6. @@ -184,7 +185,8 @@ init(Opts) -> timer_ref => undefined, pending_wait_for_ready => [], dns_status => DNSStatus, - nodeup_timestamps => #{} + nodeup_timestamps => #{}, + nodedown_timestamps => #{} }, %% Set initial timestamps because we would not receive nodeup events for %% already connected nodes @@ -230,12 +232,17 @@ handle_info(check, State) -> handle_info({handle_check_result, Result, BackendState}, State) -> {noreply, handle_get_nodes_result(Result, BackendState, State)}; handle_info({nodeup, Node}, State) -> - ?LOG_WARNING(#{what => nodeup, remote_node => Node}), - State2 = remove_node_from_unavailable_list(Node, State), - {noreply, remember_nodeup_timestamp(Node, try_joining(State2))}; + {NodeDownTime, State2} = handle_nodeup(Node, State), + ?LOG_WARNING(#{ + what => nodeup, remote_node => Node, downtime_millisecond_duration => NodeDownTime + }), + State3 = remove_node_from_unavailable_list(Node, State2), + {noreply, try_joining(State3)}; handle_info({nodedown, Node}, State) -> - {NodeUpTime, State2} = remove_nodeup_timestamp(Node, State), - ?LOG_WARNING(#{what => nodedown, remote_node => Node, connected_for_milliseconds => NodeUpTime}), + 
{NodeUpTime, State2} = handle_nodedown(Node, State), + ?LOG_WARNING(#{ + what => nodedown, remote_node => Node, connected_millisecond_duration => NodeUpTime + }), %% Do another check to update unavailable_nodes list self() ! check, {noreply, State2}; @@ -531,19 +538,41 @@ maybe_wait_for_dns_async(#{}) -> %% Skip waiting ready. +handle_nodedown(Node, State) -> + State2 = remember_nodedown_timestamp(Node, State), + remove_nodeup_timestamp(Node, State2). + +handle_nodeup(Node, State) -> + State2 = remember_nodeup_timestamp(Node, State), + {get_downtime(Node, State2), State2}. + remember_nodeup_timestamp(Node, State = #{nodeup_timestamps := Map}) -> Time = erlang:system_time(millisecond), Map2 = Map#{Node => Time}, State#{nodeup_timestamps := Map2}. +remember_nodedown_timestamp(Node, State = #{nodedown_timestamps := Map}) -> + Time = erlang:system_time(millisecond), + Map2 = Map#{Node => Time}, + State#{nodedown_timestamps := Map2}. + remove_nodeup_timestamp(Node, State = #{nodeup_timestamps := Map}) -> StartTime = maps:get(Node, Map, unknown), NodeUpTime = calculate_uptime(StartTime), Map2 = maps:remove(Node, State), - {NodeUpTime, Map2}. + {NodeUpTime, State#{nodeup_timestamps := Map2}}. calculate_uptime(unknown) -> unknown; calculate_uptime(StartTime) -> Time = erlang:system_time(millisecond), Time - StartTime. + +get_downtime(Node, #{nodedown_timestamps := Map}) -> + case maps:get(Node, Map, unknown) of + unknown -> + unknown; + WentDown -> + Time = erlang:system_time(millisecond), + Time - WentDown + end. 
From faf71a7386fc309e34e46fbf4cc933f153664138 Mon Sep 17 00:00:00 2001
From: Mikhail Uvarov
Date: Tue, 14 Nov 2023 12:02:38 +0100
Subject: [PATCH 09/18] Use undefined instead of unknown for timestamps in logs

---
 src/cets_discovery.erl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/cets_discovery.erl b/src/cets_discovery.erl
index 12ea3233..02d04c2a 100644
--- a/src/cets_discovery.erl
+++ b/src/cets_discovery.erl
@@ -557,21 +557,21 @@ remember_nodedown_timestamp(Node, State = #{nodedown_timestamps := Map}) ->
     State#{nodedown_timestamps := Map2}.
 
 remove_nodeup_timestamp(Node, State = #{nodeup_timestamps := Map}) ->
-    StartTime = maps:get(Node, Map, unknown),
+    StartTime = maps:get(Node, Map, undefined),
     NodeUpTime = calculate_uptime(StartTime),
     Map2 = maps:remove(Node, State),
     {NodeUpTime, State#{nodeup_timestamps := Map2}}.
 
-calculate_uptime(unknown) ->
-    unknown;
+calculate_uptime(undefined) ->
+    undefined;
 calculate_uptime(StartTime) ->
     Time = erlang:system_time(millisecond),
     Time - StartTime.
 
get_downtime(Node, #{nodedown_timestamps := Map}) -> - case maps:get(Node, Map, unknown) of - unknown -> - unknown; + case maps:get(Node, Map, undefined) of + undefined -> + undefined; WentDown -> Time = erlang:system_time(millisecond), Time - WentDown From 29c5ccc2744bba66c3b9841dcdff7db0d465dd87 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 14 Nov 2023 12:18:07 +0100 Subject: [PATCH 10/18] Skip undefined timestamps in logs --- src/cets_discovery.erl | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/cets_discovery.erl b/src/cets_discovery.erl index 02d04c2a..c230ab4b 100644 --- a/src/cets_discovery.erl +++ b/src/cets_discovery.erl @@ -233,16 +233,20 @@ handle_info({handle_check_result, Result, BackendState}, State) -> {noreply, handle_get_nodes_result(Result, BackendState, State)}; handle_info({nodeup, Node}, State) -> {NodeDownTime, State2} = handle_nodeup(Node, State), - ?LOG_WARNING(#{ - what => nodeup, remote_node => Node, downtime_millisecond_duration => NodeDownTime - }), + ?LOG_WARNING( + set_defined(downtime_millisecond_duration, NodeDownTime, #{ + what => nodeup, remote_node => Node + }) + ), State3 = remove_node_from_unavailable_list(Node, State2), {noreply, try_joining(State3)}; handle_info({nodedown, Node}, State) -> {NodeUpTime, State2} = handle_nodedown(Node, State), - ?LOG_WARNING(#{ - what => nodedown, remote_node => Node, connected_millisecond_duration => NodeUpTime - }), + ?LOG_WARNING( + set_defined(connected_millisecond_duration, NodeUpTime, #{ + what => nodedown, remote_node => Node + }) + ), %% Do another check to update unavailable_nodes list self() ! check, {noreply, State2}; @@ -576,3 +580,8 @@ get_downtime(Node, #{nodedown_timestamps := Map}) -> Time = erlang:system_time(millisecond), Time - WentDown end. + +set_defined(_Key, undefined, Map) -> + Map; +set_defined(Key, Value, Map) -> + Map#{Key => Value}. 
From 3701aeb14aae01c45a85c122a9b8f07dd7a06f01 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 14 Nov 2023 12:42:35 +0100 Subject: [PATCH 11/18] Report alive nodes count on nodeup/nodedown --- src/cets_discovery.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cets_discovery.erl b/src/cets_discovery.erl index c230ab4b..40628438 100644 --- a/src/cets_discovery.erl +++ b/src/cets_discovery.erl @@ -235,7 +235,7 @@ handle_info({nodeup, Node}, State) -> {NodeDownTime, State2} = handle_nodeup(Node, State), ?LOG_WARNING( set_defined(downtime_millisecond_duration, NodeDownTime, #{ - what => nodeup, remote_node => Node + what => nodeup, remote_node => Node, alive_nodes => length(nodes()) + 1 }) ), State3 = remove_node_from_unavailable_list(Node, State2), @@ -244,7 +244,7 @@ handle_info({nodedown, Node}, State) -> {NodeUpTime, State2} = handle_nodedown(Node, State), ?LOG_WARNING( set_defined(connected_millisecond_duration, NodeUpTime, #{ - what => nodedown, remote_node => Node + what => nodedown, remote_node => Node, alive_nodes => length(nodes()) + 1 }) ), %% Do another check to update unavailable_nodes list From 4290e4258a837c97991affac1c562dbc9eacb7cf Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 14 Nov 2023 13:03:02 +0100 Subject: [PATCH 12/18] Report time_since_startup_in_milliseconds for nodeup/nodedown events --- src/cets_discovery.erl | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/cets_discovery.erl b/src/cets_discovery.erl index 40628438..d91daf11 100644 --- a/src/cets_discovery.erl +++ b/src/cets_discovery.erl @@ -93,7 +93,8 @@ pending_wait_for_ready := [gen_server:from()], dns_status := dns_status(), nodeup_timestamps := #{node() => milliseconds()}, - nodedown_timestamps := #{node() => milliseconds()} + nodedown_timestamps := #{node() => milliseconds()}, + start_time := milliseconds() }. -type dns_status() :: ready | waiting. 
-type dns_inet_family() :: inet | inet6. @@ -159,6 +160,7 @@ wait_for_ready(Server, Timeout) -> -spec init(term()) -> {ok, state()}. init(Opts) -> + StartTime = erlang:system_time(millisecond), %% Sends nodeup / nodedown ok = net_kernel:monitor_nodes(true), Mod = maps:get(backend_module, Opts, cets_discovery_file), @@ -186,7 +188,8 @@ init(Opts) -> pending_wait_for_ready => [], dns_status => DNSStatus, nodeup_timestamps => #{}, - nodedown_timestamps => #{} + nodedown_timestamps => #{}, + start_time => StartTime }, %% Set initial timestamps because we would not receive nodeup events for %% already connected nodes @@ -235,7 +238,12 @@ handle_info({nodeup, Node}, State) -> {NodeDownTime, State2} = handle_nodeup(Node, State), ?LOG_WARNING( set_defined(downtime_millisecond_duration, NodeDownTime, #{ - what => nodeup, remote_node => Node, alive_nodes => length(nodes()) + 1 + what => nodeup, + remote_node => Node, + alive_nodes => length(nodes()) + 1, + %% We report that time so we could work on minimizing that time. + %% It says how long it took to discover nodes after startup. + time_since_startup_in_milliseconds => time_since_startup_in_milliseconds(State) }) ), State3 = remove_node_from_unavailable_list(Node, State2), @@ -244,7 +252,10 @@ handle_info({nodedown, Node}, State) -> {NodeUpTime, State2} = handle_nodedown(Node, State), ?LOG_WARNING( set_defined(connected_millisecond_duration, NodeUpTime, #{ - what => nodedown, remote_node => Node, alive_nodes => length(nodes()) + 1 + what => nodedown, + remote_node => Node, + alive_nodes => length(nodes()) + 1, + time_since_startup_in_milliseconds => time_since_startup_in_milliseconds(State) }) ), %% Do another check to update unavailable_nodes list @@ -569,19 +580,23 @@ remove_nodeup_timestamp(Node, State = #{nodeup_timestamps := Map}) -> calculate_uptime(undefined) -> undefined; calculate_uptime(StartTime) -> - Time = erlang:system_time(millisecond), - Time - StartTime. + time_since(StartTime). 
get_downtime(Node, #{nodedown_timestamps := Map}) -> case maps:get(Node, Map, undefined) of undefined -> undefined; WentDown -> - Time = erlang:system_time(millisecond), - Time - WentDown + time_since(WentDown) end. set_defined(_Key, undefined, Map) -> Map; set_defined(Key, Value, Map) -> Map#{Key => Value}. + +time_since_startup_in_milliseconds(#{start_time := StartTime}) -> + time_since(StartTime). + +time_since(StartTime) -> + erlang:system_time(millisecond) - StartTime. From 55053e8c149294873ddd4739689b36b009dec2ea Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Thu, 16 Nov 2023 11:17:55 +0100 Subject: [PATCH 13/18] Use epmd instead of resolver to test if node is pingable --- src/cets_discovery.erl | 2 +- src/cets_ping.erl | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/cets_discovery.erl b/src/cets_discovery.erl index d91daf11..a9dadeae 100644 --- a/src/cets_discovery.erl +++ b/src/cets_discovery.erl @@ -545,7 +545,7 @@ maybe_wait_for_dns_async(Opts = #{wait_for_dns := true}) -> %% or inet6 Family = maps:get(dns_inet_family, Opts, inet), spawn_link(fun() -> - wait_for_dns(Family), + % wait_for_dns(Family), Me ! 
dns_is_ready end), waiting; diff --git a/src/cets_ping.erl b/src/cets_ping.erl index 6fb86608..300e4416 100644 --- a/src/cets_ping.erl +++ b/src/cets_ping.erl @@ -15,8 +15,11 @@ ping(Node) when is_atom(Node) -> pong; false -> case dist_util:split_node(Node) of - {node, _Name, Host} -> - case {inet:getaddr(Host, inet6), inet:getaddr(Host, inet)} of + {node, Name, Host} -> + Epmd = net_kernel:epmd_module(), + V4 = Epmd:address_please(Name, Host, inet), + V6 = Epmd:address_please(Name, Host, inet6), + case {V4, V6} of {{error, _}, {error, _}} -> pang; _ -> From 425c92380c1c5a1d3857cbdbda3c4bb948195258 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Thu, 16 Nov 2023 13:11:32 +0100 Subject: [PATCH 14/18] Do not report long task failing with reason stop --- src/cets_long.erl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cets_long.erl b/src/cets_long.erl index dce536fb..bf2ccc63 100644 --- a/src/cets_long.erl +++ b/src/cets_long.erl @@ -91,6 +91,9 @@ run_monitor(Info, Ref, Parent, Start) -> monitor_loop(Mon, Info, Parent, Start, Interval) -> receive + {'DOWN', _MonRef, process, _Pid, stop} -> + %% Special case, the long task is stopped using exit(Pid, stop) + ok; {'DOWN', MonRef, process, _Pid, Reason} when Mon =:= MonRef -> ?LOG_ERROR(Info#{ what => task_failed, From 7897c6c9b25c84aeaddff39765bf75b4aabb9e7a Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Thu, 16 Nov 2023 15:55:29 +0100 Subject: [PATCH 15/18] Log warning if same node reconnects --- src/cets_discovery.erl | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/cets_discovery.erl b/src/cets_discovery.erl index a9dadeae..68319284 100644 --- a/src/cets_discovery.erl +++ b/src/cets_discovery.erl @@ -94,6 +94,7 @@ dns_status := dns_status(), nodeup_timestamps := #{node() => milliseconds()}, nodedown_timestamps := #{node() => milliseconds()}, + node_start_timestamps := #{node() => milliseconds()}, start_time := milliseconds() }. 
-type dns_status() :: ready | waiting. @@ -188,6 +189,7 @@ init(Opts) -> pending_wait_for_ready => [], dns_status => DNSStatus, nodeup_timestamps => #{}, + node_start_timestamps => #{}, nodedown_timestamps => #{}, start_time => StartTime }, @@ -258,9 +260,12 @@ handle_info({nodedown, Node}, State) -> time_since_startup_in_milliseconds => time_since_startup_in_milliseconds(State) }) ), + send_start_time_to(Node, State), %% Do another check to update unavailable_nodes list self() ! check, {noreply, State2}; +handle_info({start_time, Node, StartTime}, State) -> + {noreply, handle_receive_start_time(Node, StartTime, State)}; handle_info({joining_finished, Results}, State) -> {noreply, handle_joining_finished(Results, State)}; handle_info({ping_result, Node, Result}, State) -> @@ -600,3 +605,28 @@ time_since_startup_in_milliseconds(#{start_time := StartTime}) -> time_since(StartTime) -> erlang:system_time(millisecond) - StartTime. + +send_start_time_to(Node, #{start_time := StartTime}) -> + case erlang:process_info(self(), registered_name) of + {registered_name, Name} -> + erlang:send({Name, Node}, {start_time, node(), StartTime}); + _ -> + ok + end. + +handle_receive_start_time(Node, StartTime, State = #{node_start_timestamps := Map}) -> + case maps:get(Node, Map, undefined) of + undefined -> + ok; + StartTime -> + ?LOG_WARNING(#{ + what => node_reconnects, + remote_node => Node, + start_time => StartTime, + text => <<"Netsplit recovery. The remote node has been connected to us before.">> + }); + _ -> + %% Restarted node reconnected, this is fine during the rolling updates + ok + end, + State#{node_start_timestamps := maps:put(Node, StartTime, Map)}. 
From 6de3fe244a86674da8e83e15ec8d0adfd3c8f4eb Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Fri, 17 Nov 2023 09:55:21 +0100 Subject: [PATCH 16/18] Remove wait_for_dns --- src/cets_discovery.erl | 66 +----------------------------------------- 1 file changed, 1 insertion(+), 65 deletions(-) diff --git a/src/cets_discovery.erl b/src/cets_discovery.erl index 68319284..09544896 100644 --- a/src/cets_discovery.erl +++ b/src/cets_discovery.erl @@ -91,19 +91,16 @@ should_retry_join := boolean(), timer_ref := reference() | undefined, pending_wait_for_ready := [gen_server:from()], - dns_status := dns_status(), nodeup_timestamps := #{node() => milliseconds()}, nodedown_timestamps := #{node() => milliseconds()}, node_start_timestamps := #{node() => milliseconds()}, start_time := milliseconds() }. --type dns_status() :: ready | waiting. --type dns_inet_family() :: inet | inet6. -type milliseconds() :: integer(). %% Backend could define its own options -type opts() :: #{ - name := atom(), wait_for_dns := boolean(), dns_inet_family := dns_inet_family(), _ := _ + name := atom(), _ := _ }. -type start_result() :: {ok, pid()} | {error, term()}. -type server() :: pid() | atom(). @@ -170,7 +167,6 @@ init(Opts) -> BackendState = Mod:init(Opts), %% Changes phase from initial to regular (affects the check interval) erlang:send_after(timer:minutes(5), self(), enter_regular_phase), - DNSStatus = maybe_wait_for_dns_async(Opts), State = #{ phase => initial, results => [], @@ -187,7 +183,6 @@ init(Opts) -> should_retry_join => false, timer_ref => undefined, pending_wait_for_ready => [], - dns_status => DNSStatus, nodeup_timestamps => #{}, node_start_timestamps => #{}, nodedown_timestamps => #{}, @@ -272,9 +267,6 @@ handle_info({ping_result, Node, Result}, State) -> {noreply, handle_ping_result(Node, Result, State)}; handle_info(enter_regular_phase, State) -> {noreply, State#{phase := regular}}; -handle_info(dns_is_ready, State) -> - self() ! 
check, - {noreply, State#{dns_status := ready}}; handle_info(Msg, State) -> ?LOG_ERROR(#{what => unexpected_info, msg => Msg}), {noreply, State}. @@ -286,8 +278,6 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. -spec handle_check(state()) -> state(). -handle_check(State = #{dns_status := waiting}) -> - State; handle_check(State = #{tables := []}) -> %% No tables to track, skip State; @@ -504,60 +494,6 @@ has_join_result_for(Node, Table, #{results := Results}) -> handle_system_info(State) -> State#{verify_ready => verify_ready(State)}. --spec wait_for_dns(dns_inet_family()) -> ok. -wait_for_dns(Family) -> - {node, _Name, Host} = dist_util:split_node(node()), - {ok, N} = wait_for_dns(Family, Host, 1), - case N > 50 of - true -> - %% Report if it took long time to get the node name resolvable - ?LOG_WARNING(#{ - what => dns_is_ready, - text => <<"Successfully resolved our node name after a long waiting">>, - node => node(), - host => Host, - tries => N - }); - false -> - ok - end, - ok. - -wait_for_dns(Family, Host, N) -> - case inet_res:gethostbyname(Host, Family) of - {ok, _} -> - {ok, N}; - {error, Reason} -> - case N rem 50 of - 0 -> - ?LOG_WARNING(#{ - what => wait_for_dns, - node => node(), - host => Host, - reason => Reason, - tries => N - }); - _ -> - ok - end, - timer:sleep(100), - wait_for_dns(Family, Host, N + 1) - end. - --spec maybe_wait_for_dns_async(opts()) -> dns_status(). -maybe_wait_for_dns_async(Opts = #{wait_for_dns := true}) -> - Me = self(), - %% or inet6 - Family = maps:get(dns_inet_family, Opts, inet), - spawn_link(fun() -> - % wait_for_dns(Family), - Me ! dns_is_ready - end), - waiting; -maybe_wait_for_dns_async(#{}) -> - %% Skip waiting - ready. - handle_nodedown(Node, State) -> State2 = remember_nodedown_timestamp(Node, State), remove_nodeup_timestamp(Node, State2). 
From 053f241f9882c1f65d8eade4805666bed423b522 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Fri, 17 Nov 2023 16:56:39 +0100 Subject: [PATCH 17/18] Add node_down_history --- src/cets.erl | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/cets.erl b/src/cets.erl index 54340953..cd6260f2 100644 --- a/src/cets.erl +++ b/src/cets.erl @@ -130,7 +130,8 @@ is_leader := boolean(), opts := start_opts(), backlog := [backlog_entry()], - pause_monitors := [pause_monitor()] + pause_monitors := [pause_monitor()], + node_down_history := [node()] }. -type long_msg() :: @@ -154,7 +155,8 @@ memory := non_neg_integer(), ack_pid := ack_pid(), join_ref := join_ref(), - opts := start_opts() + opts := start_opts(), + node_down_history := [node()] }. -type handle_down_fun() :: fun((#{remote_pid := server_pid(), table := table_name()}) -> ok). @@ -417,7 +419,8 @@ init({Tab, Opts}) -> is_leader => true, opts => Opts, backlog => [], - pause_monitors => [] + pause_monitors => [], + node_down_history => [] }}. -spec handle_call(long_msg() | {op, op()}, from(), state()) -> @@ -524,7 +527,7 @@ handle_down2(RemotePid, State = #{other_servers := Servers, ack_pid := AckPid}) cets_ack:send_remote_down(AckPid, RemotePid), call_user_handle_down(RemotePid, State), Servers2 = lists:delete(RemotePid, Servers), - set_other_servers(Servers2, State); + update_node_down_history(RemotePid, set_other_servers(Servers2, State)); false -> %% This should not happen ?LOG_ERROR(#{ @@ -535,6 +538,9 @@ handle_down2(RemotePid, State = #{other_servers := Servers, ack_pid := AckPid}) State end. +update_node_down_history(RemotePid, State = #{node_down_history := History}) -> + State#{node_down_history := [node(RemotePid) | History]}. + %% Merge two lists of pids, create the missing monitors. -spec add_servers(Servers, Servers) -> Servers when Servers :: servers(). 
add_servers(Pids, Servers) -> @@ -742,6 +748,7 @@ handle_get_info( other_servers := Servers, ack_pid := AckPid, join_ref := JoinRef, + node_down_history := DownHistory, opts := Opts } ) -> @@ -752,6 +759,7 @@ handle_get_info( memory => ets:info(Tab, memory), ack_pid => AckPid, join_ref => JoinRef, + node_down_history => DownHistory, opts => Opts }. From c8d67802d2b0fcb7a7c4b4322ab8d3a5267b6612 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Fri, 17 Nov 2023 21:10:07 +0100 Subject: [PATCH 18/18] Sort available nodes in the status API --- src/cets_status.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cets_status.erl b/src/cets_status.erl index 36b29e14..6b8a7865 100644 --- a/src/cets_status.erl +++ b/src/cets_status.erl @@ -64,7 +64,7 @@ gather_data(Disco) -> ThisNode = node(), Info = cets_discovery:system_info(Disco), #{tables := Tables} = Info, - OnlineNodes = [ThisNode | nodes()], + OnlineNodes = lists:sort([ThisNode | nodes()]), AvailNodes = available_nodes(Disco, OnlineNodes), Expected = get_local_table_to_other_nodes_map(Tables), OtherTabNodes = get_node_to_tab_nodes_map(AvailNodes, Disco),