From fd9b49b8657438c320ab571a102e867acaa04e1d Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Tue, 5 Nov 2024 15:09:52 +0200 Subject: [PATCH] Streaming re-organization (#18941) split streaming into multiple files --- CMakeLists.txt | 58 +- src/daemon/analytics.c | 4 +- src/daemon/analytics.h | 1 + src/daemon/common.h | 1 - src/daemon/{common.c => h2o-common.c} | 0 src/daemon/main.c | 2 +- src/database/contexts/api_v2_contexts.c | 2 +- .../contexts/api_v2_contexts_agents.c | 2 +- src/database/engine/dbengine-stresstest.c | 14 +- src/database/engine/dbengine-unittest.c | 14 +- src/database/rrd.h | 82 +- src/database/rrdhost.c | 249 +--- src/plugins.d/pluginsd_parser.c | 14 +- src/streaming/{common.h => h2o-common.h} | 0 .../protocol/command-begin-set-end.c | 126 ++ .../protocol/command-chart-definition.c | 206 +++ src/streaming/protocol/command-function.c | 20 + src/streaming/protocol/command-host-labels.c | 25 + .../protocol/command-host-variables.c | 52 + src/streaming/protocol/commands.c | 55 + src/streaming/protocol/commands.h | 27 + src/streaming/receiver.c | 477 ++++++- src/streaming/receiver.h | 93 ++ src/streaming/replication.h | 2 + src/streaming/rrdhost-status.c | 355 +++++ src/streaming/rrdhost-status.h | 161 +++ src/streaming/rrdpush.c | 1258 ----------------- src/streaming/rrdpush.h | 689 +-------- .../{sender_commit.c => sender-commit.c} | 2 +- .../{sender_connect.c => sender-connect.c} | 71 +- src/streaming/sender-destinations.c | 143 ++ src/streaming/sender-destinations.h | 38 + .../{sender_execute.c => sender-execute.c} | 2 +- ...{sender_internals.h => sender-internals.h} | 6 +- src/streaming/sender.c | 140 +- src/streaming/sender.h | 169 +++ ...m_capabilities.c => stream-capabilities.c} | 0 ...m_capabilities.h => stream-capabilities.h} | 0 .../brotli.c} | 2 +- .../brotli.h} | 0 .../{ => stream-compression}/compression.c | 8 +- .../{ => stream-compression}/compression.h | 12 +- .../gzip.c} | 2 +- .../gzip.h} | 0 .../lz4.c} | 2 +- .../lz4.h} | 0 
.../zstd.c} | 2 +- .../zstd.h} | 0 src/streaming/stream-conf.c | 137 ++ src/streaming/stream-conf.h | 28 + src/streaming/stream-handshake.c | 53 + src/streaming/stream-handshake.h | 82 ++ .../{stream_path.c => stream-path.c} | 2 +- .../{stream_path.h => stream-path.h} | 2 +- src/web/api/v1/api_v1_info.c | 2 +- src/web/server/h2o/http_server.c | 2 +- src/web/server/h2o/{streaming.c => rrdpush.c} | 2 +- 57 files changed, 2483 insertions(+), 2415 deletions(-) rename src/daemon/{common.c => h2o-common.c} (100%) rename src/streaming/{common.h => h2o-common.h} (100%) create mode 100644 src/streaming/protocol/command-begin-set-end.c create mode 100644 src/streaming/protocol/command-chart-definition.c create mode 100644 src/streaming/protocol/command-function.c create mode 100644 src/streaming/protocol/command-host-labels.c create mode 100644 src/streaming/protocol/command-host-variables.c create mode 100644 src/streaming/receiver.h create mode 100644 src/streaming/rrdhost-status.c create mode 100644 src/streaming/rrdhost-status.h delete mode 100644 src/streaming/rrdpush.c rename src/streaming/{sender_commit.c => sender-commit.c} (99%) rename src/streaming/{sender_connect.c => sender-connect.c} (92%) create mode 100644 src/streaming/sender-destinations.c create mode 100644 src/streaming/sender-destinations.h rename src/streaming/{sender_execute.c => sender-execute.c} (99%) rename src/streaming/{sender_internals.h => sender-internals.h} (94%) create mode 100644 src/streaming/sender.h rename src/streaming/{stream_capabilities.c => stream-capabilities.c} (100%) rename src/streaming/{stream_capabilities.h => stream-capabilities.h} (100%) rename src/streaming/{compression_brotli.c => stream-compression/brotli.c} (99%) rename src/streaming/{compression_brotli.h => stream-compression/brotli.h} (100%) rename src/streaming/{ => stream-compression}/compression.c (99%) rename src/streaming/{ => stream-compression}/compression.h (93%) rename src/streaming/{compression_gzip.c => 
stream-compression/gzip.c} (99%) rename src/streaming/{compression_gzip.h => stream-compression/gzip.h} (100%) rename src/streaming/{compression_lz4.c => stream-compression/lz4.c} (99%) rename src/streaming/{compression_lz4.h => stream-compression/lz4.h} (100%) rename src/streaming/{compression_zstd.c => stream-compression/zstd.c} (99%) rename src/streaming/{compression_zstd.h => stream-compression/zstd.h} (100%) create mode 100644 src/streaming/stream-conf.c create mode 100644 src/streaming/stream-conf.h create mode 100644 src/streaming/stream-handshake.c create mode 100644 src/streaming/stream-handshake.h rename src/streaming/{stream_path.c => stream-path.c} (99%) rename src/streaming/{stream_path.h => stream-path.h} (98%) rename src/web/server/h2o/{streaming.c => rrdpush.c} (99%) diff --git a/CMakeLists.txt b/CMakeLists.txt index e9a62130010b72..5d1b0f9a8144c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -973,7 +973,7 @@ set(LIBH2O_FILES set(DAEMON_FILES src/daemon/buildinfo.c src/daemon/buildinfo.h - src/daemon/common.c + src/daemon/h2o-common.c src/daemon/common.h src/daemon/daemon.c src/daemon/daemon.h @@ -1017,7 +1017,7 @@ set(H2O_FILES src/web/server/h2o/http_server.h src/web/server/h2o/h2o_utils.c src/web/server/h2o/h2o_utils.h - src/web/server/h2o/streaming.c + src/web/server/h2o/rrdpush.c src/web/server/h2o/streaming.h src/web/server/h2o/connlist.c src/web/server/h2o/connlist.h @@ -1371,35 +1371,49 @@ set(SYSTEMD_JOURNAL_PLUGIN_FILES ) set(STREAMING_PLUGIN_FILES - src/streaming/rrdpush.c src/streaming/rrdpush.h - src/streaming/compression.c - src/streaming/compression.h - src/streaming/compression_brotli.c - src/streaming/compression_brotli.h - src/streaming/compression_gzip.c - src/streaming/compression_gzip.h - src/streaming/compression_lz4.c - src/streaming/compression_lz4.h - src/streaming/compression_zstd.c - src/streaming/compression_zstd.h + src/streaming/stream-compression/compression.c + src/streaming/stream-compression/compression.h + 
src/streaming/stream-compression/brotli.c + src/streaming/stream-compression/brotli.h + src/streaming/stream-compression/gzip.c + src/streaming/stream-compression/gzip.h + src/streaming/stream-compression/lz4.c + src/streaming/stream-compression/lz4.h + src/streaming/stream-compression/zstd.c + src/streaming/stream-compression/zstd.h src/streaming/receiver.c src/streaming/sender.c src/streaming/replication.c src/streaming/replication.h - src/streaming/common.h + src/streaming/h2o-common.h src/streaming/protocol/command-nodeid.c src/streaming/protocol/commands.c src/streaming/protocol/commands.h src/streaming/protocol/command-claimed_id.c - src/streaming/stream_path.c - src/streaming/stream_path.h - src/streaming/stream_capabilities.c - src/streaming/stream_capabilities.h - src/streaming/sender_connect.c - src/streaming/sender_internals.h - src/streaming/sender_execute.c - src/streaming/sender_commit.c + src/streaming/stream-path.c + src/streaming/stream-path.h + src/streaming/stream-capabilities.c + src/streaming/stream-capabilities.h + src/streaming/sender-connect.c + src/streaming/sender-internals.h + src/streaming/sender-execute.c + src/streaming/sender-commit.c + src/streaming/sender-destinations.c + src/streaming/stream-handshake.c + src/streaming/protocol/command-function.c + src/streaming/protocol/command-host-labels.c + src/streaming/protocol/command-chart-definition.c + src/streaming/protocol/command-begin-set-end.c + src/streaming/protocol/command-host-variables.c + src/streaming/stream-conf.c + src/streaming/stream-conf.h + src/streaming/stream-handshake.h + src/streaming/sender.h + src/streaming/sender-destinations.h + src/streaming/rrdhost-status.c + src/streaming/rrdhost-status.h + src/streaming/receiver.h ) set(WEB_PLUGIN_FILES diff --git a/src/daemon/analytics.c b/src/daemon/analytics.c index 91a42bc85dee9a..cebfdeb704052d 100644 --- a/src/daemon/analytics.c +++ b/src/daemon/analytics.c @@ -522,7 +522,7 @@ void 
analytics_gather_mutable_meta_data(void) analytics_alarms_notifications(); analytics_set_data( - &analytics_data.netdata_config_is_parent, (rrdhost_hosts_available() > 1 || configured_as_parent()) ? "true" : "false"); + &analytics_data.netdata_config_is_parent, (rrdhost_hosts_available() > 1 || stream_conf_configured_as_parent()) ? "true" : "false"); analytics_set_data(&analytics_data.netdata_host_agent_claimed, is_agent_claimed() ? "true" : "false"); @@ -619,7 +619,7 @@ void *analytics_main(void *ptr) */ void set_late_analytics_variables(struct rrdhost_system_info *system_info) { - analytics_set_data(&analytics_data.netdata_config_stream_enabled, default_rrdpush_enabled ? "true" : "false"); + analytics_set_data(&analytics_data.netdata_config_stream_enabled, stream_conf_send_enabled ? "true" : "false"); analytics_set_data_str(&analytics_data.netdata_config_memory_mode, (char *)rrd_memory_mode_name(default_rrd_memory_mode)); analytics_set_data(&analytics_data.netdata_host_cloud_enabled, "true"); diff --git a/src/daemon/analytics.h b/src/daemon/analytics.h index b818bea9387c6c..b1d3c1386c5798 100644 --- a/src/daemon/analytics.h +++ b/src/daemon/analytics.h @@ -76,6 +76,7 @@ struct analytics_data { bool exporting_enabled; }; +struct rrdhost_system_info; void set_late_analytics_variables(struct rrdhost_system_info *system_info); void analytics_free_data(void); void analytics_log_shell(void); diff --git a/src/daemon/common.h b/src/daemon/common.h index cc2ea289e4d936..9f6efa3efba79a 100644 --- a/src/daemon/common.h +++ b/src/daemon/common.h @@ -32,7 +32,6 @@ // streaming metrics between netdata servers #include "streaming/rrdpush.h" - // anomaly detection #include "ml/ml.h" diff --git a/src/daemon/common.c b/src/daemon/h2o-common.c similarity index 100% rename from src/daemon/common.c rename to src/daemon/h2o-common.c diff --git a/src/daemon/main.c b/src/daemon/main.c index 925d955158700c..03ae7e0036076b 100644 --- a/src/daemon/main.c +++ b/src/daemon/main.c @@ -1503,7 
+1503,7 @@ int unittest_prepare_rrd(const char **user) { fprintf(stderr, "rrd_init failed for unittest\n"); return 1; } - default_rrdpush_enabled = 0; + stream_conf_send_enabled = 0; return 0; } diff --git a/src/database/contexts/api_v2_contexts.c b/src/database/contexts/api_v2_contexts.c index fcb08298094da4..d8d945afb3df02 100644 --- a/src/database/contexts/api_v2_contexts.c +++ b/src/database/contexts/api_v2_contexts.c @@ -422,7 +422,7 @@ static void rrdcontext_to_json_v2_rrdhost(BUFFER *wb, RRDHOST *host, struct rrdc // stale - connected but not having live data // reachable - connected with live data // pruned - not connected for some time and has been removed - buffer_json_member_add_string(wb, "state", rrdhost_state_cloud_emulation(host) ? "reachable" : "stale"); + buffer_json_member_add_string(wb, "state", rrdhost_is_online(host) ? "reachable" : "stale"); rrdhost_health_to_json_v2(wb, "health", &s); agent_capabilities_to_json(wb, host, "capabilities"); diff --git a/src/database/contexts/api_v2_contexts_agents.c b/src/database/contexts/api_v2_contexts_agents.c index d6ffd57d6aed1e..e279405a0b0797 100644 --- a/src/database/contexts/api_v2_contexts_agents.c +++ b/src/database/contexts/api_v2_contexts_agents.c @@ -44,7 +44,7 @@ void buffer_json_agents_v2(BUFFER *wb, struct query_timings *timings, time_t now sending++; if(host != localhost) { - if (rrdhost_state_cloud_emulation(host)) + if (rrdhost_is_online(host)) receiving++; else archived++; diff --git a/src/database/engine/dbengine-stresstest.c b/src/database/engine/dbengine-stresstest.c index 1d978cd520d971..0447bcf337cbff 100644 --- a/src/database/engine/dbengine-stresstest.c +++ b/src/database/engine/dbengine-stresstest.c @@ -22,13 +22,13 @@ static RRDHOST *dbengine_rrdhost_find_or_create(char *name) { default_rrd_history_entries, RRD_MEMORY_MODE_DBENGINE, health_plugin_enabled(), - default_rrdpush_enabled, - default_rrdpush_destination, - default_rrdpush_api_key, - default_rrdpush_send_charts_matching, - 
default_rrdpush_enable_replication, - default_rrdpush_seconds_to_replicate, - default_rrdpush_replication_step, + stream_conf_send_enabled, + stream_conf_send_destination, + stream_conf_send_api_key, + stream_conf_send_charts_matching, + stream_conf_replication_enabled, + stream_conf_replication_period, + stream_conf_replication_step, NULL, 0 ); diff --git a/src/database/engine/dbengine-unittest.c b/src/database/engine/dbengine-unittest.c index cfe038df628234..75533610162e8d 100644 --- a/src/database/engine/dbengine-unittest.c +++ b/src/database/engine/dbengine-unittest.c @@ -108,13 +108,13 @@ static RRDHOST *dbengine_rrdhost_find_or_create(char *name) { default_rrd_history_entries, RRD_MEMORY_MODE_DBENGINE, health_plugin_enabled(), - default_rrdpush_enabled, - default_rrdpush_destination, - default_rrdpush_api_key, - default_rrdpush_send_charts_matching, - default_rrdpush_enable_replication, - default_rrdpush_seconds_to_replicate, - default_rrdpush_replication_step, + stream_conf_send_enabled, + stream_conf_send_destination, + stream_conf_send_api_key, + stream_conf_send_charts_matching, + stream_conf_replication_enabled, + stream_conf_replication_period, + stream_conf_replication_step, NULL, 0 ); diff --git a/src/database/rrd.h b/src/database/rrd.h index 39874c2145e68b..c914b783d2340d 100644 --- a/src/database/rrd.h +++ b/src/database/rrd.h @@ -98,14 +98,53 @@ struct ml_metrics_statistics { size_t silenced; }; + +// use this for configuration flags, not for state control +// flags are set/unset in a manner that is not thread safe +// and may lead to missing information. 
+typedef enum __attribute__ ((__packed__)) rrdset_flags { + RRDSET_FLAG_DEBUG = (1 << 2), // enables or disables debugging for a chart + RRDSET_FLAG_OBSOLETE = (1 << 3), // this is marked by the collector/module as obsolete + RRDSET_FLAG_EXPORTING_SEND = (1 << 4), // if set, this chart should be sent to Prometheus web API and external databases + RRDSET_FLAG_EXPORTING_IGNORE = (1 << 5), // if set, this chart should not be sent to Prometheus web API and external databases + + RRDSET_FLAG_UPSTREAM_SEND = (1 << 6), // if set, this chart should be sent upstream (streaming) + RRDSET_FLAG_UPSTREAM_IGNORE = (1 << 7), // if set, this chart should not be sent upstream (streaming) + + RRDSET_FLAG_STORE_FIRST = (1 << 8), // if set, do not eliminate the first collection during interpolation + RRDSET_FLAG_HETEROGENEOUS = (1 << 9), // if set, the chart is not homogeneous (dimensions in it have multiple algorithms, multipliers or dividers) + RRDSET_FLAG_HOMOGENEOUS_CHECK = (1 << 10), // if set, the chart should be checked to determine if the dimensions are homogeneous + RRDSET_FLAG_HIDDEN = (1 << 11), // if set, do not show this chart on the dashboard, but use it for exporting + RRDSET_FLAG_SYNC_CLOCK = (1 << 12), // if set, microseconds on next data collection will be ignored (the chart will be synced to now) + RRDSET_FLAG_OBSOLETE_DIMENSIONS = (1 << 13), // this is marked by the collector/module when a chart has obsolete dimensions + + RRDSET_FLAG_METADATA_UPDATE = (1 << 14), // Mark that metadata needs to be stored + RRDSET_FLAG_ANOMALY_DETECTION = (1 << 15), // flag to identify anomaly detection charts. 
+ RRDSET_FLAG_INDEXED_ID = (1 << 16), // the rrdset is indexed by its id + RRDSET_FLAG_INDEXED_NAME = (1 << 17), // the rrdset is indexed by its name + + RRDSET_FLAG_PENDING_HEALTH_INITIALIZATION = (1 << 18), + + RRDSET_FLAG_SENDER_REPLICATION_IN_PROGRESS = (1 << 19), // the sending side has replication in progress + RRDSET_FLAG_SENDER_REPLICATION_FINISHED = (1 << 20), // the sending side has completed replication + RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS = (1 << 21), // the receiving side has replication in progress + RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED = (1 << 22), // the receiving side has completed replication + + RRDSET_FLAG_UPSTREAM_SEND_VARIABLES = (1 << 23), // a custom variable has been updated and needs to be exposed to parent + + RRDSET_FLAG_COLLECTION_FINISHED = (1 << 24), // when set, data collection is not available for this chart + + RRDSET_FLAG_HAS_RRDCALC_LINKED = (1 << 25), // this chart has at least one rrdcal linked +} RRDSET_FLAGS; + #include "daemon/common.h" #include "web/api/queries/query.h" #include "web/api/queries/rrdr.h" #include "health/rrdvar.h" #include "health/rrdcalc.h" #include "rrdlabels.h" -#include "streaming/stream_capabilities.h" -#include "streaming/stream_path.h" +#include "streaming/stream-capabilities.h" +#include "streaming/stream-path.h" #include "streaming/rrdpush.h" //#include "aclk/aclk_rrdhost_state.h" #include "sqlite/sqlite_health.h" @@ -664,45 +703,6 @@ STORAGE_ENGINE* storage_engine_find(const char* name); // ---------------------------------------------------------------------------- // RRDSET - this is a chart -// use this for configuration flags, not for state control -// flags are set/unset in a manner that is not thread safe -// and may lead to missing information. 
- -typedef enum __attribute__ ((__packed__)) rrdset_flags { - RRDSET_FLAG_DEBUG = (1 << 2), // enables or disables debugging for a chart - RRDSET_FLAG_OBSOLETE = (1 << 3), // this is marked by the collector/module as obsolete - RRDSET_FLAG_EXPORTING_SEND = (1 << 4), // if set, this chart should be sent to Prometheus web API and external databases - RRDSET_FLAG_EXPORTING_IGNORE = (1 << 5), // if set, this chart should not be sent to Prometheus web API and external databases - - RRDSET_FLAG_UPSTREAM_SEND = (1 << 6), // if set, this chart should be sent upstream (streaming) - RRDSET_FLAG_UPSTREAM_IGNORE = (1 << 7), // if set, this chart should not be sent upstream (streaming) - - RRDSET_FLAG_STORE_FIRST = (1 << 8), // if set, do not eliminate the first collection during interpolation - RRDSET_FLAG_HETEROGENEOUS = (1 << 9), // if set, the chart is not homogeneous (dimensions in it have multiple algorithms, multipliers or dividers) - RRDSET_FLAG_HOMOGENEOUS_CHECK = (1 << 10), // if set, the chart should be checked to determine if the dimensions are homogeneous - RRDSET_FLAG_HIDDEN = (1 << 11), // if set, do not show this chart on the dashboard, but use it for exporting - RRDSET_FLAG_SYNC_CLOCK = (1 << 12), // if set, microseconds on next data collection will be ignored (the chart will be synced to now) - RRDSET_FLAG_OBSOLETE_DIMENSIONS = (1 << 13), // this is marked by the collector/module when a chart has obsolete dimensions - - RRDSET_FLAG_METADATA_UPDATE = (1 << 14), // Mark that metadata needs to be stored - RRDSET_FLAG_ANOMALY_DETECTION = (1 << 15), // flag to identify anomaly detection charts. 
- RRDSET_FLAG_INDEXED_ID = (1 << 16), // the rrdset is indexed by its id - RRDSET_FLAG_INDEXED_NAME = (1 << 17), // the rrdset is indexed by its name - - RRDSET_FLAG_PENDING_HEALTH_INITIALIZATION = (1 << 18), - - RRDSET_FLAG_SENDER_REPLICATION_IN_PROGRESS = (1 << 19), // the sending side has replication in progress - RRDSET_FLAG_SENDER_REPLICATION_FINISHED = (1 << 20), // the sending side has completed replication - RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS = (1 << 21), // the receiving side has replication in progress - RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED = (1 << 22), // the receiving side has completed replication - - RRDSET_FLAG_UPSTREAM_SEND_VARIABLES = (1 << 23), // a custom variable has been updated and needs to be exposed to parent - - RRDSET_FLAG_COLLECTION_FINISHED = (1 << 24), // when set, data collection is not available for this chart - - RRDSET_FLAG_HAS_RRDCALC_LINKED = (1 << 25), // this chart has at least one rrdcal linked -} RRDSET_FLAGS; - #define rrdset_flag_get(st) __atomic_load_n(&((st)->flags), __ATOMIC_ACQUIRE) #define rrdset_flag_check(st, flag) (__atomic_load_n(&((st)->flags), __ATOMIC_ACQUIRE) & (flag)) #define rrdset_flag_set(st, flag) __atomic_or_fetch(&((st)->flags), flag, __ATOMIC_RELEASE) diff --git a/src/database/rrdhost.c b/src/database/rrdhost.c index f1767e5254a6f1..1902746ee51a3e 100644 --- a/src/database/rrdhost.c +++ b/src/database/rrdhost.c @@ -1047,9 +1047,9 @@ int rrd_init(const char *hostname, struct rrdhost_system_info *system_info, bool dbengine_enabled = true; } else { - rrdpush_init(); + stream_conf_init(); - if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE || rrdpush_receiver_needs_dbengine()) { + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE || stream_conf_receiver_needs_dbengine()) { nd_log(NDLS_DAEMON, NDLP_DEBUG, "DBENGINE: Initializing ..."); @@ -1094,14 +1094,14 @@ int rrd_init(const char *hostname, struct rrdhost_system_info *system_info, bool , default_rrd_history_entries , 
default_rrd_memory_mode , health_plugin_enabled() - , default_rrdpush_enabled - , default_rrdpush_destination - , default_rrdpush_api_key - , default_rrdpush_send_charts_matching - , default_rrdpush_enable_replication - , default_rrdpush_seconds_to_replicate - , default_rrdpush_replication_step - , system_info + , + stream_conf_send_enabled, + stream_conf_send_destination, + stream_conf_send_api_key, + stream_conf_send_charts_matching, + stream_conf_replication_enabled, + stream_conf_replication_period, + stream_conf_replication_step, system_info , 1 , 0 ); @@ -1184,7 +1184,7 @@ static void rrdhost_streaming_sender_structures_init(RRDHOST *host) host->sender->rrdpush_sender_socket = -1; host->sender->disabled_capabilities = STREAM_CAP_NONE; - if(!default_rrdpush_compression_enabled) + if(!stream_conf_compression_enabled) host->sender->disabled_capabilities |= STREAM_CAP_COMPRESSIONS_AVAILABLE; spinlock_init(&host->sender->spinlock); @@ -1666,235 +1666,8 @@ int rrdhost_set_system_info_variable(struct rrdhost_system_info *system_info, ch return res; } -static NETDATA_DOUBLE rrdhost_sender_replication_completion_unsafe(RRDHOST *host, time_t now, size_t *instances) { - size_t charts = rrdhost_sender_replicating_charts(host); - NETDATA_DOUBLE completion; - if(!charts || !host->sender || !host->sender->replication.oldest_request_after_t) - completion = 100.0; - else if(!host->sender->replication.latest_completed_before_t || host->sender->replication.latest_completed_before_t < host->sender->replication.oldest_request_after_t) - completion = 0.0; - else { - time_t total = now - host->sender->replication.oldest_request_after_t; - time_t current = host->sender->replication.latest_completed_before_t - host->sender->replication.oldest_request_after_t; - completion = (NETDATA_DOUBLE) current * 100.0 / (NETDATA_DOUBLE) total; - } - - *instances = charts; - - return completion; -} - bool rrdhost_matches_window(RRDHOST *host, time_t after, time_t before, time_t now) { time_t 
first_time_s, last_time_s; rrdhost_retention(host, now, rrdhost_is_online(host), &first_time_s, &last_time_s); return query_matches_retention(after, before, first_time_s, last_time_s, 0); } - -bool rrdhost_state_cloud_emulation(RRDHOST *host) { - return rrdhost_is_online(host); -} - -void rrdhost_status(RRDHOST *host, time_t now, RRDHOST_STATUS *s) { - memset(s, 0, sizeof(*s)); - - s->host = host; - s->now = now; - - RRDHOST_FLAGS flags = __atomic_load_n(&host->flags, __ATOMIC_RELAXED); - - // --- dyncfg --- - - s->dyncfg.status = dyncfg_available_for_rrdhost(host) ? RRDHOST_DYNCFG_STATUS_AVAILABLE : RRDHOST_DYNCFG_STATUS_UNAVAILABLE; - - // --- db --- - - bool online = rrdhost_is_online(host); - - rrdhost_retention(host, now, online, &s->db.first_time_s, &s->db.last_time_s); - s->db.metrics = host->rrdctx.metrics; - s->db.instances = host->rrdctx.instances; - s->db.contexts = dictionary_entries(host->rrdctx.contexts); - if(!s->db.first_time_s || !s->db.last_time_s || !s->db.metrics || !s->db.instances || !s->db.contexts || - (flags & (RRDHOST_FLAG_PENDING_CONTEXT_LOAD))) - s->db.status = RRDHOST_DB_STATUS_INITIALIZING; - else - s->db.status = RRDHOST_DB_STATUS_QUERYABLE; - - s->db.mode = host->rrd_memory_mode; - - // --- ingest --- - - s->ingest.since = MAX(host->child_connect_time, host->child_disconnected_time); - s->ingest.reason = (online) ? STREAM_HANDSHAKE_NEVER : host->rrdpush_last_receiver_exit_reason; - - spinlock_lock(&host->receiver_lock); - s->ingest.hops = (host->system_info ? host->system_info->hops : (host == localhost) ? 
0 : 1); - bool has_receiver = false; - if (host->receiver) { - has_receiver = true; - s->ingest.replication.instances = rrdhost_receiver_replicating_charts(host); - s->ingest.replication.completion = host->rrdpush_receiver_replication_percent; - s->ingest.replication.in_progress = s->ingest.replication.instances > 0; - - s->ingest.capabilities = host->receiver->capabilities; - s->ingest.peers = socket_peers(host->receiver->fd); - s->ingest.ssl = SSL_connection(&host->receiver->ssl); - } - spinlock_unlock(&host->receiver_lock); - - if (online) { - if(s->db.status == RRDHOST_DB_STATUS_INITIALIZING) - s->ingest.status = RRDHOST_INGEST_STATUS_INITIALIZING; - - else if (host == localhost || rrdhost_option_check(host, RRDHOST_OPTION_VIRTUAL_HOST)) { - s->ingest.status = RRDHOST_INGEST_STATUS_ONLINE; - s->ingest.since = netdata_start_time; - } - - else if (s->ingest.replication.in_progress) - s->ingest.status = RRDHOST_INGEST_STATUS_REPLICATING; - - else - s->ingest.status = RRDHOST_INGEST_STATUS_ONLINE; - } - else { - if (!s->ingest.since) { - s->ingest.status = RRDHOST_INGEST_STATUS_ARCHIVED; - s->ingest.since = s->db.last_time_s; - } - - else - s->ingest.status = RRDHOST_INGEST_STATUS_OFFLINE; - } - - if(host == localhost) - s->ingest.type = RRDHOST_INGEST_TYPE_LOCALHOST; - else if(has_receiver || rrdhost_flag_set(host, RRDHOST_FLAG_RRDPUSH_RECEIVER_DISCONNECTED)) - s->ingest.type = RRDHOST_INGEST_TYPE_CHILD; - else if(rrdhost_option_check(host, RRDHOST_OPTION_VIRTUAL_HOST)) - s->ingest.type = RRDHOST_INGEST_TYPE_VIRTUAL; - else - s->ingest.type = RRDHOST_INGEST_TYPE_ARCHIVED; - - s->ingest.id = host->rrdpush_receiver_connection_counter; - - if(!s->ingest.since) - s->ingest.since = netdata_start_time; - - if(s->ingest.status == RRDHOST_INGEST_STATUS_ONLINE) - s->db.liveness = RRDHOST_DB_LIVENESS_LIVE; - else - s->db.liveness = RRDHOST_DB_LIVENESS_STALE; - - // --- stream --- - - if (!host->sender) { - s->stream.status = RRDHOST_STREAM_STATUS_DISABLED; - s->stream.hops 
= s->ingest.hops + 1; - } - else { - sender_lock(host->sender); - - s->stream.since = host->sender->last_state_since_t; - s->stream.peers = socket_peers(host->sender->rrdpush_sender_socket); - s->stream.ssl = SSL_connection(&host->sender->ssl); - - memcpy(s->stream.sent_bytes_on_this_connection_per_type, - host->sender->sent_bytes_on_this_connection_per_type, - MIN(sizeof(s->stream.sent_bytes_on_this_connection_per_type), - sizeof(host->sender->sent_bytes_on_this_connection_per_type))); - - if (rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED)) { - s->stream.hops = host->sender->hops; - s->stream.reason = STREAM_HANDSHAKE_NEVER; - s->stream.capabilities = host->sender->capabilities; - - s->stream.replication.completion = rrdhost_sender_replication_completion_unsafe(host, now, &s->stream.replication.instances); - s->stream.replication.in_progress = s->stream.replication.instances > 0; - - if(s->stream.replication.in_progress) - s->stream.status = RRDHOST_STREAM_STATUS_REPLICATING; - else - s->stream.status = RRDHOST_STREAM_STATUS_ONLINE; - - s->stream.compression = host->sender->compressor.initialized; - } - else { - s->stream.status = RRDHOST_STREAM_STATUS_OFFLINE; - s->stream.hops = s->ingest.hops + 1; - s->stream.reason = host->sender->exit.reason; - } - - sender_unlock(host->sender); - } - - s->stream.id = host->rrdpush_sender_connection_counter; - - if(!s->stream.since) - s->stream.since = netdata_start_time; - - // --- ml --- - - if(ml_host_get_host_status(host, &s->ml.metrics)) { - s->ml.type = RRDHOST_ML_TYPE_SELF; - - if(s->ingest.status == RRDHOST_INGEST_STATUS_OFFLINE || s->ingest.status == RRDHOST_INGEST_STATUS_ARCHIVED) - s->ml.status = RRDHOST_ML_STATUS_OFFLINE; - else - s->ml.status = RRDHOST_ML_STATUS_RUNNING; - } - else if(stream_has_capability(&s->ingest, STREAM_CAP_DATA_WITH_ML)) { - s->ml.type = RRDHOST_ML_TYPE_RECEIVED; - s->ml.status = RRDHOST_ML_STATUS_RUNNING; - } - else { - // does not receive ML, does not run ML - s->ml.type = 
RRDHOST_ML_TYPE_DISABLED; - s->ml.status = RRDHOST_ML_STATUS_DISABLED; - } - - // --- health --- - - if(host->health.health_enabled) { - if(flags & RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION) - s->health.status = RRDHOST_HEALTH_STATUS_INITIALIZING; - else { - s->health.status = RRDHOST_HEALTH_STATUS_RUNNING; - - RRDCALC *rc; - foreach_rrdcalc_in_rrdhost_read(host, rc) { - if (unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec)) - continue; - - switch (rc->status) { - default: - case RRDCALC_STATUS_REMOVED: - break; - - case RRDCALC_STATUS_CLEAR: - s->health.alerts.clear++; - break; - - case RRDCALC_STATUS_WARNING: - s->health.alerts.warning++; - break; - - case RRDCALC_STATUS_CRITICAL: - s->health.alerts.critical++; - break; - - case RRDCALC_STATUS_UNDEFINED: - s->health.alerts.undefined++; - break; - - case RRDCALC_STATUS_UNINITIALIZED: - s->health.alerts.uninitialized++; - break; - } - } - foreach_rrdcalc_in_rrdhost_done(rc); - } - } - else - s->health.status = RRDHOST_HEALTH_STATUS_DISABLED; -} diff --git a/src/plugins.d/pluginsd_parser.c b/src/plugins.d/pluginsd_parser.c index 26610239d3dfb3..62f56d3091f2b1 100644 --- a/src/plugins.d/pluginsd_parser.c +++ b/src/plugins.d/pluginsd_parser.c @@ -190,13 +190,13 @@ static inline PARSER_RC pluginsd_host_define_end(char **words __maybe_unused, si default_rrd_history_entries, default_rrd_memory_mode, health_plugin_enabled(), - default_rrdpush_enabled, - default_rrdpush_destination, - default_rrdpush_api_key, - default_rrdpush_send_charts_matching, - default_rrdpush_enable_replication, - default_rrdpush_seconds_to_replicate, - default_rrdpush_replication_step, + stream_conf_send_enabled, + stream_conf_send_destination, + stream_conf_send_api_key, + stream_conf_send_charts_matching, + stream_conf_replication_enabled, + stream_conf_replication_period, + stream_conf_replication_step, rrdhost_labels_to_system_info(parser->user.host_define.rrdlabels), false); diff --git a/src/streaming/common.h 
b/src/streaming/h2o-common.h similarity index 100% rename from src/streaming/common.h rename to src/streaming/h2o-common.h diff --git a/src/streaming/protocol/command-begin-set-end.c b/src/streaming/protocol/command-begin-set-end.c new file mode 100644 index 00000000000000..17daef776a3933 --- /dev/null +++ b/src/streaming/protocol/command-begin-set-end.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "commands.h" +#include "plugins.d/pluginsd_internals.h" + +static void rrdpush_send_chart_metrics(BUFFER *wb, RRDSET *st, struct sender_state *s __maybe_unused, RRDSET_FLAGS flags) { + buffer_fast_strcat(wb, "BEGIN \"", 7); + buffer_fast_strcat(wb, rrdset_id(st), string_strlen(st->id)); + buffer_fast_strcat(wb, "\" ", 2); + + if(st->last_collected_time.tv_sec > st->rrdpush.sender.resync_time_s) + buffer_print_uint64(wb, st->usec_since_last_update); + else + buffer_fast_strcat(wb, "0", 1); + + buffer_fast_strcat(wb, "\n", 1); + + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + if(unlikely(!rrddim_check_updated(rd))) + continue; + + if(likely(rrddim_check_upstream_exposed_collector(rd))) { + buffer_fast_strcat(wb, "SET \"", 5); + buffer_fast_strcat(wb, rrddim_id(rd), string_strlen(rd->id)); + buffer_fast_strcat(wb, "\" = ", 4); + buffer_print_int64(wb, rd->collector.collected_value); + buffer_fast_strcat(wb, "\n", 1); + } + else { + internal_error(true, "STREAM: 'host:%s/chart:%s/dim:%s' flag 'exposed' is updated but not exposed", + rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_id(rd)); + // we will include it in the next iteration + rrddim_metadata_updated(rd); + } + } + rrddim_foreach_done(rd); + + if(unlikely(flags & RRDSET_FLAG_UPSTREAM_SEND_VARIABLES)) + rrdvar_print_to_streaming_custom_chart_variables(st, wb); + + buffer_fast_strcat(wb, "END\n", 4); +} + +void rrdset_push_metrics_v1(RRDSET_STREAM_BUFFER *rsb, RRDSET *st) { + RRDHOST *host = st->rrdhost; + rrdpush_send_chart_metrics(rsb->wb, st, host->sender, rsb->rrdset_flags); +} 
+// Append one SET_V2 line for `rd` to rsb->wb; when the point's end second differs from the last one written, END_V2 closes any open block and a new BEGIN_V2 header is emitted first.
+void rrddim_push_metrics_v2(RRDSET_STREAM_BUFFER *rsb, RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, SN_FLAGS flags) {
+    if(!rsb->wb || !rsb->v2 || !netdata_double_isnumber(n) || !does_storage_number_exist(flags))
+        return;
+
+    bool with_slots = stream_has_capability(rsb, STREAM_CAP_SLOTS) ? true : false;
+    NUMBER_ENCODING integer_encoding = stream_has_capability(rsb, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_HEX;
+    NUMBER_ENCODING doubles_encoding = stream_has_capability(rsb, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_DECIMAL;
+    BUFFER *wb = rsb->wb;
+    time_t point_end_time_s = (time_t)(point_end_time_ut / USEC_PER_SEC);
+    if(unlikely(rsb->last_point_end_time_s != point_end_time_s)) {
+
+        if(unlikely(rsb->begin_v2_added))
+            buffer_fast_strcat(wb, PLUGINSD_KEYWORD_END_V2 "\n", sizeof(PLUGINSD_KEYWORD_END_V2) - 1 + 1); // keyword length + the "\n"
+
+        buffer_fast_strcat(wb, PLUGINSD_KEYWORD_BEGIN_V2, sizeof(PLUGINSD_KEYWORD_BEGIN_V2) - 1);
+
+        if(with_slots) {
+            buffer_fast_strcat(wb, " "PLUGINSD_KEYWORD_SLOT":", sizeof(PLUGINSD_KEYWORD_SLOT) - 1 + 2); // keyword length + the leading ' ' and trailing ':'
+            buffer_print_uint64_encoded(wb, integer_encoding, rd->rrdset->rrdpush.sender.chart_slot);
+        }
+
+        buffer_fast_strcat(wb, " '", 2);
+        buffer_fast_strcat(wb, rrdset_id(rd->rrdset), string_strlen(rd->rrdset->id));
+        buffer_fast_strcat(wb, "' ", 2);
+        buffer_print_uint64_encoded(wb, integer_encoding, rd->rrdset->update_every);
+        buffer_fast_strcat(wb, " ", 1);
+        buffer_print_uint64_encoded(wb, integer_encoding, point_end_time_s);
+        buffer_fast_strcat(wb, " ", 1);
+        if(point_end_time_s == rsb->wall_clock_time) // "#" is written instead of the wall clock time when it equals the point end time
+            buffer_fast_strcat(wb, "#", 1);
+        else
+            buffer_print_uint64_encoded(wb, integer_encoding, rsb->wall_clock_time);
+        buffer_fast_strcat(wb, "\n", 1);
+
+        rsb->last_point_end_time_s = point_end_time_s;
+        rsb->begin_v2_added = true;
+    }
+
+    buffer_fast_strcat(wb, PLUGINSD_KEYWORD_SET_V2, sizeof(PLUGINSD_KEYWORD_SET_V2) - 1);
+
+    if(with_slots) {
+        buffer_fast_strcat(wb, " "PLUGINSD_KEYWORD_SLOT":", sizeof(PLUGINSD_KEYWORD_SLOT) - 1 + 2);
+        buffer_print_uint64_encoded(wb, integer_encoding, rd->rrdpush.sender.dim_slot);
+    }
+
+    buffer_fast_strcat(wb, " '", 2);
+    buffer_fast_strcat(wb, rrddim_id(rd), string_strlen(rd->id));
+    buffer_fast_strcat(wb, "' ", 2);
+    buffer_print_int64_encoded(wb, integer_encoding, rd->collector.last_collected_value);
+    buffer_fast_strcat(wb, " ", 1);
+
+    if((NETDATA_DOUBLE)rd->collector.last_collected_value == n) // "#" is written instead of `n` when it equals the collected value just sent
+        buffer_fast_strcat(wb, "#", 1);
+    else
+        buffer_print_netdata_double_encoded(wb, doubles_encoding, n);
+
+    buffer_fast_strcat(wb, " ", 1);
+    buffer_print_sn_flags(wb, flags, true);
+    buffer_fast_strcat(wb, "\n", 1);
+}
+// Finish pushing `st`: close an open v2 block (chart variables first, when flagged), commit rsb->wb as DATA traffic and reset *rsb. No-op when rsb->wb is NULL.
+void rrdset_push_metrics_finished(RRDSET_STREAM_BUFFER *rsb, RRDSET *st) {
+    if(!rsb->wb)
+        return;
+
+    if(rsb->v2 && rsb->begin_v2_added) {
+        if(unlikely(rsb->rrdset_flags & RRDSET_FLAG_UPSTREAM_SEND_VARIABLES))
+            rrdvar_print_to_streaming_custom_chart_variables(st, rsb->wb);
+
+        buffer_fast_strcat(rsb->wb, PLUGINSD_KEYWORD_END_V2 "\n", sizeof(PLUGINSD_KEYWORD_END_V2) - 1 + 1);
+    }
+
+    sender_commit(st->rrdhost->sender, rsb->wb, STREAM_TRAFFIC_TYPE_DATA);
+
+    *rsb = (RRDSET_STREAM_BUFFER){ .wb = NULL, };
+}
+
diff --git a/src/streaming/protocol/command-chart-definition.c b/src/streaming/protocol/command-chart-definition.c
new file mode 100644
index 00000000000000..864d13242b965c
--- /dev/null
+++ b/src/streaming/protocol/command-chart-definition.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "commands.h"
+#include "plugins.d/pluginsd_internals.h"
+
+// chart labels: walkthrough callback writing one CLABEL line per label; RRDLABEL_FLAG_INTERNAL is masked out of the source value
+static int send_clabels_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) {
+    BUFFER *wb = (BUFFER *)data;
+    buffer_sprintf(wb, PLUGINSD_KEYWORD_CLABEL " \"%s\" \"%s\" %d\n", name, value, ls & ~(RRDLABEL_FLAG_INTERNAL));
+    return 1;
+}
+// Write all labels of `st` to `wb` and, when the walkthrough returns > 0, a CLABEL_COMMIT line after them.
+static void rrdpush_send_clabels(BUFFER *wb, RRDSET *st) {
+    if (st->rrdlabels) {
+        if(rrdlabels_walkthrough_read(st->rrdlabels, send_clabels_callback, wb) > 0)
+            buffer_sprintf(wb, PLUGINSD_KEYWORD_CLABEL_COMMIT "\n");
+    }
+}
+
+// Send the current chart definition (CHART, labels, DIMENSIONs, functions, variables) and commit it; returns true when the sender has STREAM_CAP_REPLICATION, i.e. a CHART_DEFINITION_END line was written.
+// Assumes that collector thread has already called sender_start for mutex / buffer state.
+bool rrdpush_send_chart_definition(BUFFER *wb, RRDSET *st) {
+    uint32_t version = rrdset_metadata_version(st);
+
+    RRDHOST *host = st->rrdhost;
+    NUMBER_ENCODING integer_encoding = stream_has_capability(host->sender, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_HEX;
+    bool with_slots = stream_has_capability(host->sender, STREAM_CAP_SLOTS) ? true : false;
+
+    bool replication_progress = false;
+
+    // properly set the name for the remote end to parse it
+    char *name = "";
+    if(likely(st->name)) {
+        if(unlikely(st->id != st->name)) {
+            // they differ
+            name = strchr(rrdset_name(st), '.'); // only the part after the first '.' is sent; "" when there is no '.'
+            if(name)
+                name++;
+            else
+                name = "";
+        }
+    }
+
+    buffer_fast_strcat(wb, PLUGINSD_KEYWORD_CHART, sizeof(PLUGINSD_KEYWORD_CHART) - 1);
+
+    if(with_slots) {
+        buffer_fast_strcat(wb, " "PLUGINSD_KEYWORD_SLOT":", sizeof(PLUGINSD_KEYWORD_SLOT) - 1 + 2);
+        buffer_print_uint64_encoded(wb, integer_encoding, st->rrdpush.sender.chart_slot);
+    }
+
+    // send the chart
+    buffer_sprintf(
+        wb
+        , " \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" %d %d \"%s %s %s\" \"%s\" \"%s\"\n"
+        , rrdset_id(st)
+        , name
+        , rrdset_title(st)
+        , rrdset_units(st)
+        , rrdset_family(st)
+        , rrdset_context(st)
+        , rrdset_type_name(st->chart_type)
+        , st->priority
+        , st->update_every
+        , rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)?"obsolete":""
+        , rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST)?"store_first":""
+        , rrdset_flag_check(st, RRDSET_FLAG_HIDDEN)?"hidden":""
+        , rrdset_plugin_name(st)
+        , rrdset_module_name(st)
+    );
+
+    // send the chart labels
+    if (stream_has_capability(host->sender, STREAM_CAP_CLABELS))
+        rrdpush_send_clabels(wb, st);
+
+    // send the dimensions
+    RRDDIM *rd;
+    rrddim_foreach_read(rd, st) {
+        buffer_fast_strcat(wb, PLUGINSD_KEYWORD_DIMENSION, sizeof(PLUGINSD_KEYWORD_DIMENSION) - 1);
+
+        if(with_slots) {
+            buffer_fast_strcat(wb, " "PLUGINSD_KEYWORD_SLOT":", sizeof(PLUGINSD_KEYWORD_SLOT) - 1 + 2);
+            buffer_print_uint64_encoded(wb, integer_encoding, rd->rrdpush.sender.dim_slot);
+        }
+
+        buffer_sprintf(
+            wb
+            , " \"%s\" \"%s\" \"%s\" %d %d \"%s %s %s\"\n"
+            , rrddim_id(rd)
+            , rrddim_name(rd)
+            , rrd_algorithm_name(rd->algorithm)
+            , rd->multiplier
+            , rd->divisor
+            , rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)?"obsolete":""
+            , rrddim_option_check(rd, RRDDIM_OPTION_HIDDEN)?"hidden":""
+            , rrddim_option_check(rd, RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS)?"noreset":""
+        );
+    }
+    rrddim_foreach_done(rd);
+
+    // send the chart functions
+    if(stream_has_capability(host->sender, STREAM_CAP_FUNCTIONS))
+        rrd_chart_functions_expose_rrdpush(st, wb);
+
+    // send the chart local custom variables
+    rrdvar_print_to_streaming_custom_chart_variables(st, wb);
+
+    if (stream_has_capability(host->sender, STREAM_CAP_REPLICATION)) {
+        time_t db_first_time_t, db_last_time_t;
+
+        time_t now = now_realtime_sec();
+        rrdset_get_retention_of_tier_for_collected_chart(st, &db_first_time_t, &db_last_time_t, now, 0);
+
+        buffer_sprintf(wb, PLUGINSD_KEYWORD_CHART_DEFINITION_END " %llu %llu %llu\n",
+                       (unsigned long long)db_first_time_t,
+                       (unsigned long long)db_last_time_t,
+                       (unsigned long long)now);
+
+        if(!rrdset_flag_check(st, RRDSET_FLAG_SENDER_REPLICATION_IN_PROGRESS)) { // the host counter is incremented only on the transition into "in progress"
+            rrdset_flag_set(st, RRDSET_FLAG_SENDER_REPLICATION_IN_PROGRESS);
+            rrdset_flag_clear(st, RRDSET_FLAG_SENDER_REPLICATION_FINISHED);
+            rrdhost_sender_replicating_charts_plus_one(st->rrdhost);
+        }
+        replication_progress = true;
+
+#ifdef NETDATA_LOG_REPLICATION_REQUESTS
+        internal_error(true, "REPLAY: 'host:%s/chart:%s' replication starts",
+                       rrdhost_hostname(st->rrdhost), rrdset_id(st));
+#endif
+    }
+
+    sender_commit(host->sender, wb, STREAM_TRAFFIC_TYPE_METADATA);
+
+    // we can set the exposed flag, after we commit the buffer
+    // because replication may pick it up prematurely
+    rrddim_foreach_read(rd, st) {
+        rrddim_metadata_exposed_upstream(rd, version);
+    }
+    rrddim_foreach_done(rd);
+    rrdset_metadata_exposed_upstream(st, version);
+
+    st->rrdpush.sender.resync_time_s = st->last_collected_time.tv_sec + (stream_conf_initial_clock_resync_iterations * st->update_every);
+    return replication_progress;
+}
+// Decide whether `st` is streamed to the parent: false until RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED is set; the SEND/IGNORE decision is computed on first use and stored in the chart flags.
+bool should_send_chart_matching(RRDSET *st, RRDSET_FLAGS flags) {
+    if(!(flags & RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED))
+        return false;
+
+    if(unlikely(!(flags & (RRDSET_FLAG_UPSTREAM_SEND | RRDSET_FLAG_UPSTREAM_IGNORE)))) {
+        RRDHOST *host = st->rrdhost;
+
+        if (flags & RRDSET_FLAG_ANOMALY_DETECTION) {
+            if(ml_streaming_enabled())
+                rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_SEND);
+            else
+                rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_IGNORE);
+        }
+        else {
+            int negative = 0, positive = 0; // context, name and id are tested in turn; testing stops at the first negative match
+            SIMPLE_PATTERN_RESULT r;
+
+            r = simple_pattern_matches_string_extract(host->rrdpush.send.charts_matching, st->context, NULL, 0);
+            if(r == SP_MATCHED_POSITIVE) positive++;
+            else if(r == SP_MATCHED_NEGATIVE) negative++;
+
+            if(!negative) {
+                r = simple_pattern_matches_string_extract(host->rrdpush.send.charts_matching, st->name, NULL, 0);
+                if (r == SP_MATCHED_POSITIVE) positive++;
+                else if (r == SP_MATCHED_NEGATIVE) negative++;
+            }
+
+            if(!negative) {
+                r = simple_pattern_matches_string_extract(host->rrdpush.send.charts_matching, st->id, NULL, 0);
+                if (r == SP_MATCHED_POSITIVE) positive++;
+                else if (r == SP_MATCHED_NEGATIVE) negative++;
+            }
+
+            if(!negative && positive) // sent only with at least one positive match and no negative one
+                rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_SEND);
+            else
+                rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_IGNORE);
+        }
+
+        // get the flags again, to know how to respond
+        flags = rrdset_flag_check(st, RRDSET_FLAG_UPSTREAM_SEND|RRDSET_FLAG_UPSTREAM_IGNORE);
+    }
+
+    return flags & RRDSET_FLAG_UPSTREAM_SEND;
+}
+
+// Push this chart's definition to the parent right away; returns false (sending nothing) when the host cannot send definitions or the chart is not selected for streaming.
+bool rrdset_push_chart_definition_now(RRDSET *st) {
+    RRDHOST *host = st->rrdhost;
+
+    if(unlikely(!rrdhost_can_send_definitions_to_parent(host)
+                || !should_send_chart_matching(st, rrdset_flag_get(st)))) {
+        return false;
+    }
+
+    BUFFER *wb = sender_start(host->sender);
+    rrdpush_send_chart_definition(wb, st); // commits wb itself; its return value is not used here
+    sender_thread_buffer_free();
+
+    return true;
+}
+
diff --git a/src/streaming/protocol/command-function.c b/src/streaming/protocol/command-function.c
new file mode 100644
index 00000000000000..d9b28eb4e9e026
--- /dev/null
+++ b/src/streaming/protocol/command-function.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "commands.h"
+#include "plugins.d/pluginsd_internals.h"
+// Send the host's global functions to the parent; does nothing unless the sender has STREAM_CAP_FUNCTIONS and the host can send definitions.
+void rrdpush_send_global_functions(RRDHOST *host) {
+    if(!stream_has_capability(host->sender, STREAM_CAP_FUNCTIONS))
+        return;
+
+    if(unlikely(!rrdhost_can_send_definitions_to_parent(host)))
+        return;
+
+    BUFFER *wb = sender_start(host->sender);
+
+    rrd_global_functions_expose_rrdpush(host, wb, stream_has_capability(host->sender, STREAM_CAP_DYNCFG));
+
+    sender_commit(host->sender, wb, STREAM_TRAFFIC_TYPE_FUNCTIONS);
+
+    sender_thread_buffer_free();
+}
diff --git a/src/streaming/protocol/command-host-labels.c b/src/streaming/protocol/command-host-labels.c
new file mode 100644
index 00000000000000..7c2a2d0dd40c0b
--- /dev/null
+++ b/src/streaming/protocol/command-host-labels.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "commands.h"
+#include "plugins.d/pluginsd_internals.h"
+// Label walkthrough callback: appends one LABEL line (name, source, value) to the BUFFER passed in `data`.
+static int send_labels_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) {
+    BUFFER *wb = (BUFFER *)data;
+    buffer_sprintf(wb, "LABEL \"%s\" = %d \"%s\"\n", name, ls, value);
+    return 1;
+}
+// Send all host labels followed by an "OVERWRITE labels" line; does nothing unless the host can send definitions and the sender has STREAM_CAP_HLABELS.
+void rrdpush_send_host_labels(RRDHOST *host) {
+    if(unlikely(!rrdhost_can_send_definitions_to_parent(host)
+                || !stream_has_capability(host->sender, STREAM_CAP_HLABELS)))
+        return;
+
+    BUFFER *wb = sender_start(host->sender);
+
+    rrdlabels_walkthrough_read(host->rrdlabels, send_labels_callback, wb);
+    buffer_sprintf(wb, "OVERWRITE %s\n", "labels");
+
+    sender_commit(host->sender, wb, STREAM_TRAFFIC_TYPE_METADATA);
+
+    sender_thread_buffer_free();
+}
diff --git a/src/streaming/protocol/command-host-variables.c b/src/streaming/protocol/command-host-variables.c
new file mode 100644
index 00000000000000..83e4990d67d426
--- /dev/null
+++ b/src/streaming/protocol/command-host-variables.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "commands.h"
+#include "plugins.d/pluginsd_internals.h"
+// Append a "VARIABLE HOST <name> = <value>" line for `rva` to `wb` and log the same at debug level.
+static inline void rrdpush_sender_add_host_variable_to_buffer(BUFFER *wb, const RRDVAR_ACQUIRED *rva) {
+    buffer_sprintf(
+        wb
+        , "VARIABLE HOST %s = " NETDATA_DOUBLE_FORMAT "\n"
+        , rrdvar_name(rva)
+        , rrdvar2number(rva)
+    );
+
+    netdata_log_debug(D_STREAM, "RRDVAR pushed HOST VARIABLE %s = " NETDATA_DOUBLE_FORMAT, rrdvar_name(rva), rrdvar2number(rva));
+}
+// Send a single host variable to the parent right away; does nothing when the host cannot send definitions.
+void rrdpush_sender_send_this_host_variable_now(RRDHOST *host, const RRDVAR_ACQUIRED *rva) {
+    if(rrdhost_can_send_definitions_to_parent(host)) {
+        BUFFER *wb = sender_start(host->sender);
+        rrdpush_sender_add_host_variable_to_buffer(wb, rva);
+        sender_commit(host->sender, wb, STREAM_TRAFFIC_TYPE_METADATA);
+        sender_thread_buffer_free();
+    }
+}
+// Walkthrough context: the buffer the callback below appends to.
+struct custom_host_variables_callback {
+    BUFFER *wb;
+};
+// Variable walkthrough callback: `item` is used as the acquired variable (even though it is tagged __maybe_unused) and serialized into tmp->wb.
+static int rrdpush_sender_thread_custom_host_variables_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdvar_ptr __maybe_unused, void *struct_ptr) {
+    const RRDVAR_ACQUIRED *rv = (const RRDVAR_ACQUIRED *)item;
+    struct custom_host_variables_callback *tmp = struct_ptr;
+    BUFFER *wb = tmp->wb;
+
+    rrdpush_sender_add_host_variable_to_buffer(wb, rv);
+    return 1;
+}
+// Send every variable in host->rrdvars to the parent in a single commit; does nothing when the host cannot send definitions.
+void rrdpush_sender_thread_send_custom_host_variables(RRDHOST *host) {
+    if(rrdhost_can_send_definitions_to_parent(host)) {
+        BUFFER *wb = sender_start(host->sender);
+        struct custom_host_variables_callback tmp = {
+            .wb = wb
+        };
+        int ret = rrdvar_walkthrough_read(host->rrdvars, rrdpush_sender_thread_custom_host_variables_callback, &tmp);
+        (void)ret;
+        sender_commit(host->sender, wb, STREAM_TRAFFIC_TYPE_METADATA);
+        sender_thread_buffer_free();
+
+        netdata_log_debug(D_STREAM, "RRDVAR sent %d VARIABLES", ret);
+    }
+}
diff --git a/src/streaming/protocol/commands.c b/src/streaming/protocol/commands.c
index 95a34529b6497b..e9e16bdac25177 100644
--- a/src/streaming/protocol/commands.c
+++ b/src/streaming/protocol/commands.c
@@ -1,3 +1,58 @@
 // SPDX-License-Identifier: GPL-3.0-or-later
 
 #include "commands.h"
+// Prepare a stream buffer for pushing `st`'s metrics. The returned .wb is NULL when nothing should be sent now: sender not ready, chart not selected, or replication not finished.
+RRDSET_STREAM_BUFFER rrdset_push_metric_initialize(RRDSET *st, time_t wall_clock_time) {
+    RRDHOST *host = st->rrdhost;
+
+    // fetch the flags we need to check with one atomic operation
+    RRDHOST_FLAGS host_flags = __atomic_load_n(&host->flags, __ATOMIC_SEQ_CST);
+
+    // check if we are not connected
+    if(unlikely(!(host_flags & RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS))) {
+
+        if(unlikely(!(host_flags & (RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN | RRDHOST_FLAG_RRDPUSH_RECEIVER_DISCONNECTED))))
+            rrdpush_sender_thread_spawn(host);
+
+        if(unlikely(!(host_flags & RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS))) { // the flag keeps the "not ready" notice from being logged on every call
+            rrdhost_flag_set(host, RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS);
+            nd_log_daemon(NDLP_NOTICE, "STREAM %s [send]: not ready - collected metrics are not sent to parent.", rrdhost_hostname(host));
+        }
+
+        return (RRDSET_STREAM_BUFFER) { .wb = NULL, };
+    }
+    else if(unlikely(host_flags & RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS)) {
+        nd_log_daemon(NDLP_INFO, "STREAM %s [send]: sending metrics to parent...", rrdhost_hostname(host));
+        rrdhost_flag_clear(host, RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS);
+    }
+
+    if(unlikely(host_flags & RRDHOST_FLAG_GLOBAL_FUNCTIONS_UPDATED)) {
+        BUFFER *wb = sender_start(host->sender);
+        rrd_global_functions_expose_rrdpush(host, wb, stream_has_capability(host->sender, STREAM_CAP_DYNCFG));
+        sender_commit(host->sender, wb, STREAM_TRAFFIC_TYPE_FUNCTIONS);
+    }
+
+    bool exposed_upstream = rrdset_check_upstream_exposed(st);
+    RRDSET_FLAGS rrdset_flags = rrdset_flag_get(st);
+    bool replication_in_progress = !(rrdset_flags & RRDSET_FLAG_SENDER_REPLICATION_FINISHED);
+
+    if(unlikely((exposed_upstream && replication_in_progress) ||
+                !should_send_chart_matching(st, rrdset_flags)))
+        return (RRDSET_STREAM_BUFFER) { .wb = NULL, };
+
+    if(unlikely(!exposed_upstream)) {
+        BUFFER *wb = sender_start(host->sender);
+        replication_in_progress = rrdpush_send_chart_definition(wb, st); // true when the sender has STREAM_CAP_REPLICATION
+    }
+
+    if(replication_in_progress)
+        return (RRDSET_STREAM_BUFFER) { .wb = NULL, };
+
+    return (RRDSET_STREAM_BUFFER) {
+        .capabilities = host->sender->capabilities,
+        .v2 = stream_has_capability(host->sender, STREAM_CAP_INTERPOLATED),
+        .rrdset_flags = rrdset_flags,
+        .wb = sender_start(host->sender),
+        .wall_clock_time = wall_clock_time,
+    };
+}
diff --git a/src/streaming/protocol/commands.h b/src/streaming/protocol/commands.h
index 4f5ca7875a4a87..81344175c1e07b 100644
--- a/src/streaming/protocol/commands.h
+++ b/src/streaming/protocol/commands.h
@@ -3,12 +3,39 @@
 #ifndef NETDATA_STREAMING_PROTCOL_COMMANDS_H
 #define NETDATA_STREAMING_PROTCOL_COMMANDS_H
 
+#include "database/rrd.h"
 #include "../rrdpush.h"
 
+typedef struct rrdset_stream_buffer {
+    STREAM_CAPABILITIES capabilities; // copied from host->sender->capabilities by rrdset_push_metric_initialize()
+    bool v2; // set from the sender's STREAM_CAP_INTERPOLATED capability
+    bool begin_v2_added; // set once a BEGIN_V2 line has been written to wb
+    time_t wall_clock_time; // caller-supplied; written in BEGIN_V2 ("#" when equal to the point end time)
+    RRDSET_FLAGS rrdset_flags; // chart flags sampled by rrdset_push_metric_initialize()
+    time_t last_point_end_time_s; // end time, in seconds, of the last BEGIN_V2 written
+    BUFFER *wb; // NULL means there is nothing to send
+} RRDSET_STREAM_BUFFER;
+
+RRDSET_STREAM_BUFFER rrdset_push_metric_initialize(RRDSET *st, time_t wall_clock_time);
+
 void rrdpush_sender_get_node_and_claim_id_from_parent(struct sender_state *s);
 void rrdpush_receiver_send_node_and_claim_id_to_child(RRDHOST *host);
 void rrdpush_sender_clear_parent_claim_id(RRDHOST *host);
 
 void rrdpush_sender_send_claimed_id(RRDHOST *host);
 
+void rrdpush_send_global_functions(RRDHOST *host);
+void rrdpush_send_host_labels(RRDHOST *host);
+
+void rrdpush_sender_thread_send_custom_host_variables(RRDHOST *host);
+void rrdpush_sender_send_this_host_variable_now(RRDHOST *host, const RRDVAR_ACQUIRED *rva);
+
+bool rrdpush_send_chart_definition(BUFFER *wb, RRDSET *st);
+bool rrdset_push_chart_definition_now(RRDSET *st);
+bool should_send_chart_matching(RRDSET *st, RRDSET_FLAGS flags);
+
+void rrdset_push_metrics_v1(RRDSET_STREAM_BUFFER *rsb, RRDSET *st);
+void rrddim_push_metrics_v2(RRDSET_STREAM_BUFFER *rsb, RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, SN_FLAGS flags);
+void rrdset_push_metrics_finished(RRDSET_STREAM_BUFFER *rsb, RRDSET *st);
+
 #endif //NETDATA_STREAMING_PROTCOL_COMMANDS_H
diff --git a/src/streaming/receiver.c b/src/streaming/receiver.c
index da910b1c1ccac8..619d308c43d4a5 100644
--- a/src/streaming/receiver.c
+++ b/src/streaming/receiver.c
@@ -3,7 +3,10 @@
 #include "rrdpush.h"
 #include "web/server/h2o/http_server.h"
 
-extern struct config stream_config;
+// When a child disconnects this is the maximum we will wait
+// before we update the cloud that the child is offline
+#define MAX_CHILD_DISC_DELAY (30000)
+#define MAX_CHILD_DISC_TOLERANCE (125 / 100) // NOTE(review): integer division - this evaluates to 1, not 1.25; confirm the intent where it is used
 
 void receiver_state_free(struct receiver_state *rpt) {
     netdata_ssl_close(&rpt->ssl);
@@ -553,7 +556,7 @@ static void rrdpush_send_error_on_taken_over_connection(struct receiver_state *r
             5);
 }
 
-void rrdpush_receive_log_status(struct receiver_state *rpt, const char *msg, const char *status, ND_LOG_FIELD_PRIORITY priority) {
+static void rrdpush_receive_log_status(struct receiver_state *rpt, const char *msg, const char *status, ND_LOG_FIELD_PRIORITY priority) {
     // this function may be called BEFORE we spawn the receiver thread
     // so, we need to add the fields again (it does not harm)
     ND_LOG_STACK lgs[] = {
@@ -589,14 +592,14 @@ static void rrdpush_receive(struct receiver_state *rpt)
     rpt->config.alarms_delay = 60;
     rpt->config.alarms_history = HEALTH_LOG_RETENTION_DEFAULT;
 
-    rpt->config.rrdpush_enabled = (int)default_rrdpush_enabled;
-    rpt->config.rrdpush_destination = default_rrdpush_destination;
-    rpt->config.rrdpush_api_key = default_rrdpush_api_key;
-    rpt->config.rrdpush_send_charts_matching = default_rrdpush_send_charts_matching;
+    rpt->config.rrdpush_enabled = (int)stream_conf_send_enabled;
+    rpt->config.rrdpush_destination = stream_conf_send_destination;
+    rpt->config.rrdpush_api_key = stream_conf_send_api_key;
+    rpt->config.rrdpush_send_charts_matching = stream_conf_send_charts_matching;
 
-    rpt->config.rrdpush_enable_replication = default_rrdpush_enable_replication;
-    rpt->config.rrdpush_seconds_to_replicate = default_rrdpush_seconds_to_replicate;
-    rpt->config.rrdpush_replication_step = default_rrdpush_replication_step;
+    rpt->config.rrdpush_enable_replication = stream_conf_replication_enabled;
+    rpt->config.rrdpush_seconds_to_replicate = stream_conf_replication_period;
+    rpt->config.rrdpush_replication_step = stream_conf_replication_step;
 
     rpt->config.update_every = (int)appconfig_get_duration_seconds(&stream_config, rpt->machine_guid, "update every", rpt->config.update_every);
     if(rpt->config.update_every < 0) rpt->config.update_every = 1;
@@ -648,7 +651,7 @@ static void rrdpush_receive(struct receiver_state *rpt)
     rpt->config.rrdpush_replication_step = appconfig_get_number(&stream_config, rpt->key, "replication step", rpt->config.rrdpush_replication_step);
     rpt->config.rrdpush_replication_step = appconfig_get_number(&stream_config, rpt->machine_guid, "replication step", rpt->config.rrdpush_replication_step);
 
-    rpt->config.rrdpush_compression = default_rrdpush_compression_enabled;
+    rpt->config.rrdpush_compression = stream_conf_compression_enabled;
     rpt->config.rrdpush_compression = appconfig_get_boolean(&stream_config, rpt->key, "enable compression", rpt->config.rrdpush_compression);
     rpt->config.rrdpush_compression = appconfig_get_boolean(&stream_config, rpt->machine_guid, "enable compression", rpt->config.rrdpush_compression);
 
@@ -922,3 +925,457 @@ void *rrdpush_receiver_thread(void *ptr) {
     receiver_state_free(rpt);
     return NULL;
 }
+// Reply for rejected streaming requests: writes the fixed NOT_PERMITTED message and returns HTTP_RESP_UNAUTHORIZED.
+int rrdpush_receiver_permission_denied(struct web_client *w) {
+    // we always respond with the same message and error code
+    // to prevent an attacker from gaining info about the error
+    buffer_flush(w->response.data);
+    buffer_strcat(w->response.data, START_STREAMING_ERROR_NOT_PERMITTED);
+    return HTTP_RESP_UNAUTHORIZED;
+}
+// Reply used when the connection cannot be accepted right now: writes the BUSY_TRY_LATER message and returns HTTP_RESP_SERVICE_UNAVAILABLE.
+int rrdpush_receiver_too_busy_now(struct web_client *w) {
+    // we always respond with the same message and error code
+    // to prevent an attacker from gaining info about the error
+    buffer_flush(w->response.data);
+    buffer_strcat(w->response.data, START_STREAMING_ERROR_BUSY_TRY_LATER);
+    return HTTP_RESP_SERVICE_UNAVAILABLE;
+}
+// Hand the client's socket (w->ifd) and SSL state over to `rpt`, then detach them from `w`: DONT_CLOSE_SOCKET flag in static-threaded mode, fds set to -1 otherwise.
+static void rrdpush_receiver_takeover_web_connection(struct web_client *w, struct receiver_state *rpt) {
+    rpt->fd = w->ifd;
+
+    rpt->ssl.conn = w->ssl.conn;
+    rpt->ssl.state = w->ssl.state;
+
+    w->ssl = NETDATA_SSL_UNSET_CONNECTION;
+
+    WEB_CLIENT_IS_DEAD(w);
+
+    if(web_server_mode == WEB_SERVER_MODE_STATIC_THREADED) {
+        web_client_flag_set(w, WEB_CLIENT_FLAG_DONT_CLOSE_SOCKET);
+    }
+    else {
+        if(w->ifd == w->ofd)
+            w->ifd = w->ofd = -1;
+        else
+            w->ifd = -1;
+    }
+
+    buffer_flush(w->response.data);
+}
+
+int rrdpush_receiver_thread_spawn(struct web_client *w, char *decoded_query_string, void *h2o_ctx __maybe_unused) {
+
+    if(!service_running(ABILITY_STREAMING_CONNECTIONS))
+        return rrdpush_receiver_too_busy_now(w);
+
+    struct receiver_state *rpt = callocz(1, sizeof(*rpt));
+    rpt->connected_since_s = now_realtime_sec();
+    rpt->last_msg_t = now_monotonic_sec();
+    rpt->hops = 1;
+
+    rpt->capabilities = STREAM_CAP_INVALID;
+
+#ifdef ENABLE_H2O
+    rpt->h2o_ctx = h2o_ctx;
+#endif
+
+    __atomic_add_fetch(&netdata_buffers_statistics.rrdhost_receivers, sizeof(*rpt), __ATOMIC_RELAXED);
+    __atomic_add_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(struct rrdhost_system_info), __ATOMIC_RELAXED);
+
+    rpt->system_info = callocz(1, sizeof(struct rrdhost_system_info));
+    rpt->system_info->hops = rpt->hops;
+
+
rpt->fd = -1; + rpt->client_ip = strdupz(w->client_ip); + rpt->client_port = strdupz(w->client_port); + + rpt->ssl = NETDATA_SSL_UNSET_CONNECTION; + + rpt->config.update_every = default_rrd_update_every; + + // parse the parameters and fill rpt and rpt->system_info + + while(decoded_query_string) { + char *value = strsep_skip_consecutive_separators(&decoded_query_string, "&"); + if(!value || !*value) continue; + + char *name = strsep_skip_consecutive_separators(&value, "="); + if(!name || !*name) continue; + if(!value || !*value) continue; + + if(!strcmp(name, "key") && !rpt->key) + rpt->key = strdupz(value); + + else if(!strcmp(name, "hostname") && !rpt->hostname) + rpt->hostname = strdupz(value); + + else if(!strcmp(name, "registry_hostname") && !rpt->registry_hostname) + rpt->registry_hostname = strdupz(value); + + else if(!strcmp(name, "machine_guid") && !rpt->machine_guid) + rpt->machine_guid = strdupz(value); + + else if(!strcmp(name, "update_every")) + rpt->config.update_every = (int)strtoul(value, NULL, 0); + + else if(!strcmp(name, "os") && !rpt->os) + rpt->os = strdupz(value); + + else if(!strcmp(name, "timezone") && !rpt->timezone) + rpt->timezone = strdupz(value); + + else if(!strcmp(name, "abbrev_timezone") && !rpt->abbrev_timezone) + rpt->abbrev_timezone = strdupz(value); + + else if(!strcmp(name, "utc_offset")) + rpt->utc_offset = (int32_t)strtol(value, NULL, 0); + + else if(!strcmp(name, "hops")) + rpt->hops = rpt->system_info->hops = (uint16_t) strtoul(value, NULL, 0); + + else if(!strcmp(name, "ml_capable")) + rpt->system_info->ml_capable = strtoul(value, NULL, 0); + + else if(!strcmp(name, "ml_enabled")) + rpt->system_info->ml_enabled = strtoul(value, NULL, 0); + + else if(!strcmp(name, "mc_version")) + rpt->system_info->mc_version = strtoul(value, NULL, 0); + + else if(!strcmp(name, "ver") && (rpt->capabilities & STREAM_CAP_INVALID)) + rpt->capabilities = convert_stream_version_to_capabilities(strtoul(value, NULL, 0), NULL, false); + + else { + 
// An old Netdata child does not have a compatible streaming protocol, map to something sane. + if (!strcmp(name, "NETDATA_SYSTEM_OS_NAME")) + name = "NETDATA_HOST_OS_NAME"; + + else if (!strcmp(name, "NETDATA_SYSTEM_OS_ID")) + name = "NETDATA_HOST_OS_ID"; + + else if (!strcmp(name, "NETDATA_SYSTEM_OS_ID_LIKE")) + name = "NETDATA_HOST_OS_ID_LIKE"; + + else if (!strcmp(name, "NETDATA_SYSTEM_OS_VERSION")) + name = "NETDATA_HOST_OS_VERSION"; + + else if (!strcmp(name, "NETDATA_SYSTEM_OS_VERSION_ID")) + name = "NETDATA_HOST_OS_VERSION_ID"; + + else if (!strcmp(name, "NETDATA_SYSTEM_OS_DETECTION")) + name = "NETDATA_HOST_OS_DETECTION"; + + else if(!strcmp(name, "NETDATA_PROTOCOL_VERSION") && (rpt->capabilities & STREAM_CAP_INVALID)) + rpt->capabilities = convert_stream_version_to_capabilities(1, NULL, false); + + if (unlikely(rrdhost_set_system_info_variable(rpt->system_info, name, value))) { + nd_log_daemon(NDLP_NOTICE, "STREAM '%s' [receive from [%s]:%s]: " + "request has parameter '%s' = '%s', which is not used." + , (rpt->hostname && *rpt->hostname) ? 
rpt->hostname : "-" + , rpt->client_ip, rpt->client_port + , name, value); + } + } + } + + if (rpt->capabilities & STREAM_CAP_INVALID) + // no version is supplied, assume version 0; + rpt->capabilities = convert_stream_version_to_capabilities(0, NULL, false); + + // find the program name and version + if(w->user_agent && w->user_agent[0]) { + char *t = strchr(w->user_agent, '/'); + if(t && *t) { + *t = '\0'; + t++; + } + + rpt->program_name = strdupz(w->user_agent); + if(t && *t) rpt->program_version = strdupz(t); + } + + // check if we should accept this connection + + if(!rpt->key || !*rpt->key) { + rrdpush_receive_log_status( + rpt, "request without an API key, rejecting connection", + RRDPUSH_STATUS_NO_API_KEY, NDLP_WARNING); + + receiver_state_free(rpt); + return rrdpush_receiver_permission_denied(w); + } + + if(!rpt->hostname || !*rpt->hostname) { + rrdpush_receive_log_status( + rpt, "request without a hostname, rejecting connection", + RRDPUSH_STATUS_NO_HOSTNAME, NDLP_WARNING); + + receiver_state_free(rpt); + return rrdpush_receiver_permission_denied(w); + } + + if(!rpt->registry_hostname) + rpt->registry_hostname = strdupz(rpt->hostname); + + if(!rpt->machine_guid || !*rpt->machine_guid) { + rrdpush_receive_log_status( + rpt, "request without a machine GUID, rejecting connection", + RRDPUSH_STATUS_NO_MACHINE_GUID, NDLP_WARNING); + + receiver_state_free(rpt); + return rrdpush_receiver_permission_denied(w); + } + + { + char buf[GUID_LEN + 1]; + + if (regenerate_guid(rpt->key, buf) == -1) { + rrdpush_receive_log_status( + rpt, "API key is not a valid UUID (use the command uuidgen to generate one)", + RRDPUSH_STATUS_INVALID_API_KEY, NDLP_WARNING); + + receiver_state_free(rpt); + return rrdpush_receiver_permission_denied(w); + } + + if (regenerate_guid(rpt->machine_guid, buf) == -1) { + rrdpush_receive_log_status( + rpt, "machine GUID is not a valid UUID", + RRDPUSH_STATUS_INVALID_MACHINE_GUID, NDLP_WARNING); + + receiver_state_free(rpt); + return 
rrdpush_receiver_permission_denied(w); + } + } + + const char *api_key_type = appconfig_get(&stream_config, rpt->key, "type", "api"); + if(!api_key_type || !*api_key_type) api_key_type = "unknown"; + if(strcmp(api_key_type, "api") != 0) { + rrdpush_receive_log_status( + rpt, "API key is a machine GUID", + RRDPUSH_STATUS_INVALID_API_KEY, NDLP_WARNING); + + receiver_state_free(rpt); + return rrdpush_receiver_permission_denied(w); + } + + if(!appconfig_get_boolean(&stream_config, rpt->key, "enabled", 0)) { + rrdpush_receive_log_status( + rpt, "API key is not enabled", + RRDPUSH_STATUS_API_KEY_DISABLED, NDLP_WARNING); + + receiver_state_free(rpt); + return rrdpush_receiver_permission_denied(w); + } + + { + SIMPLE_PATTERN *key_allow_from = simple_pattern_create( + appconfig_get(&stream_config, rpt->key, "allow from", "*"), + NULL, SIMPLE_PATTERN_EXACT, true); + + if(key_allow_from) { + if(!simple_pattern_matches(key_allow_from, w->client_ip)) { + simple_pattern_free(key_allow_from); + + rrdpush_receive_log_status( + rpt, "API key is not allowed from this IP", + RRDPUSH_STATUS_NOT_ALLOWED_IP, NDLP_WARNING); + + receiver_state_free(rpt); + return rrdpush_receiver_permission_denied(w); + } + + simple_pattern_free(key_allow_from); + } + } + + { + const char *machine_guid_type = appconfig_get(&stream_config, rpt->machine_guid, "type", "machine"); + if (!machine_guid_type || !*machine_guid_type) machine_guid_type = "unknown"; + + if (strcmp(machine_guid_type, "machine") != 0) { + rrdpush_receive_log_status( + rpt, "machine GUID is an API key", + RRDPUSH_STATUS_INVALID_MACHINE_GUID, NDLP_WARNING); + + receiver_state_free(rpt); + return rrdpush_receiver_permission_denied(w); + } + } + + if(!appconfig_get_boolean(&stream_config, rpt->machine_guid, "enabled", 1)) { + rrdpush_receive_log_status( + rpt, "machine GUID is not enabled", + RRDPUSH_STATUS_MACHINE_GUID_DISABLED, NDLP_WARNING); + + receiver_state_free(rpt); + return rrdpush_receiver_permission_denied(w); + } + + { + 
SIMPLE_PATTERN *machine_allow_from = simple_pattern_create( + appconfig_get(&stream_config, rpt->machine_guid, "allow from", "*"), + NULL, SIMPLE_PATTERN_EXACT, true); + + if(machine_allow_from) { + if(!simple_pattern_matches(machine_allow_from, w->client_ip)) { + simple_pattern_free(machine_allow_from); + + rrdpush_receive_log_status( + rpt, "machine GUID is not allowed from this IP", + RRDPUSH_STATUS_NOT_ALLOWED_IP, NDLP_WARNING); + + receiver_state_free(rpt); + return rrdpush_receiver_permission_denied(w); + } + + simple_pattern_free(machine_allow_from); + } + } + + if (strcmp(rpt->machine_guid, localhost->machine_guid) == 0) { + + rrdpush_receiver_takeover_web_connection(w, rpt); + + rrdpush_receive_log_status( + rpt, "machine GUID is my own", + RRDPUSH_STATUS_LOCALHOST, NDLP_DEBUG); + + char initial_response[HTTP_HEADER_SIZE + 1]; + snprintfz(initial_response, HTTP_HEADER_SIZE, "%s", START_STREAMING_ERROR_SAME_LOCALHOST); + + if(send_timeout( + &rpt->ssl, + rpt->fd, initial_response, strlen(initial_response), 0, 60) != (ssize_t)strlen(initial_response)) { + + nd_log_daemon(NDLP_ERR, "STREAM '%s' [receive from [%s]:%s]: " + "failed to reply." 
+ , rpt->hostname + , rpt->client_ip, rpt->client_port + ); + } + + receiver_state_free(rpt); + return HTTP_RESP_OK; + } + + if(unlikely(web_client_streaming_rate_t > 0)) { + static SPINLOCK spinlock = NETDATA_SPINLOCK_INITIALIZER; + static time_t last_stream_accepted_t = 0; + + time_t now = now_realtime_sec(); + spinlock_lock(&spinlock); + + if(unlikely(last_stream_accepted_t == 0)) + last_stream_accepted_t = now; + + if(now - last_stream_accepted_t < web_client_streaming_rate_t) { + spinlock_unlock(&spinlock); + + char msg[100 + 1]; + snprintfz(msg, sizeof(msg) - 1, + "rate limit, will accept new connection in %ld secs", + (long)(web_client_streaming_rate_t - (now - last_stream_accepted_t))); + + rrdpush_receive_log_status( + rpt, msg, + RRDPUSH_STATUS_RATE_LIMIT, NDLP_NOTICE); + + receiver_state_free(rpt); + return rrdpush_receiver_too_busy_now(w); + } + + last_stream_accepted_t = now; + spinlock_unlock(&spinlock); + } + + /* + * Quick path for rejecting multiple connections. The lock taken is fine-grained - it only protects the receiver + * pointer within the host (if a host exists). This protects against multiple concurrent web requests hitting + * separate threads within the web-server and landing here. The lock guards the thread-shutdown sequence that + * detaches the receiver from the host. If the host is being created (first time-access) then we also use the + * lock to prevent race-hazard (two threads try to create the host concurrently, one wins and the other does a + * lookup to the now-attached structure). + */ + + { + time_t age = 0; + bool receiver_stale = false; + bool receiver_working = false; + + rrd_rdlock(); + RRDHOST *host = rrdhost_find_by_guid(rpt->machine_guid); + if (unlikely(host && rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED))) /* Ignore archived hosts. 
*/ + host = NULL; + + if (host) { + spinlock_lock(&host->receiver_lock); + if (host->receiver) { + age = now_monotonic_sec() - host->receiver->last_msg_t; + + if (age < 30) + receiver_working = true; + else + receiver_stale = true; + } + spinlock_unlock(&host->receiver_lock); + } + rrd_rdunlock(); + + if (receiver_stale && stop_streaming_receiver(host, STREAM_HANDSHAKE_DISCONNECT_STALE_RECEIVER)) { + // we stopped the receiver + // we can proceed with this connection + receiver_stale = false; + + nd_log_daemon(NDLP_NOTICE, "STREAM '%s' [receive from [%s]:%s]: " + "stopped previous stale receiver to accept this one." + , rpt->hostname + , rpt->client_ip, rpt->client_port + ); + } + + if (receiver_working || receiver_stale) { + // another receiver is already connected + // try again later + + char msg[200 + 1]; + snprintfz(msg, sizeof(msg) - 1, + "multiple connections for same host, " + "old connection was last used %ld secs ago%s", + age, receiver_stale ? " (signaled old receiver to stop)" : " (new connection not accepted)"); + + rrdpush_receive_log_status( + rpt, msg, + RRDPUSH_STATUS_ALREADY_CONNECTED, NDLP_DEBUG); + + // Have not set WEB_CLIENT_FLAG_DONT_CLOSE_SOCKET - caller should clean up + buffer_flush(w->response.data); + buffer_strcat(w->response.data, START_STREAMING_ERROR_ALREADY_STREAMING); + receiver_state_free(rpt); + return HTTP_RESP_CONFLICT; + } + } + + rrdpush_receiver_takeover_web_connection(w, rpt); + + char tag[NETDATA_THREAD_TAG_MAX + 1]; + snprintfz(tag, NETDATA_THREAD_TAG_MAX, THREAD_TAG_STREAM_RECEIVER "[%s]", rpt->hostname); + tag[NETDATA_THREAD_TAG_MAX] = '\0'; + + rpt->thread = nd_thread_create(tag, NETDATA_THREAD_OPTION_DEFAULT, rrdpush_receiver_thread, (void *)rpt); + if(!rpt->thread) { + rrdpush_receive_log_status( + rpt, "can't create receiver thread", + RRDPUSH_STATUS_INTERNAL_SERVER_ERROR, NDLP_ERR); + + buffer_flush(w->response.data); + buffer_strcat(w->response.data, "Can't handle this request"); + receiver_state_free(rpt); + 
return HTTP_RESP_INTERNAL_SERVER_ERROR; + } + + // prevent the caller from closing the streaming socket + return HTTP_RESP_OK; +} diff --git a/src/streaming/receiver.h b/src/streaming/receiver.h new file mode 100644 index 00000000000000..a1f2086088c783 --- /dev/null +++ b/src/streaming/receiver.h @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RECEIVER_H +#define NETDATA_RECEIVER_H + +#include "libnetdata/libnetdata.h" +#include "database/rrd.h" + +struct parser; + +struct receiver_state { + RRDHOST *host; + pid_t tid; + ND_THREAD *thread; + int fd; + char *key; + char *hostname; + char *registry_hostname; + char *machine_guid; + char *os; + char *timezone; // Unused? + char *abbrev_timezone; + int32_t utc_offset; + char *client_ip; // Duplicated in pluginsd + char *client_port; // Duplicated in pluginsd + char *program_name; // Duplicated in pluginsd + char *program_version; + struct rrdhost_system_info *system_info; + STREAM_CAPABILITIES capabilities; + time_t last_msg_t; + time_t connected_since_s; + + struct buffered_reader reader; + + uint16_t hops; + + struct { + bool shutdown; // signal the streaming parser to exit + STREAM_HANDSHAKE reason; + } exit; + + struct { + RRD_MEMORY_MODE mode; + int history; + int update_every; + int health_enabled; // CONFIG_BOOLEAN_YES, CONFIG_BOOLEAN_NO, CONFIG_BOOLEAN_AUTO + time_t alarms_delay; + uint32_t alarms_history; + int rrdpush_enabled; + const char *rrdpush_api_key; // DONT FREE - it is allocated in appconfig + const char *rrdpush_send_charts_matching; // DONT FREE - it is allocated in appconfig + bool rrdpush_enable_replication; + time_t rrdpush_seconds_to_replicate; + time_t rrdpush_replication_step; + const char *rrdpush_destination; // DONT FREE - it is allocated in appconfig + unsigned int rrdpush_compression; + STREAM_CAPABILITIES compression_priorities[COMPRESSION_ALGORITHM_MAX]; + } config; + + NETDATA_SSL ssl; + + time_t replication_first_time_t; + + struct decompressor_state 
decompressor; + /* + struct { + uint32_t count; + STREAM_NODE_INSTANCE *array; + } instances; +*/ + + // The parser pointer is safe to read and use, only when having the host receiver lock. + // Without this lock, the data pointed by the pointer may vanish randomly. + // Also, since the receiver sets it when it starts, it should be read with + // an atomic read. + struct parser *parser; + +#ifdef ENABLE_H2O + void *h2o_ctx; +#endif +}; + +#ifdef ENABLE_H2O +#define is_h2o_rrdpush(x) ((x)->h2o_ctx != NULL) +#define unless_h2o_rrdpush(x) if(!is_h2o_rrdpush(x)) +#endif + +int rrdpush_receiver_thread_spawn(struct web_client *w, char *decoded_query_string, void *h2o_ctx); + +void receiver_state_free(struct receiver_state *rpt); +bool stop_streaming_receiver(RRDHOST *host, STREAM_HANDSHAKE reason); + +#endif //NETDATA_RECEIVER_H diff --git a/src/streaming/replication.h b/src/streaming/replication.h index 9448199fb96bfc..27baeaf35c3393 100644 --- a/src/streaming/replication.h +++ b/src/streaming/replication.h @@ -5,6 +5,8 @@ #include "daemon/common.h" +struct parser; + struct replication_query_statistics { SPINLOCK spinlock; size_t queries_started; diff --git a/src/streaming/rrdhost-status.c b/src/streaming/rrdhost-status.c new file mode 100644 index 00000000000000..e03204b7ddc15a --- /dev/null +++ b/src/streaming/rrdhost-status.c @@ -0,0 +1,355 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrdhost-status.h" + +const char *rrdhost_db_status_to_string(RRDHOST_DB_STATUS status) { + switch(status) { + default: + case RRDHOST_DB_STATUS_INITIALIZING: + return "initializing"; + + case RRDHOST_DB_STATUS_QUERYABLE: + return "online"; + } +} + +const char *rrdhost_db_liveness_to_string(RRDHOST_DB_LIVENESS status) { + switch(status) { + default: + case RRDHOST_DB_LIVENESS_STALE: + return "stale"; + + case RRDHOST_DB_LIVENESS_LIVE: + return "live"; + } +} + +const char *rrdhost_ingest_status_to_string(RRDHOST_INGEST_STATUS status) { + switch(status) { + case 
RRDHOST_INGEST_STATUS_ARCHIVED: + return "archived"; + + case RRDHOST_INGEST_STATUS_INITIALIZING: + return "initializing"; + + case RRDHOST_INGEST_STATUS_REPLICATING: + return "replicating"; + + case RRDHOST_INGEST_STATUS_ONLINE: + return "online"; + + default: + case RRDHOST_INGEST_STATUS_OFFLINE: + return "offline"; + } +} + +const char *rrdhost_ingest_type_to_string(RRDHOST_INGEST_TYPE type) { + switch(type) { + case RRDHOST_INGEST_TYPE_LOCALHOST: + return "localhost"; + + case RRDHOST_INGEST_TYPE_VIRTUAL: + return "virtual"; + + case RRDHOST_INGEST_TYPE_CHILD: + return "child"; + + default: + case RRDHOST_INGEST_TYPE_ARCHIVED: + return "archived"; + } +} + +const char *rrdhost_streaming_status_to_string(RRDHOST_STREAMING_STATUS status) { + switch(status) { + case RRDHOST_STREAM_STATUS_DISABLED: + return "disabled"; + + case RRDHOST_STREAM_STATUS_REPLICATING: + return "replicating"; + + case RRDHOST_STREAM_STATUS_ONLINE: + return "online"; + + default: + case RRDHOST_STREAM_STATUS_OFFLINE: + return "offline"; + } +} + +const char *rrdhost_ml_status_to_string(RRDHOST_ML_STATUS status) { + switch(status) { + case RRDHOST_ML_STATUS_RUNNING: + return "online"; + + case RRDHOST_ML_STATUS_OFFLINE: + return "offline"; + + default: + case RRDHOST_ML_STATUS_DISABLED: + return "disabled"; + } +} + +const char *rrdhost_ml_type_to_string(RRDHOST_ML_TYPE type) { + switch(type) { + case RRDHOST_ML_TYPE_SELF: + return "self"; + + case RRDHOST_ML_TYPE_RECEIVED: + return "received"; + + default: + case RRDHOST_ML_TYPE_DISABLED: + return "disabled"; + } +} + +const char *rrdhost_health_status_to_string(RRDHOST_HEALTH_STATUS status) { + switch(status) { + default: + case RRDHOST_HEALTH_STATUS_DISABLED: + return "disabled"; + + case RRDHOST_HEALTH_STATUS_INITIALIZING: + return "initializing"; + + case RRDHOST_HEALTH_STATUS_RUNNING: + return "online"; + } +} + +const char *rrdhost_dyncfg_status_to_string(RRDHOST_DYNCFG_STATUS status) { + switch(status) { + default: + case 
RRDHOST_DYNCFG_STATUS_UNAVAILABLE: + return "unavailable"; + + case RRDHOST_DYNCFG_STATUS_AVAILABLE: + return "online"; + } +} + +static NETDATA_DOUBLE rrdhost_sender_replication_completion_unsafe(RRDHOST *host, time_t now, size_t *instances) { + size_t charts = rrdhost_sender_replicating_charts(host); + NETDATA_DOUBLE completion; + if(!charts || !host->sender || !host->sender->replication.oldest_request_after_t) + completion = 100.0; + else if(!host->sender->replication.latest_completed_before_t || host->sender->replication.latest_completed_before_t < host->sender->replication.oldest_request_after_t) + completion = 0.0; + else { + time_t total = now - host->sender->replication.oldest_request_after_t; + time_t current = host->sender->replication.latest_completed_before_t - host->sender->replication.oldest_request_after_t; + completion = (NETDATA_DOUBLE) current * 100.0 / (NETDATA_DOUBLE) total; + } + + *instances = charts; + + return completion; +} + +void rrdhost_status(RRDHOST *host, time_t now, RRDHOST_STATUS *s) { + memset(s, 0, sizeof(*s)); + + s->host = host; + s->now = now; + + RRDHOST_FLAGS flags = __atomic_load_n(&host->flags, __ATOMIC_RELAXED); + + // --- dyncfg --- + + s->dyncfg.status = dyncfg_available_for_rrdhost(host) ? 
RRDHOST_DYNCFG_STATUS_AVAILABLE : RRDHOST_DYNCFG_STATUS_UNAVAILABLE; + + // --- db --- + + bool online = rrdhost_is_online(host); + + rrdhost_retention(host, now, online, &s->db.first_time_s, &s->db.last_time_s); + s->db.metrics = host->rrdctx.metrics; + s->db.instances = host->rrdctx.instances; + s->db.contexts = dictionary_entries(host->rrdctx.contexts); + if(!s->db.first_time_s || !s->db.last_time_s || !s->db.metrics || !s->db.instances || !s->db.contexts || + (flags & (RRDHOST_FLAG_PENDING_CONTEXT_LOAD))) + s->db.status = RRDHOST_DB_STATUS_INITIALIZING; + else + s->db.status = RRDHOST_DB_STATUS_QUERYABLE; + + s->db.mode = host->rrd_memory_mode; + + // --- ingest --- + + s->ingest.since = MAX(host->child_connect_time, host->child_disconnected_time); + s->ingest.reason = (online) ? STREAM_HANDSHAKE_NEVER : host->rrdpush_last_receiver_exit_reason; + + spinlock_lock(&host->receiver_lock); + s->ingest.hops = (host->system_info ? host->system_info->hops : (host == localhost) ? 0 : 1); + bool has_receiver = false; + if (host->receiver) { + has_receiver = true; + s->ingest.replication.instances = rrdhost_receiver_replicating_charts(host); + s->ingest.replication.completion = host->rrdpush_receiver_replication_percent; + s->ingest.replication.in_progress = s->ingest.replication.instances > 0; + + s->ingest.capabilities = host->receiver->capabilities; + s->ingest.peers = socket_peers(host->receiver->fd); + s->ingest.ssl = SSL_connection(&host->receiver->ssl); + } + spinlock_unlock(&host->receiver_lock); + + if (online) { + if(s->db.status == RRDHOST_DB_STATUS_INITIALIZING) + s->ingest.status = RRDHOST_INGEST_STATUS_INITIALIZING; + + else if (host == localhost || rrdhost_option_check(host, RRDHOST_OPTION_VIRTUAL_HOST)) { + s->ingest.status = RRDHOST_INGEST_STATUS_ONLINE; + s->ingest.since = netdata_start_time; + } + + else if (s->ingest.replication.in_progress) + s->ingest.status = RRDHOST_INGEST_STATUS_REPLICATING; + + else + s->ingest.status = 
RRDHOST_INGEST_STATUS_ONLINE; + } + else { + if (!s->ingest.since) { + s->ingest.status = RRDHOST_INGEST_STATUS_ARCHIVED; + s->ingest.since = s->db.last_time_s; + } + + else + s->ingest.status = RRDHOST_INGEST_STATUS_OFFLINE; + } + + if(host == localhost) + s->ingest.type = RRDHOST_INGEST_TYPE_LOCALHOST; + else if(has_receiver || rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_RECEIVER_DISCONNECTED)) /* read-only flag test, like the other rrdhost_flag_check() calls here; the setter would modify host flags from a status query */ + s->ingest.type = RRDHOST_INGEST_TYPE_CHILD; + else if(rrdhost_option_check(host, RRDHOST_OPTION_VIRTUAL_HOST)) + s->ingest.type = RRDHOST_INGEST_TYPE_VIRTUAL; + else + s->ingest.type = RRDHOST_INGEST_TYPE_ARCHIVED; + + s->ingest.id = host->rrdpush_receiver_connection_counter; + + if(!s->ingest.since) + s->ingest.since = netdata_start_time; + + if(s->ingest.status == RRDHOST_INGEST_STATUS_ONLINE) + s->db.liveness = RRDHOST_DB_LIVENESS_LIVE; + else + s->db.liveness = RRDHOST_DB_LIVENESS_STALE; + + // --- stream --- + + if (!host->sender) { + s->stream.status = RRDHOST_STREAM_STATUS_DISABLED; + s->stream.hops = s->ingest.hops + 1; + } + else { + sender_lock(host->sender); + + s->stream.since = host->sender->last_state_since_t; + s->stream.peers = socket_peers(host->sender->rrdpush_sender_socket); + s->stream.ssl = SSL_connection(&host->sender->ssl); + + memcpy(s->stream.sent_bytes_on_this_connection_per_type, + host->sender->sent_bytes_on_this_connection_per_type, + MIN(sizeof(s->stream.sent_bytes_on_this_connection_per_type), + sizeof(host->sender->sent_bytes_on_this_connection_per_type))); + + if (rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED)) { + s->stream.hops = host->sender->hops; + s->stream.reason = STREAM_HANDSHAKE_NEVER; + s->stream.capabilities = host->sender->capabilities; + + s->stream.replication.completion = rrdhost_sender_replication_completion_unsafe(host, now, &s->stream.replication.instances); + s->stream.replication.in_progress = s->stream.replication.instances > 0; + + if(s->stream.replication.in_progress) + s->stream.status = 
RRDHOST_STREAM_STATUS_REPLICATING; + else + s->stream.status = RRDHOST_STREAM_STATUS_ONLINE; + + s->stream.compression = host->sender->compressor.initialized; + } + else { + s->stream.status = RRDHOST_STREAM_STATUS_OFFLINE; + s->stream.hops = s->ingest.hops + 1; + s->stream.reason = host->sender->exit.reason; + } + + sender_unlock(host->sender); + } + + s->stream.id = host->rrdpush_sender_connection_counter; + + if(!s->stream.since) + s->stream.since = netdata_start_time; + + // --- ml --- + + if(ml_host_get_host_status(host, &s->ml.metrics)) { + s->ml.type = RRDHOST_ML_TYPE_SELF; + + if(s->ingest.status == RRDHOST_INGEST_STATUS_OFFLINE || s->ingest.status == RRDHOST_INGEST_STATUS_ARCHIVED) + s->ml.status = RRDHOST_ML_STATUS_OFFLINE; + else + s->ml.status = RRDHOST_ML_STATUS_RUNNING; + } + else if(stream_has_capability(&s->ingest, STREAM_CAP_DATA_WITH_ML)) { + s->ml.type = RRDHOST_ML_TYPE_RECEIVED; + s->ml.status = RRDHOST_ML_STATUS_RUNNING; + } + else { + // does not receive ML, does not run ML + s->ml.type = RRDHOST_ML_TYPE_DISABLED; + s->ml.status = RRDHOST_ML_STATUS_DISABLED; + } + + // --- health --- + + if(host->health.health_enabled) { + if(flags & RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION) + s->health.status = RRDHOST_HEALTH_STATUS_INITIALIZING; + else { + s->health.status = RRDHOST_HEALTH_STATUS_RUNNING; + + RRDCALC *rc; + foreach_rrdcalc_in_rrdhost_read(host, rc) { + if (unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec)) + continue; + + switch (rc->status) { + default: + case RRDCALC_STATUS_REMOVED: + break; + + case RRDCALC_STATUS_CLEAR: + s->health.alerts.clear++; + break; + + case RRDCALC_STATUS_WARNING: + s->health.alerts.warning++; + break; + + case RRDCALC_STATUS_CRITICAL: + s->health.alerts.critical++; + break; + + case RRDCALC_STATUS_UNDEFINED: + s->health.alerts.undefined++; + break; + + case RRDCALC_STATUS_UNINITIALIZED: + s->health.alerts.uninitialized++; + break; + } + } + foreach_rrdcalc_in_rrdhost_done(rc); + } + } + else + 
s->health.status = RRDHOST_HEALTH_STATUS_DISABLED; +} diff --git a/src/streaming/rrdhost-status.h b/src/streaming/rrdhost-status.h new file mode 100644 index 00000000000000..21298e268cb361 --- /dev/null +++ b/src/streaming/rrdhost-status.h @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDHOST_STATUS_H +#define NETDATA_RRDHOST_STATUS_H + +#include "libnetdata/libnetdata.h" +#include "stream-handshake.h" +#include "stream-capabilities.h" +#include "database/rrd.h" + +typedef enum __attribute__((packed)) { + RRDHOST_DB_STATUS_INITIALIZING = 0, + RRDHOST_DB_STATUS_QUERYABLE, +} RRDHOST_DB_STATUS; + +const char *rrdhost_db_status_to_string(RRDHOST_DB_STATUS status); + +typedef enum __attribute__((packed)) { + RRDHOST_DB_LIVENESS_STALE = 0, + RRDHOST_DB_LIVENESS_LIVE, +} RRDHOST_DB_LIVENESS; + +const char *rrdhost_db_liveness_to_string(RRDHOST_DB_LIVENESS status); + +typedef enum __attribute__((packed)) { + RRDHOST_INGEST_STATUS_ARCHIVED = 0, + RRDHOST_INGEST_STATUS_INITIALIZING, + RRDHOST_INGEST_STATUS_REPLICATING, + RRDHOST_INGEST_STATUS_ONLINE, + RRDHOST_INGEST_STATUS_OFFLINE, +} RRDHOST_INGEST_STATUS; + +const char *rrdhost_ingest_status_to_string(RRDHOST_INGEST_STATUS status); + +typedef enum __attribute__((packed)) { + RRDHOST_INGEST_TYPE_LOCALHOST = 0, + RRDHOST_INGEST_TYPE_VIRTUAL, + RRDHOST_INGEST_TYPE_CHILD, + RRDHOST_INGEST_TYPE_ARCHIVED, +} RRDHOST_INGEST_TYPE; + +const char *rrdhost_ingest_type_to_string(RRDHOST_INGEST_TYPE type); + +typedef enum __attribute__((packed)) { + RRDHOST_STREAM_STATUS_DISABLED = 0, + RRDHOST_STREAM_STATUS_REPLICATING, + RRDHOST_STREAM_STATUS_ONLINE, + RRDHOST_STREAM_STATUS_OFFLINE, +} RRDHOST_STREAMING_STATUS; + +const char *rrdhost_streaming_status_to_string(RRDHOST_STREAMING_STATUS status); + +typedef enum __attribute__((packed)) { + RRDHOST_ML_STATUS_DISABLED = 0, + RRDHOST_ML_STATUS_OFFLINE, + RRDHOST_ML_STATUS_RUNNING, +} RRDHOST_ML_STATUS; + +const char 
*rrdhost_ml_status_to_string(RRDHOST_ML_STATUS status); + +typedef enum __attribute__((packed)) { + RRDHOST_ML_TYPE_DISABLED = 0, + RRDHOST_ML_TYPE_SELF, + RRDHOST_ML_TYPE_RECEIVED, +} RRDHOST_ML_TYPE; + +const char *rrdhost_ml_type_to_string(RRDHOST_ML_TYPE type); + +typedef enum __attribute__((packed)) { + RRDHOST_HEALTH_STATUS_DISABLED = 0, + RRDHOST_HEALTH_STATUS_INITIALIZING, + RRDHOST_HEALTH_STATUS_RUNNING, +} RRDHOST_HEALTH_STATUS; + +const char *rrdhost_health_status_to_string(RRDHOST_HEALTH_STATUS status); + +typedef enum __attribute__((packed)) { + RRDHOST_DYNCFG_STATUS_UNAVAILABLE = 0, + RRDHOST_DYNCFG_STATUS_AVAILABLE, +} RRDHOST_DYNCFG_STATUS; + +const char *rrdhost_dyncfg_status_to_string(RRDHOST_DYNCFG_STATUS status); + +typedef struct { + RRDHOST *host; + time_t now; + + struct { + RRDHOST_DYNCFG_STATUS status; + } dyncfg; + + struct { + RRDHOST_DB_STATUS status; + RRDHOST_DB_LIVENESS liveness; + RRD_MEMORY_MODE mode; + time_t first_time_s; + time_t last_time_s; + size_t metrics; + size_t instances; + size_t contexts; + } db; + + struct { + RRDHOST_ML_STATUS status; + RRDHOST_ML_TYPE type; + struct ml_metrics_statistics metrics; + } ml; + + struct { + size_t hops; + RRDHOST_INGEST_TYPE type; + RRDHOST_INGEST_STATUS status; + SOCKET_PEERS peers; + bool ssl; + STREAM_CAPABILITIES capabilities; + uint32_t id; + time_t since; + STREAM_HANDSHAKE reason; + + struct { + bool in_progress; + NETDATA_DOUBLE completion; + size_t instances; + } replication; + } ingest; + + struct { + size_t hops; + RRDHOST_STREAMING_STATUS status; + SOCKET_PEERS peers; + bool ssl; + bool compression; + STREAM_CAPABILITIES capabilities; + uint32_t id; + time_t since; + STREAM_HANDSHAKE reason; + + struct { + bool in_progress; + NETDATA_DOUBLE completion; + size_t instances; + } replication; + + size_t sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_MAX]; + } stream; + + struct { + RRDHOST_HEALTH_STATUS status; + struct { + uint32_t undefined; + uint32_t uninitialized; 
+ uint32_t clear; + uint32_t warning; + uint32_t critical; + } alerts; + } health; +} RRDHOST_STATUS; + +void rrdhost_status(RRDHOST *host, time_t now, RRDHOST_STATUS *s); + +#endif //NETDATA_RRDHOST_STATUS_H diff --git a/src/streaming/rrdpush.c b/src/streaming/rrdpush.c deleted file mode 100644 index d9777b30e662ca..00000000000000 --- a/src/streaming/rrdpush.c +++ /dev/null @@ -1,1258 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "rrdpush.h" - -/* - * rrdpush - * - * 3 threads are involved for all stream operations - * - * 1. a random data collection thread, calling rrdset_done_push() - * this is called for each chart. - * - * the output of this work is kept in a thread BUFFER - * the sender thread is signalled via a pipe (in RRDHOST) - * - * 2. a sender thread running at the sending netdata - * this is spawned automatically on the first chart to be pushed - * - * It tries to push the metrics to the remote netdata, as fast - * as possible (i.e. immediately after they are collected). - * - * 3. a receiver thread, running at the receiving netdata - * this is spawned automatically when the sender connects to - * the receiver. - * - */ - -struct config stream_config = APPCONFIG_INITIALIZER; - -unsigned int default_rrdpush_enabled = 0; - -unsigned int default_rrdpush_compression_enabled = 1; -const char *default_rrdpush_destination = NULL; -const char *default_rrdpush_api_key = NULL; -const char *default_rrdpush_send_charts_matching = "*"; -bool default_rrdpush_enable_replication = true; -time_t default_rrdpush_seconds_to_replicate = 86400; -time_t default_rrdpush_replication_step = 600; -const char *netdata_ssl_ca_path = NULL; -const char *netdata_ssl_ca_file = NULL; - -static void load_stream_conf() { - errno_clear(); - char *filename = filename_from_path_entry_strdupz(netdata_configured_user_config_dir, "stream.conf"); - if(!appconfig_load(&stream_config, filename, 0, NULL)) { - nd_log_daemon(NDLP_NOTICE, "CONFIG: cannot load user config '%s'. 
Will try stock config.", filename); - freez(filename); - - filename = filename_from_path_entry_strdupz(netdata_configured_stock_config_dir, "stream.conf"); - if(!appconfig_load(&stream_config, filename, 0, NULL)) - nd_log_daemon(NDLP_NOTICE, "CONFIG: cannot load stock config '%s'. Running with internal defaults.", filename); - } - - freez(filename); - - appconfig_move(&stream_config, - CONFIG_SECTION_STREAM, "timeout seconds", - CONFIG_SECTION_STREAM, "timeout"); - - appconfig_move(&stream_config, - CONFIG_SECTION_STREAM, "reconnect delay seconds", - CONFIG_SECTION_STREAM, "reconnect delay"); - - appconfig_move_everywhere(&stream_config, "default memory mode", "db"); - appconfig_move_everywhere(&stream_config, "memory mode", "db"); - appconfig_move_everywhere(&stream_config, "db mode", "db"); - appconfig_move_everywhere(&stream_config, "default history", "retention"); - appconfig_move_everywhere(&stream_config, "history", "retention"); - appconfig_move_everywhere(&stream_config, "default proxy enabled", "proxy enabled"); - appconfig_move_everywhere(&stream_config, "default proxy destination", "proxy destination"); - appconfig_move_everywhere(&stream_config, "default proxy api key", "proxy api key"); - appconfig_move_everywhere(&stream_config, "default proxy send charts matching", "proxy send charts matching"); - appconfig_move_everywhere(&stream_config, "default health log history", "health log retention"); - appconfig_move_everywhere(&stream_config, "health log history", "health log retention"); - appconfig_move_everywhere(&stream_config, "seconds to replicate", "replication period"); - appconfig_move_everywhere(&stream_config, "seconds per replication step", "replication step"); - appconfig_move_everywhere(&stream_config, "default postpone alarms on connect seconds", "postpone alerts on connect"); - appconfig_move_everywhere(&stream_config, "postpone alarms on connect seconds", "postpone alerts on connect"); -} - -bool rrdpush_receiver_needs_dbengine(void) { - 
return stream_conf_needs_dbengine(&stream_config); -} - -int rrdpush_init() { - // -------------------------------------------------------------------- - // load stream.conf - load_stream_conf(); - - default_rrdpush_enabled = - (unsigned int)appconfig_get_boolean(&stream_config, CONFIG_SECTION_STREAM, "enabled", default_rrdpush_enabled); - - default_rrdpush_destination = - appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "destination", ""); - - default_rrdpush_api_key = - appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "api key", ""); - - default_rrdpush_send_charts_matching = - appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "send charts matching", default_rrdpush_send_charts_matching); - - default_rrdpush_enable_replication = - config_get_boolean(CONFIG_SECTION_DB, "enable replication", default_rrdpush_enable_replication); - - default_rrdpush_seconds_to_replicate = - config_get_duration_seconds(CONFIG_SECTION_DB, "replication period", default_rrdpush_seconds_to_replicate); - - default_rrdpush_replication_step = - config_get_duration_seconds(CONFIG_SECTION_DB, "replication step", default_rrdpush_replication_step); - - rrdhost_free_orphan_time_s = - config_get_duration_seconds(CONFIG_SECTION_DB, "cleanup orphan hosts after", rrdhost_free_orphan_time_s); - - default_rrdpush_compression_enabled = - (unsigned int)appconfig_get_boolean(&stream_config, CONFIG_SECTION_STREAM, - "enable compression", default_rrdpush_compression_enabled); - - rrdpush_compression_levels[COMPRESSION_ALGORITHM_BROTLI] = (int)appconfig_get_number( - &stream_config, CONFIG_SECTION_STREAM, "brotli compression level", - rrdpush_compression_levels[COMPRESSION_ALGORITHM_BROTLI]); - - rrdpush_compression_levels[COMPRESSION_ALGORITHM_ZSTD] = (int)appconfig_get_number( - &stream_config, CONFIG_SECTION_STREAM, "zstd compression level", - rrdpush_compression_levels[COMPRESSION_ALGORITHM_ZSTD]); - - rrdpush_compression_levels[COMPRESSION_ALGORITHM_LZ4] = (int)appconfig_get_number( - 
&stream_config, CONFIG_SECTION_STREAM, "lz4 compression acceleration", - rrdpush_compression_levels[COMPRESSION_ALGORITHM_LZ4]); - - rrdpush_compression_levels[COMPRESSION_ALGORITHM_GZIP] = (int)appconfig_get_number( - &stream_config, CONFIG_SECTION_STREAM, "gzip compression level", - rrdpush_compression_levels[COMPRESSION_ALGORITHM_GZIP]); - - if(default_rrdpush_enabled && (!default_rrdpush_destination || !*default_rrdpush_destination || !default_rrdpush_api_key || !*default_rrdpush_api_key)) { - nd_log_daemon(NDLP_WARNING, "STREAM [send]: cannot enable sending thread - information is missing."); - default_rrdpush_enabled = 0; - } - - netdata_ssl_validate_certificate_sender = !appconfig_get_boolean(&stream_config, CONFIG_SECTION_STREAM, "ssl skip certificate verification", !netdata_ssl_validate_certificate); - - if(!netdata_ssl_validate_certificate_sender) - nd_log_daemon(NDLP_NOTICE, "SSL: streaming senders will skip SSL certificates verification."); - - netdata_ssl_ca_path = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "CApath", NULL); - netdata_ssl_ca_file = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "CAfile", NULL); - - return default_rrdpush_enabled; -} - -// data collection happens from multiple threads -// each of these threads calls rrdset_done() -// which in turn calls rrdset_done_push() -// which uses this pipe to notify the streaming thread -// that there are more data ready to be sent -#define PIPE_READ 0 -#define PIPE_WRITE 1 - -// to have the remote netdata re-sync the charts -// to its current clock, we send for this many -// iterations a BEGIN line without microseconds -// this is for the first iterations of each chart -unsigned int remote_clock_resync_iterations = 60; - -static inline bool should_send_chart_matching(RRDSET *st, RRDSET_FLAGS flags) { - if(!(flags & RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED)) - return false; - - if(unlikely(!(flags & (RRDSET_FLAG_UPSTREAM_SEND | RRDSET_FLAG_UPSTREAM_IGNORE)))) { - RRDHOST *host = 
st->rrdhost; - - if (flags & RRDSET_FLAG_ANOMALY_DETECTION) { - if(ml_streaming_enabled()) - rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_SEND); - else - rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_IGNORE); - } - else { - int negative = 0, positive = 0; - SIMPLE_PATTERN_RESULT r; - - r = simple_pattern_matches_string_extract(host->rrdpush.send.charts_matching, st->context, NULL, 0); - if(r == SP_MATCHED_POSITIVE) positive++; - else if(r == SP_MATCHED_NEGATIVE) negative++; - - if(!negative) { - r = simple_pattern_matches_string_extract(host->rrdpush.send.charts_matching, st->name, NULL, 0); - if (r == SP_MATCHED_POSITIVE) positive++; - else if (r == SP_MATCHED_NEGATIVE) negative++; - } - - if(!negative) { - r = simple_pattern_matches_string_extract(host->rrdpush.send.charts_matching, st->id, NULL, 0); - if (r == SP_MATCHED_POSITIVE) positive++; - else if (r == SP_MATCHED_NEGATIVE) negative++; - } - - if(!negative && positive) - rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_SEND); - else - rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_IGNORE); - } - - // get the flags again, to know how to respond - flags = rrdset_flag_check(st, RRDSET_FLAG_UPSTREAM_SEND|RRDSET_FLAG_UPSTREAM_IGNORE); - } - - return flags & RRDSET_FLAG_UPSTREAM_SEND; -} - -int configured_as_parent() { - return stream_conf_has_uuid_section(&stream_config); -} - -// chart labels -static int send_clabels_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) { - BUFFER *wb = (BUFFER *)data; - buffer_sprintf(wb, PLUGINSD_KEYWORD_CLABEL " \"%s\" \"%s\" %d\n", name, value, ls & ~(RRDLABEL_FLAG_INTERNAL)); - return 1; -} - -static void rrdpush_send_clabels(BUFFER *wb, RRDSET *st) { - if (st->rrdlabels) { - if(rrdlabels_walkthrough_read(st->rrdlabels, send_clabels_callback, wb) > 0) - buffer_sprintf(wb, PLUGINSD_KEYWORD_CLABEL_COMMIT "\n"); - } -} - -// Send the current chart definition. -// Assumes that collector thread has already called sender_start for mutex / buffer state. 
-static inline bool rrdpush_send_chart_definition(BUFFER *wb, RRDSET *st) { - uint32_t version = rrdset_metadata_version(st); - - RRDHOST *host = st->rrdhost; - NUMBER_ENCODING integer_encoding = stream_has_capability(host->sender, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_HEX; - bool with_slots = stream_has_capability(host->sender, STREAM_CAP_SLOTS) ? true : false; - - bool replication_progress = false; - - // properly set the name for the remote end to parse it - char *name = ""; - if(likely(st->name)) { - if(unlikely(st->id != st->name)) { - // they differ - name = strchr(rrdset_name(st), '.'); - if(name) - name++; - else - name = ""; - } - } - - buffer_fast_strcat(wb, PLUGINSD_KEYWORD_CHART, sizeof(PLUGINSD_KEYWORD_CHART) - 1); - - if(with_slots) { - buffer_fast_strcat(wb, " "PLUGINSD_KEYWORD_SLOT":", sizeof(PLUGINSD_KEYWORD_SLOT) - 1 + 2); - buffer_print_uint64_encoded(wb, integer_encoding, st->rrdpush.sender.chart_slot); - } - - // send the chart - buffer_sprintf( - wb - , " \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" %d %d \"%s %s %s\" \"%s\" \"%s\"\n" - , rrdset_id(st) - , name - , rrdset_title(st) - , rrdset_units(st) - , rrdset_family(st) - , rrdset_context(st) - , rrdset_type_name(st->chart_type) - , st->priority - , st->update_every - , rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)?"obsolete":"" - , rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST)?"store_first":"" - , rrdset_flag_check(st, RRDSET_FLAG_HIDDEN)?"hidden":"" - , rrdset_plugin_name(st) - , rrdset_module_name(st) - ); - - // send the chart labels - if (stream_has_capability(host->sender, STREAM_CAP_CLABELS)) - rrdpush_send_clabels(wb, st); - - // send the dimensions - RRDDIM *rd; - rrddim_foreach_read(rd, st) { - buffer_fast_strcat(wb, PLUGINSD_KEYWORD_DIMENSION, sizeof(PLUGINSD_KEYWORD_DIMENSION) - 1); - - if(with_slots) { - buffer_fast_strcat(wb, " "PLUGINSD_KEYWORD_SLOT":", sizeof(PLUGINSD_KEYWORD_SLOT) - 1 + 2); - buffer_print_uint64_encoded(wb, integer_encoding, 
rd->rrdpush.sender.dim_slot); - } - - buffer_sprintf( - wb - , " \"%s\" \"%s\" \"%s\" %d %d \"%s %s %s\"\n" - , rrddim_id(rd) - , rrddim_name(rd) - , rrd_algorithm_name(rd->algorithm) - , rd->multiplier - , rd->divisor - , rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)?"obsolete":"" - , rrddim_option_check(rd, RRDDIM_OPTION_HIDDEN)?"hidden":"" - , rrddim_option_check(rd, RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS)?"noreset":"" - ); - } - rrddim_foreach_done(rd); - - // send the chart functions - if(stream_has_capability(host->sender, STREAM_CAP_FUNCTIONS)) - rrd_chart_functions_expose_rrdpush(st, wb); - - // send the chart local custom variables - rrdvar_print_to_streaming_custom_chart_variables(st, wb); - - if (stream_has_capability(host->sender, STREAM_CAP_REPLICATION)) { - time_t db_first_time_t, db_last_time_t; - - time_t now = now_realtime_sec(); - rrdset_get_retention_of_tier_for_collected_chart(st, &db_first_time_t, &db_last_time_t, now, 0); - - buffer_sprintf(wb, PLUGINSD_KEYWORD_CHART_DEFINITION_END " %llu %llu %llu\n", - (unsigned long long)db_first_time_t, - (unsigned long long)db_last_time_t, - (unsigned long long)now); - - if(!rrdset_flag_check(st, RRDSET_FLAG_SENDER_REPLICATION_IN_PROGRESS)) { - rrdset_flag_set(st, RRDSET_FLAG_SENDER_REPLICATION_IN_PROGRESS); - rrdset_flag_clear(st, RRDSET_FLAG_SENDER_REPLICATION_FINISHED); - rrdhost_sender_replicating_charts_plus_one(st->rrdhost); - } - replication_progress = true; - -#ifdef NETDATA_LOG_REPLICATION_REQUESTS - internal_error(true, "REPLAY: 'host:%s/chart:%s' replication starts", - rrdhost_hostname(st->rrdhost), rrdset_id(st)); -#endif - } - - sender_commit(host->sender, wb, STREAM_TRAFFIC_TYPE_METADATA); - - // we can set the exposed flag, after we commit the buffer - // because replication may pick it up prematurely - rrddim_foreach_read(rd, st) { - rrddim_metadata_exposed_upstream(rd, version); - } - rrddim_foreach_done(rd); - rrdset_metadata_exposed_upstream(st, version); - - 
st->rrdpush.sender.resync_time_s = st->last_collected_time.tv_sec + (remote_clock_resync_iterations * st->update_every); - return replication_progress; -} - -// sends the current chart dimensions -static void rrdpush_send_chart_metrics(BUFFER *wb, RRDSET *st, struct sender_state *s __maybe_unused, RRDSET_FLAGS flags) { - buffer_fast_strcat(wb, "BEGIN \"", 7); - buffer_fast_strcat(wb, rrdset_id(st), string_strlen(st->id)); - buffer_fast_strcat(wb, "\" ", 2); - - if(st->last_collected_time.tv_sec > st->rrdpush.sender.resync_time_s) - buffer_print_uint64(wb, st->usec_since_last_update); - else - buffer_fast_strcat(wb, "0", 1); - - buffer_fast_strcat(wb, "\n", 1); - - RRDDIM *rd; - rrddim_foreach_read(rd, st) { - if(unlikely(!rrddim_check_updated(rd))) - continue; - - if(likely(rrddim_check_upstream_exposed_collector(rd))) { - buffer_fast_strcat(wb, "SET \"", 5); - buffer_fast_strcat(wb, rrddim_id(rd), string_strlen(rd->id)); - buffer_fast_strcat(wb, "\" = ", 4); - buffer_print_int64(wb, rd->collector.collected_value); - buffer_fast_strcat(wb, "\n", 1); - } - else { - internal_error(true, "STREAM: 'host:%s/chart:%s/dim:%s' flag 'exposed' is updated but not exposed", - rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_id(rd)); - // we will include it in the next iteration - rrddim_metadata_updated(rd); - } - } - rrddim_foreach_done(rd); - - if(unlikely(flags & RRDSET_FLAG_UPSTREAM_SEND_VARIABLES)) - rrdvar_print_to_streaming_custom_chart_variables(st, wb); - - buffer_fast_strcat(wb, "END\n", 4); -} - -static void rrdpush_sender_thread_spawn(RRDHOST *host); - -// Called from the internal collectors to mark a chart obsolete. 
-bool rrdset_push_chart_definition_now(RRDSET *st) { - RRDHOST *host = st->rrdhost; - - if(unlikely(!rrdhost_can_send_definitions_to_parent(host) - || !should_send_chart_matching(st, rrdset_flag_get(st)))) { - return false; - } - - BUFFER *wb = sender_start(host->sender); - rrdpush_send_chart_definition(wb, st); - sender_thread_buffer_free(); - - return true; -} - -void rrdset_push_metrics_v1(RRDSET_STREAM_BUFFER *rsb, RRDSET *st) { - RRDHOST *host = st->rrdhost; - rrdpush_send_chart_metrics(rsb->wb, st, host->sender, rsb->rrdset_flags); -} - -void rrddim_push_metrics_v2(RRDSET_STREAM_BUFFER *rsb, RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, SN_FLAGS flags) { - if(!rsb->wb || !rsb->v2 || !netdata_double_isnumber(n) || !does_storage_number_exist(flags)) - return; - - bool with_slots = stream_has_capability(rsb, STREAM_CAP_SLOTS) ? true : false; - NUMBER_ENCODING integer_encoding = stream_has_capability(rsb, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_HEX; - NUMBER_ENCODING doubles_encoding = stream_has_capability(rsb, STREAM_CAP_IEEE754) ? 
NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_DECIMAL; - BUFFER *wb = rsb->wb; - time_t point_end_time_s = (time_t)(point_end_time_ut / USEC_PER_SEC); - if(unlikely(rsb->last_point_end_time_s != point_end_time_s)) { - - if(unlikely(rsb->begin_v2_added)) - buffer_fast_strcat(wb, PLUGINSD_KEYWORD_END_V2 "\n", sizeof(PLUGINSD_KEYWORD_END_V2) - 1 + 1); - - buffer_fast_strcat(wb, PLUGINSD_KEYWORD_BEGIN_V2, sizeof(PLUGINSD_KEYWORD_BEGIN_V2) - 1); - - if(with_slots) { - buffer_fast_strcat(wb, " "PLUGINSD_KEYWORD_SLOT":", sizeof(PLUGINSD_KEYWORD_SLOT) - 1 + 2); - buffer_print_uint64_encoded(wb, integer_encoding, rd->rrdset->rrdpush.sender.chart_slot); - } - - buffer_fast_strcat(wb, " '", 2); - buffer_fast_strcat(wb, rrdset_id(rd->rrdset), string_strlen(rd->rrdset->id)); - buffer_fast_strcat(wb, "' ", 2); - buffer_print_uint64_encoded(wb, integer_encoding, rd->rrdset->update_every); - buffer_fast_strcat(wb, " ", 1); - buffer_print_uint64_encoded(wb, integer_encoding, point_end_time_s); - buffer_fast_strcat(wb, " ", 1); - if(point_end_time_s == rsb->wall_clock_time) - buffer_fast_strcat(wb, "#", 1); - else - buffer_print_uint64_encoded(wb, integer_encoding, rsb->wall_clock_time); - buffer_fast_strcat(wb, "\n", 1); - - rsb->last_point_end_time_s = point_end_time_s; - rsb->begin_v2_added = true; - } - - buffer_fast_strcat(wb, PLUGINSD_KEYWORD_SET_V2, sizeof(PLUGINSD_KEYWORD_SET_V2) - 1); - - if(with_slots) { - buffer_fast_strcat(wb, " "PLUGINSD_KEYWORD_SLOT":", sizeof(PLUGINSD_KEYWORD_SLOT) - 1 + 2); - buffer_print_uint64_encoded(wb, integer_encoding, rd->rrdpush.sender.dim_slot); - } - - buffer_fast_strcat(wb, " '", 2); - buffer_fast_strcat(wb, rrddim_id(rd), string_strlen(rd->id)); - buffer_fast_strcat(wb, "' ", 2); - buffer_print_int64_encoded(wb, integer_encoding, rd->collector.last_collected_value); - buffer_fast_strcat(wb, " ", 1); - - if((NETDATA_DOUBLE)rd->collector.last_collected_value == n) - buffer_fast_strcat(wb, "#", 1); - else - buffer_print_netdata_double_encoded(wb, 
doubles_encoding, n); - - buffer_fast_strcat(wb, " ", 1); - buffer_print_sn_flags(wb, flags, true); - buffer_fast_strcat(wb, "\n", 1); -} - -void rrdset_push_metrics_finished(RRDSET_STREAM_BUFFER *rsb, RRDSET *st) { - if(!rsb->wb) - return; - - if(rsb->v2 && rsb->begin_v2_added) { - if(unlikely(rsb->rrdset_flags & RRDSET_FLAG_UPSTREAM_SEND_VARIABLES)) - rrdvar_print_to_streaming_custom_chart_variables(st, rsb->wb); - - buffer_fast_strcat(rsb->wb, PLUGINSD_KEYWORD_END_V2 "\n", sizeof(PLUGINSD_KEYWORD_END_V2) - 1 + 1); - } - - sender_commit(st->rrdhost->sender, rsb->wb, STREAM_TRAFFIC_TYPE_DATA); - - *rsb = (RRDSET_STREAM_BUFFER){ .wb = NULL, }; -} - -RRDSET_STREAM_BUFFER rrdset_push_metric_initialize(RRDSET *st, time_t wall_clock_time) { - RRDHOST *host = st->rrdhost; - - // fetch the flags we need to check with one atomic operation - RRDHOST_FLAGS host_flags = __atomic_load_n(&host->flags, __ATOMIC_SEQ_CST); - - // check if we are not connected - if(unlikely(!(host_flags & RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS))) { - - if(unlikely(!(host_flags & (RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN | RRDHOST_FLAG_RRDPUSH_RECEIVER_DISCONNECTED)))) - rrdpush_sender_thread_spawn(host); - - if(unlikely(!(host_flags & RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS))) { - rrdhost_flag_set(host, RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS); - nd_log_daemon(NDLP_NOTICE, "STREAM %s [send]: not ready - collected metrics are not sent to parent.", rrdhost_hostname(host)); - } - - return (RRDSET_STREAM_BUFFER) { .wb = NULL, }; - } - else if(unlikely(host_flags & RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS)) { - nd_log_daemon(NDLP_INFO, "STREAM %s [send]: sending metrics to parent...", rrdhost_hostname(host)); - rrdhost_flag_clear(host, RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS); - } - - if(unlikely(host_flags & RRDHOST_FLAG_GLOBAL_FUNCTIONS_UPDATED)) { - BUFFER *wb = sender_start(host->sender); - rrd_global_functions_expose_rrdpush(host, wb, stream_has_capability(host->sender, STREAM_CAP_DYNCFG)); - 
sender_commit(host->sender, wb, STREAM_TRAFFIC_TYPE_FUNCTIONS); - } - - bool exposed_upstream = rrdset_check_upstream_exposed(st); - RRDSET_FLAGS rrdset_flags = rrdset_flag_get(st); - bool replication_in_progress = !(rrdset_flags & RRDSET_FLAG_SENDER_REPLICATION_FINISHED); - - if(unlikely((exposed_upstream && replication_in_progress) || - !should_send_chart_matching(st, rrdset_flags))) - return (RRDSET_STREAM_BUFFER) { .wb = NULL, }; - - if(unlikely(!exposed_upstream)) { - BUFFER *wb = sender_start(host->sender); - replication_in_progress = rrdpush_send_chart_definition(wb, st); - } - - if(replication_in_progress) - return (RRDSET_STREAM_BUFFER) { .wb = NULL, }; - - return (RRDSET_STREAM_BUFFER) { - .capabilities = host->sender->capabilities, - .v2 = stream_has_capability(host->sender, STREAM_CAP_INTERPOLATED), - .rrdset_flags = rrdset_flags, - .wb = sender_start(host->sender), - .wall_clock_time = wall_clock_time, - }; -} - -// labels -static int send_labels_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) { - BUFFER *wb = (BUFFER *)data; - buffer_sprintf(wb, "LABEL \"%s\" = %d \"%s\"\n", name, ls, value); - return 1; -} - -void rrdpush_send_host_labels(RRDHOST *host) { - if(unlikely(!rrdhost_can_send_definitions_to_parent(host) - || !stream_has_capability(host->sender, STREAM_CAP_HLABELS))) - return; - - BUFFER *wb = sender_start(host->sender); - - rrdlabels_walkthrough_read(host->rrdlabels, send_labels_callback, wb); - buffer_sprintf(wb, "OVERWRITE %s\n", "labels"); - - sender_commit(host->sender, wb, STREAM_TRAFFIC_TYPE_METADATA); - - sender_thread_buffer_free(); -} - -void rrdpush_send_global_functions(RRDHOST *host) { - if(!stream_has_capability(host->sender, STREAM_CAP_FUNCTIONS)) - return; - - if(unlikely(!rrdhost_can_send_definitions_to_parent(host))) - return; - - BUFFER *wb = sender_start(host->sender); - - rrd_global_functions_expose_rrdpush(host, wb, stream_has_capability(host->sender, STREAM_CAP_DYNCFG)); - - 
sender_commit(host->sender, wb, STREAM_TRAFFIC_TYPE_FUNCTIONS); - - sender_thread_buffer_free(); -} - -int connect_to_one_of_destinations( - RRDHOST *host, - int default_port, - struct timeval *timeout, - size_t *reconnects_counter, - char *connected_to, - size_t connected_to_size, - struct rrdpush_destinations **destination) -{ - int sock = -1; - - for (struct rrdpush_destinations *d = host->destinations; d; d = d->next) { - time_t now = now_realtime_sec(); - - if(nd_thread_signaled_to_cancel()) - return -1; - - if(d->postpone_reconnection_until > now) - continue; - - nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM %s: connecting to '%s' (default port: %d)...", - rrdhost_hostname(host), string2str(d->destination), default_port); - - if (reconnects_counter) - *reconnects_counter += 1; - - d->since = now; - d->attempts++; - sock = connect_to_this(string2str(d->destination), default_port, timeout); - - if (sock != -1) { - if (connected_to && connected_to_size) - strncpyz(connected_to, string2str(d->destination), connected_to_size); - - *destination = d; - - // move the current item to the end of the list - // without this, this destination will break the loop again and again - // not advancing the destinations to find one that may work - DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(host->destinations, d, prev, next); - DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(host->destinations, d, prev, next); - - break; - } - } - - return sock; -} - -struct destinations_init_tmp { - RRDHOST *host; - struct rrdpush_destinations *list; - int count; -}; - -bool destinations_init_add_one(char *entry, void *data) { - struct destinations_init_tmp *t = data; - - struct rrdpush_destinations *d = callocz(1, sizeof(struct rrdpush_destinations)); - char *colon_ssl = strstr(entry, ":SSL"); - if(colon_ssl) { - *colon_ssl = '\0'; - d->ssl = true; - } - else - d->ssl = false; - - d->destination = string_strdupz(entry); - - __atomic_add_fetch(&netdata_buffers_statistics.rrdhost_senders, sizeof(struct 
rrdpush_destinations), __ATOMIC_RELAXED); - - DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(t->list, d, prev, next); - - t->count++; - nd_log_daemon(NDLP_INFO, "STREAM: added streaming destination No %d: '%s' to host '%s'", t->count, string2str(d->destination), rrdhost_hostname(t->host)); - - return false; // we return false, so that we will get all defined destinations -} - -void rrdpush_destinations_init(RRDHOST *host) { - if(!host->rrdpush.send.destination) return; - - rrdpush_destinations_free(host); - - struct destinations_init_tmp t = { - .host = host, - .list = NULL, - .count = 0, - }; - - foreach_entry_in_connection_string(host->rrdpush.send.destination, destinations_init_add_one, &t); - - host->destinations = t.list; -} - -void rrdpush_destinations_free(RRDHOST *host) { - while (host->destinations) { - struct rrdpush_destinations *tmp = host->destinations; - DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(host->destinations, tmp, prev, next); - string_freez(tmp->destination); - freez(tmp); - __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_senders, sizeof(struct rrdpush_destinations), __ATOMIC_RELAXED); - } - - host->destinations = NULL; -} - -// ---------------------------------------------------------------------------- -// rrdpush sender thread - -// Either the receiver lost the connection or the host is being destroyed. -// The sender mutex guards thread creation, any spurious data is wiped on reconnection. 
-void rrdpush_sender_thread_stop(RRDHOST *host, STREAM_HANDSHAKE reason, bool wait) { - if (!host->sender) - return; - - sender_lock(host->sender); - - if(rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN)) { - - host->sender->exit.shutdown = true; - host->sender->exit.reason = reason; - - // signal it to cancel - nd_thread_signal_cancel(host->rrdpush_sender_thread); - } - - sender_unlock(host->sender); - - if(wait) { - sender_lock(host->sender); - while(host->sender->tid) { - sender_unlock(host->sender); - sleep_usec(10 * USEC_PER_MS); - sender_lock(host->sender); - } - sender_unlock(host->sender); - } -} - -// ---------------------------------------------------------------------------- -// rrdpush receiver thread - -static void rrdpush_sender_thread_spawn(RRDHOST *host) { - sender_lock(host->sender); - - if(!rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN)) { - char tag[NETDATA_THREAD_TAG_MAX + 1]; - snprintfz(tag, NETDATA_THREAD_TAG_MAX, THREAD_TAG_STREAM_SENDER "[%s]", rrdhost_hostname(host)); - - host->rrdpush_sender_thread = nd_thread_create(tag, NETDATA_THREAD_OPTION_DEFAULT, - rrdpush_sender_thread, (void *)host->sender); - if(!host->rrdpush_sender_thread) - nd_log_daemon(NDLP_ERR, "STREAM %s [send]: failed to create new thread for client.", rrdhost_hostname(host)); - else - rrdhost_flag_set(host, RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN); - } - - sender_unlock(host->sender); -} - -int rrdpush_receiver_permission_denied(struct web_client *w) { - // we always respond with the same message and error code - // to prevent an attacker from gaining info about the error - buffer_flush(w->response.data); - buffer_strcat(w->response.data, START_STREAMING_ERROR_NOT_PERMITTED); - return HTTP_RESP_UNAUTHORIZED; -} - -int rrdpush_receiver_too_busy_now(struct web_client *w) { - // we always respond with the same message and error code - // to prevent an attacker from gaining info about the error - buffer_flush(w->response.data); - 
buffer_strcat(w->response.data, START_STREAMING_ERROR_BUSY_TRY_LATER); - return HTTP_RESP_SERVICE_UNAVAILABLE; -} - -static void rrdpush_receiver_takeover_web_connection(struct web_client *w, struct receiver_state *rpt) { - rpt->fd = w->ifd; - - rpt->ssl.conn = w->ssl.conn; - rpt->ssl.state = w->ssl.state; - - w->ssl = NETDATA_SSL_UNSET_CONNECTION; - - WEB_CLIENT_IS_DEAD(w); - - if(web_server_mode == WEB_SERVER_MODE_STATIC_THREADED) { - web_client_flag_set(w, WEB_CLIENT_FLAG_DONT_CLOSE_SOCKET); - } - else { - if(w->ifd == w->ofd) - w->ifd = w->ofd = -1; - else - w->ifd = -1; - } - - buffer_flush(w->response.data); -} - -void *rrdpush_receiver_thread(void *ptr); -int rrdpush_receiver_thread_spawn(struct web_client *w, char *decoded_query_string, void *h2o_ctx __maybe_unused) { - - if(!service_running(ABILITY_STREAMING_CONNECTIONS)) - return rrdpush_receiver_too_busy_now(w); - - struct receiver_state *rpt = callocz(1, sizeof(*rpt)); - rpt->connected_since_s = now_realtime_sec(); - rpt->last_msg_t = now_monotonic_sec(); - rpt->hops = 1; - - rpt->capabilities = STREAM_CAP_INVALID; - -#ifdef ENABLE_H2O - rpt->h2o_ctx = h2o_ctx; -#endif - - __atomic_add_fetch(&netdata_buffers_statistics.rrdhost_receivers, sizeof(*rpt), __ATOMIC_RELAXED); - __atomic_add_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(struct rrdhost_system_info), __ATOMIC_RELAXED); - - rpt->system_info = callocz(1, sizeof(struct rrdhost_system_info)); - rpt->system_info->hops = rpt->hops; - - rpt->fd = -1; - rpt->client_ip = strdupz(w->client_ip); - rpt->client_port = strdupz(w->client_port); - - rpt->ssl = NETDATA_SSL_UNSET_CONNECTION; - - rpt->config.update_every = default_rrd_update_every; - - // parse the parameters and fill rpt and rpt->system_info - - while(decoded_query_string) { - char *value = strsep_skip_consecutive_separators(&decoded_query_string, "&"); - if(!value || !*value) continue; - - char *name = strsep_skip_consecutive_separators(&value, "="); - if(!name || !*name) 
continue; - if(!value || !*value) continue; - - if(!strcmp(name, "key") && !rpt->key) - rpt->key = strdupz(value); - - else if(!strcmp(name, "hostname") && !rpt->hostname) - rpt->hostname = strdupz(value); - - else if(!strcmp(name, "registry_hostname") && !rpt->registry_hostname) - rpt->registry_hostname = strdupz(value); - - else if(!strcmp(name, "machine_guid") && !rpt->machine_guid) - rpt->machine_guid = strdupz(value); - - else if(!strcmp(name, "update_every")) - rpt->config.update_every = (int)strtoul(value, NULL, 0); - - else if(!strcmp(name, "os") && !rpt->os) - rpt->os = strdupz(value); - - else if(!strcmp(name, "timezone") && !rpt->timezone) - rpt->timezone = strdupz(value); - - else if(!strcmp(name, "abbrev_timezone") && !rpt->abbrev_timezone) - rpt->abbrev_timezone = strdupz(value); - - else if(!strcmp(name, "utc_offset")) - rpt->utc_offset = (int32_t)strtol(value, NULL, 0); - - else if(!strcmp(name, "hops")) - rpt->hops = rpt->system_info->hops = (uint16_t) strtoul(value, NULL, 0); - - else if(!strcmp(name, "ml_capable")) - rpt->system_info->ml_capable = strtoul(value, NULL, 0); - - else if(!strcmp(name, "ml_enabled")) - rpt->system_info->ml_enabled = strtoul(value, NULL, 0); - - else if(!strcmp(name, "mc_version")) - rpt->system_info->mc_version = strtoul(value, NULL, 0); - - else if(!strcmp(name, "ver") && (rpt->capabilities & STREAM_CAP_INVALID)) - rpt->capabilities = convert_stream_version_to_capabilities(strtoul(value, NULL, 0), NULL, false); - - else { - // An old Netdata child does not have a compatible streaming protocol, map to something sane. 
- if (!strcmp(name, "NETDATA_SYSTEM_OS_NAME")) - name = "NETDATA_HOST_OS_NAME"; - - else if (!strcmp(name, "NETDATA_SYSTEM_OS_ID")) - name = "NETDATA_HOST_OS_ID"; - - else if (!strcmp(name, "NETDATA_SYSTEM_OS_ID_LIKE")) - name = "NETDATA_HOST_OS_ID_LIKE"; - - else if (!strcmp(name, "NETDATA_SYSTEM_OS_VERSION")) - name = "NETDATA_HOST_OS_VERSION"; - - else if (!strcmp(name, "NETDATA_SYSTEM_OS_VERSION_ID")) - name = "NETDATA_HOST_OS_VERSION_ID"; - - else if (!strcmp(name, "NETDATA_SYSTEM_OS_DETECTION")) - name = "NETDATA_HOST_OS_DETECTION"; - - else if(!strcmp(name, "NETDATA_PROTOCOL_VERSION") && (rpt->capabilities & STREAM_CAP_INVALID)) - rpt->capabilities = convert_stream_version_to_capabilities(1, NULL, false); - - if (unlikely(rrdhost_set_system_info_variable(rpt->system_info, name, value))) { - nd_log_daemon(NDLP_NOTICE, "STREAM '%s' [receive from [%s]:%s]: " - "request has parameter '%s' = '%s', which is not used." - , (rpt->hostname && *rpt->hostname) ? rpt->hostname : "-" - , rpt->client_ip, rpt->client_port - , name, value); - } - } - } - - if (rpt->capabilities & STREAM_CAP_INVALID) - // no version is supplied, assume version 0; - rpt->capabilities = convert_stream_version_to_capabilities(0, NULL, false); - - // find the program name and version - if(w->user_agent && w->user_agent[0]) { - char *t = strchr(w->user_agent, '/'); - if(t && *t) { - *t = '\0'; - t++; - } - - rpt->program_name = strdupz(w->user_agent); - if(t && *t) rpt->program_version = strdupz(t); - } - - // check if we should accept this connection - - if(!rpt->key || !*rpt->key) { - rrdpush_receive_log_status( - rpt, "request without an API key, rejecting connection", - RRDPUSH_STATUS_NO_API_KEY, NDLP_WARNING); - - receiver_state_free(rpt); - return rrdpush_receiver_permission_denied(w); - } - - if(!rpt->hostname || !*rpt->hostname) { - rrdpush_receive_log_status( - rpt, "request without a hostname, rejecting connection", - RRDPUSH_STATUS_NO_HOSTNAME, NDLP_WARNING); - - 
receiver_state_free(rpt); - return rrdpush_receiver_permission_denied(w); - } - - if(!rpt->registry_hostname) - rpt->registry_hostname = strdupz(rpt->hostname); - - if(!rpt->machine_guid || !*rpt->machine_guid) { - rrdpush_receive_log_status( - rpt, "request without a machine GUID, rejecting connection", - RRDPUSH_STATUS_NO_MACHINE_GUID, NDLP_WARNING); - - receiver_state_free(rpt); - return rrdpush_receiver_permission_denied(w); - } - - { - char buf[GUID_LEN + 1]; - - if (regenerate_guid(rpt->key, buf) == -1) { - rrdpush_receive_log_status( - rpt, "API key is not a valid UUID (use the command uuidgen to generate one)", - RRDPUSH_STATUS_INVALID_API_KEY, NDLP_WARNING); - - receiver_state_free(rpt); - return rrdpush_receiver_permission_denied(w); - } - - if (regenerate_guid(rpt->machine_guid, buf) == -1) { - rrdpush_receive_log_status( - rpt, "machine GUID is not a valid UUID", - RRDPUSH_STATUS_INVALID_MACHINE_GUID, NDLP_WARNING); - - receiver_state_free(rpt); - return rrdpush_receiver_permission_denied(w); - } - } - - const char *api_key_type = appconfig_get(&stream_config, rpt->key, "type", "api"); - if(!api_key_type || !*api_key_type) api_key_type = "unknown"; - if(strcmp(api_key_type, "api") != 0) { - rrdpush_receive_log_status( - rpt, "API key is a machine GUID", - RRDPUSH_STATUS_INVALID_API_KEY, NDLP_WARNING); - - receiver_state_free(rpt); - return rrdpush_receiver_permission_denied(w); - } - - if(!appconfig_get_boolean(&stream_config, rpt->key, "enabled", 0)) { - rrdpush_receive_log_status( - rpt, "API key is not enabled", - RRDPUSH_STATUS_API_KEY_DISABLED, NDLP_WARNING); - - receiver_state_free(rpt); - return rrdpush_receiver_permission_denied(w); - } - - { - SIMPLE_PATTERN *key_allow_from = simple_pattern_create( - appconfig_get(&stream_config, rpt->key, "allow from", "*"), - NULL, SIMPLE_PATTERN_EXACT, true); - - if(key_allow_from) { - if(!simple_pattern_matches(key_allow_from, w->client_ip)) { - simple_pattern_free(key_allow_from); - - 
rrdpush_receive_log_status( - rpt, "API key is not allowed from this IP", - RRDPUSH_STATUS_NOT_ALLOWED_IP, NDLP_WARNING); - - receiver_state_free(rpt); - return rrdpush_receiver_permission_denied(w); - } - - simple_pattern_free(key_allow_from); - } - } - - { - const char *machine_guid_type = appconfig_get(&stream_config, rpt->machine_guid, "type", "machine"); - if (!machine_guid_type || !*machine_guid_type) machine_guid_type = "unknown"; - - if (strcmp(machine_guid_type, "machine") != 0) { - rrdpush_receive_log_status( - rpt, "machine GUID is an API key", - RRDPUSH_STATUS_INVALID_MACHINE_GUID, NDLP_WARNING); - - receiver_state_free(rpt); - return rrdpush_receiver_permission_denied(w); - } - } - - if(!appconfig_get_boolean(&stream_config, rpt->machine_guid, "enabled", 1)) { - rrdpush_receive_log_status( - rpt, "machine GUID is not enabled", - RRDPUSH_STATUS_MACHINE_GUID_DISABLED, NDLP_WARNING); - - receiver_state_free(rpt); - return rrdpush_receiver_permission_denied(w); - } - - { - SIMPLE_PATTERN *machine_allow_from = simple_pattern_create( - appconfig_get(&stream_config, rpt->machine_guid, "allow from", "*"), - NULL, SIMPLE_PATTERN_EXACT, true); - - if(machine_allow_from) { - if(!simple_pattern_matches(machine_allow_from, w->client_ip)) { - simple_pattern_free(machine_allow_from); - - rrdpush_receive_log_status( - rpt, "machine GUID is not allowed from this IP", - RRDPUSH_STATUS_NOT_ALLOWED_IP, NDLP_WARNING); - - receiver_state_free(rpt); - return rrdpush_receiver_permission_denied(w); - } - - simple_pattern_free(machine_allow_from); - } - } - - if (strcmp(rpt->machine_guid, localhost->machine_guid) == 0) { - - rrdpush_receiver_takeover_web_connection(w, rpt); - - rrdpush_receive_log_status( - rpt, "machine GUID is my own", - RRDPUSH_STATUS_LOCALHOST, NDLP_DEBUG); - - char initial_response[HTTP_HEADER_SIZE + 1]; - snprintfz(initial_response, HTTP_HEADER_SIZE, "%s", START_STREAMING_ERROR_SAME_LOCALHOST); - - if(send_timeout( - &rpt->ssl, - rpt->fd, 
initial_response, strlen(initial_response), 0, 60) != (ssize_t)strlen(initial_response)) { - - nd_log_daemon(NDLP_ERR, "STREAM '%s' [receive from [%s]:%s]: " - "failed to reply." - , rpt->hostname - , rpt->client_ip, rpt->client_port - ); - } - - receiver_state_free(rpt); - return HTTP_RESP_OK; - } - - if(unlikely(web_client_streaming_rate_t > 0)) { - static SPINLOCK spinlock = NETDATA_SPINLOCK_INITIALIZER; - static time_t last_stream_accepted_t = 0; - - time_t now = now_realtime_sec(); - spinlock_lock(&spinlock); - - if(unlikely(last_stream_accepted_t == 0)) - last_stream_accepted_t = now; - - if(now - last_stream_accepted_t < web_client_streaming_rate_t) { - spinlock_unlock(&spinlock); - - char msg[100 + 1]; - snprintfz(msg, sizeof(msg) - 1, - "rate limit, will accept new connection in %ld secs", - (long)(web_client_streaming_rate_t - (now - last_stream_accepted_t))); - - rrdpush_receive_log_status( - rpt, msg, - RRDPUSH_STATUS_RATE_LIMIT, NDLP_NOTICE); - - receiver_state_free(rpt); - return rrdpush_receiver_too_busy_now(w); - } - - last_stream_accepted_t = now; - spinlock_unlock(&spinlock); - } - - /* - * Quick path for rejecting multiple connections. The lock taken is fine-grained - it only protects the receiver - * pointer within the host (if a host exists). This protects against multiple concurrent web requests hitting - * separate threads within the web-server and landing here. The lock guards the thread-shutdown sequence that - * detaches the receiver from the host. If the host is being created (first time-access) then we also use the - * lock to prevent race-hazard (two threads try to create the host concurrently, one wins and the other does a - * lookup to the now-attached structure). - */ - - { - time_t age = 0; - bool receiver_stale = false; - bool receiver_working = false; - - rrd_rdlock(); - RRDHOST *host = rrdhost_find_by_guid(rpt->machine_guid); - if (unlikely(host && rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED))) /* Ignore archived hosts. 
*/ - host = NULL; - - if (host) { - spinlock_lock(&host->receiver_lock); - if (host->receiver) { - age = now_monotonic_sec() - host->receiver->last_msg_t; - - if (age < 30) - receiver_working = true; - else - receiver_stale = true; - } - spinlock_unlock(&host->receiver_lock); - } - rrd_rdunlock(); - - if (receiver_stale && stop_streaming_receiver(host, STREAM_HANDSHAKE_DISCONNECT_STALE_RECEIVER)) { - // we stopped the receiver - // we can proceed with this connection - receiver_stale = false; - - nd_log_daemon(NDLP_NOTICE, "STREAM '%s' [receive from [%s]:%s]: " - "stopped previous stale receiver to accept this one." - , rpt->hostname - , rpt->client_ip, rpt->client_port - ); - } - - if (receiver_working || receiver_stale) { - // another receiver is already connected - // try again later - - char msg[200 + 1]; - snprintfz(msg, sizeof(msg) - 1, - "multiple connections for same host, " - "old connection was last used %ld secs ago%s", - age, receiver_stale ? " (signaled old receiver to stop)" : " (new connection not accepted)"); - - rrdpush_receive_log_status( - rpt, msg, - RRDPUSH_STATUS_ALREADY_CONNECTED, NDLP_DEBUG); - - // Have not set WEB_CLIENT_FLAG_DONT_CLOSE_SOCKET - caller should clean up - buffer_flush(w->response.data); - buffer_strcat(w->response.data, START_STREAMING_ERROR_ALREADY_STREAMING); - receiver_state_free(rpt); - return HTTP_RESP_CONFLICT; - } - } - - rrdpush_receiver_takeover_web_connection(w, rpt); - - char tag[NETDATA_THREAD_TAG_MAX + 1]; - snprintfz(tag, NETDATA_THREAD_TAG_MAX, THREAD_TAG_STREAM_RECEIVER "[%s]", rpt->hostname); - tag[NETDATA_THREAD_TAG_MAX] = '\0'; - - rpt->thread = nd_thread_create(tag, NETDATA_THREAD_OPTION_DEFAULT, rrdpush_receiver_thread, (void *)rpt); - if(!rpt->thread) { - rrdpush_receive_log_status( - rpt, "can't create receiver thread", - RRDPUSH_STATUS_INTERNAL_SERVER_ERROR, NDLP_ERR); - - buffer_flush(w->response.data); - buffer_strcat(w->response.data, "Can't handle this request"); - receiver_state_free(rpt); - 
return HTTP_RESP_INTERNAL_SERVER_ERROR; - } - - // prevent the caller from closing the streaming socket - return HTTP_RESP_OK; -} - -void rrdpush_reset_destinations_postpone_time(RRDHOST *host) { - uint32_t wait = (host->sender) ? host->sender->reconnect_delay : 5; - time_t now = now_realtime_sec(); - for (struct rrdpush_destinations *d = host->destinations; d; d = d->next) - d->postpone_reconnection_until = now + wait; -} - -static struct { - STREAM_HANDSHAKE err; - const char *str; -} handshake_errors[] = { - { STREAM_HANDSHAKE_OK_V3, "CONNECTED" }, - { STREAM_HANDSHAKE_OK_V2, "CONNECTED" }, - { STREAM_HANDSHAKE_OK_V1, "CONNECTED" }, - { STREAM_HANDSHAKE_NEVER, "" }, - { STREAM_HANDSHAKE_ERROR_BAD_HANDSHAKE, "BAD HANDSHAKE" }, - { STREAM_HANDSHAKE_ERROR_LOCALHOST, "LOCALHOST" }, - { STREAM_HANDSHAKE_ERROR_ALREADY_CONNECTED, "ALREADY CONNECTED" }, - { STREAM_HANDSHAKE_ERROR_DENIED, "DENIED" }, - { STREAM_HANDSHAKE_ERROR_SEND_TIMEOUT, "SEND TIMEOUT" }, - { STREAM_HANDSHAKE_ERROR_RECEIVE_TIMEOUT, "RECEIVE TIMEOUT" }, - { STREAM_HANDSHAKE_ERROR_INVALID_CERTIFICATE, "INVALID CERTIFICATE" }, - { STREAM_HANDSHAKE_ERROR_SSL_ERROR, "SSL ERROR" }, - { STREAM_HANDSHAKE_ERROR_CANT_CONNECT, "CANT CONNECT" }, - { STREAM_HANDSHAKE_BUSY_TRY_LATER, "BUSY TRY LATER" }, - { STREAM_HANDSHAKE_INTERNAL_ERROR, "INTERNAL ERROR" }, - { STREAM_HANDSHAKE_INITIALIZATION, "REMOTE IS INITIALIZING" }, - { STREAM_HANDSHAKE_DISCONNECT_HOST_CLEANUP, "DISCONNECTED HOST CLEANUP" }, - { STREAM_HANDSHAKE_DISCONNECT_STALE_RECEIVER, "DISCONNECTED STALE RECEIVER" }, - { STREAM_HANDSHAKE_DISCONNECT_SHUTDOWN, "DISCONNECTED SHUTDOWN REQUESTED" }, - { STREAM_HANDSHAKE_DISCONNECT_NETDATA_EXIT, "DISCONNECTED NETDATA EXIT" }, - { STREAM_HANDSHAKE_DISCONNECT_PARSER_EXIT, "DISCONNECTED PARSE ENDED" }, - {STREAM_HANDSHAKE_DISCONNECT_UNKNOWN_SOCKET_READ_ERROR, "DISCONNECTED UNKNOWN SOCKET READ ERROR" }, - { STREAM_HANDSHAKE_DISCONNECT_PARSER_FAILED, "DISCONNECTED PARSE ERROR" }, - { 
STREAM_HANDSHAKE_DISCONNECT_RECEIVER_LEFT, "DISCONNECTED RECEIVER LEFT" }, - { STREAM_HANDSHAKE_DISCONNECT_ORPHAN_HOST, "DISCONNECTED ORPHAN HOST" }, - { STREAM_HANDSHAKE_NON_STREAMABLE_HOST, "NON STREAMABLE HOST" }, - { STREAM_HANDSHAKE_DISCONNECT_NOT_SUFFICIENT_READ_BUFFER, "DISCONNECTED NOT SUFFICIENT READ BUFFER" }, - {STREAM_HANDSHAKE_DISCONNECT_SOCKET_EOF, "DISCONNECTED SOCKET EOF" }, - {STREAM_HANDSHAKE_DISCONNECT_SOCKET_READ_FAILED, "DISCONNECTED SOCKET READ FAILED" }, - {STREAM_HANDSHAKE_DISCONNECT_SOCKET_READ_TIMEOUT, "DISCONNECTED SOCKET READ TIMEOUT" }, - { 0, NULL }, -}; - -const char *stream_handshake_error_to_string(STREAM_HANDSHAKE handshake_error) { - if(handshake_error >= STREAM_HANDSHAKE_OK_V1) - // handshake_error is the whole version / capabilities number - return "CONNECTED"; - - for(size_t i = 0; handshake_errors[i].str ; i++) { - if(handshake_error == handshake_errors[i].err) - return handshake_errors[i].str; - } - - return "UNKNOWN"; -} diff --git a/src/streaming/rrdpush.h b/src/streaming/rrdpush.h index e7bad86a8c3b70..55d0c296c6af4a 100644 --- a/src/streaming/rrdpush.h +++ b/src/streaming/rrdpush.h @@ -3,689 +3,16 @@ #ifndef NETDATA_RRDPUSH_H #define NETDATA_RRDPUSH_H 1 -#include "libnetdata/libnetdata.h" -#include "daemon/common.h" -#include "web/server/web_client.h" -#include "database/rrdfunctions.h" -#include "database/rrd.h" -#include "stream_capabilities.h" +#include "stream-handshake.h" +#include "stream-capabilities.h" +#include "stream-conf.h" +#include "stream-compression/compression.h" -// When a child disconnects this is the maximum we will wait -// before we update the cloud that the child is offline -#define MAX_CHILD_DISC_DELAY (30000) -#define MAX_CHILD_DISC_TOLERANCE (125 / 100) - -#define CONNECTED_TO_SIZE 100 -#define CBUFFER_INITIAL_SIZE (16 * 1024) -#define THREAD_BUFFER_INITIAL_SIZE (CBUFFER_INITIAL_SIZE / 2) - -// ---------------------------------------------------------------------------- -// stream handshake - 
-#define HTTP_HEADER_SIZE 8192 - -#define STREAMING_PROTOCOL_VERSION "1.1" -#define START_STREAMING_PROMPT_V1 "Hit me baby, push them over..." -#define START_STREAMING_PROMPT_V2 "Hit me baby, push them over and bring the host labels..." -#define START_STREAMING_PROMPT_VN "Hit me baby, push them over with the version=" - -#define START_STREAMING_ERROR_SAME_LOCALHOST "Don't hit me baby, you are trying to stream my localhost back" -#define START_STREAMING_ERROR_ALREADY_STREAMING "This GUID is already streaming to this server" -#define START_STREAMING_ERROR_NOT_PERMITTED "You are not permitted to access this. Check the logs for more info." -#define START_STREAMING_ERROR_BUSY_TRY_LATER "The server is too busy now to accept this request. Try later." -#define START_STREAMING_ERROR_INTERNAL_ERROR "The server encountered an internal error. Try later." -#define START_STREAMING_ERROR_INITIALIZATION "The server is initializing. Try later." - -#define RRDPUSH_STATUS_CONNECTED "CONNECTED" -#define RRDPUSH_STATUS_ALREADY_CONNECTED "ALREADY CONNECTED" -#define RRDPUSH_STATUS_DISCONNECTED "DISCONNECTED" -#define RRDPUSH_STATUS_RATE_LIMIT "RATE LIMIT TRY LATER" -#define RRDPUSH_STATUS_INITIALIZATION_IN_PROGRESS "INITIALIZATION IN PROGRESS RETRY LATER" -#define RRDPUSH_STATUS_INTERNAL_SERVER_ERROR "INTERNAL SERVER ERROR DROPPING CONNECTION" -#define RRDPUSH_STATUS_DUPLICATE_RECEIVER "DUPLICATE RECEIVER DROPPING CONNECTION" -#define RRDPUSH_STATUS_CANT_REPLY "CANT REPLY DROPPING CONNECTION" -#define RRDPUSH_STATUS_NO_HOSTNAME "NO HOSTNAME PERMISSION DENIED" -#define RRDPUSH_STATUS_NO_API_KEY "NO API KEY PERMISSION DENIED" -#define RRDPUSH_STATUS_INVALID_API_KEY "INVALID API KEY PERMISSION DENIED" -#define RRDPUSH_STATUS_NO_MACHINE_GUID "NO MACHINE GUID PERMISSION DENIED" -#define RRDPUSH_STATUS_MACHINE_GUID_DISABLED "MACHINE GUID DISABLED PERMISSION DENIED" -#define RRDPUSH_STATUS_INVALID_MACHINE_GUID "INVALID MACHINE GUID PERMISSION DENIED" -#define RRDPUSH_STATUS_API_KEY_DISABLED 
"API KEY DISABLED PERMISSION DENIED" -#define RRDPUSH_STATUS_NOT_ALLOWED_IP "NOT ALLOWED IP PERMISSION DENIED" -#define RRDPUSH_STATUS_LOCALHOST "LOCALHOST PERMISSION DENIED" -#define RRDPUSH_STATUS_PERMISSION_DENIED "PERMISSION DENIED" -#define RRDPUSH_STATUS_BAD_HANDSHAKE "BAD HANDSHAKE" -#define RRDPUSH_STATUS_TIMEOUT "TIMEOUT" -#define RRDPUSH_STATUS_CANT_UPGRADE_CONNECTION "CANT UPGRADE CONNECTION" -#define RRDPUSH_STATUS_SSL_ERROR "SSL ERROR" -#define RRDPUSH_STATUS_INVALID_SSL_CERTIFICATE "INVALID SSL CERTIFICATE" -#define RRDPUSH_STATUS_CANT_ESTABLISH_SSL_CONNECTION "CANT ESTABLISH SSL CONNECTION" - -typedef enum { - STREAM_HANDSHAKE_OK_V3 = 3, // v3+ - STREAM_HANDSHAKE_OK_V2 = 2, // v2 - STREAM_HANDSHAKE_OK_V1 = 1, // v1 - STREAM_HANDSHAKE_NEVER = 0, // never tried to connect - STREAM_HANDSHAKE_ERROR_BAD_HANDSHAKE = -1, - STREAM_HANDSHAKE_ERROR_LOCALHOST = -2, - STREAM_HANDSHAKE_ERROR_ALREADY_CONNECTED = -3, - STREAM_HANDSHAKE_ERROR_DENIED = -4, - STREAM_HANDSHAKE_ERROR_SEND_TIMEOUT = -5, - STREAM_HANDSHAKE_ERROR_RECEIVE_TIMEOUT = -6, - STREAM_HANDSHAKE_ERROR_INVALID_CERTIFICATE = -7, - STREAM_HANDSHAKE_ERROR_SSL_ERROR = -8, - STREAM_HANDSHAKE_ERROR_CANT_CONNECT = -9, - STREAM_HANDSHAKE_BUSY_TRY_LATER = -10, - STREAM_HANDSHAKE_INTERNAL_ERROR = -11, - STREAM_HANDSHAKE_INITIALIZATION = -12, - STREAM_HANDSHAKE_DISCONNECT_HOST_CLEANUP = -13, - STREAM_HANDSHAKE_DISCONNECT_STALE_RECEIVER = -14, - STREAM_HANDSHAKE_DISCONNECT_SHUTDOWN = -15, - STREAM_HANDSHAKE_DISCONNECT_NETDATA_EXIT = -16, - STREAM_HANDSHAKE_DISCONNECT_PARSER_EXIT = -17, - STREAM_HANDSHAKE_DISCONNECT_UNKNOWN_SOCKET_READ_ERROR = -18, - STREAM_HANDSHAKE_DISCONNECT_PARSER_FAILED = -19, - STREAM_HANDSHAKE_DISCONNECT_RECEIVER_LEFT = -20, - STREAM_HANDSHAKE_DISCONNECT_ORPHAN_HOST = -21, - STREAM_HANDSHAKE_NON_STREAMABLE_HOST = -22, - STREAM_HANDSHAKE_DISCONNECT_NOT_SUFFICIENT_READ_BUFFER = -23, - STREAM_HANDSHAKE_DISCONNECT_SOCKET_EOF = -24, - STREAM_HANDSHAKE_DISCONNECT_SOCKET_READ_FAILED = -25, - 
STREAM_HANDSHAKE_DISCONNECT_SOCKET_READ_TIMEOUT = -26, - STREAM_HANDSHAKE_ERROR_HTTP_UPGRADE = -27, - -} STREAM_HANDSHAKE; - - -// ---------------------------------------------------------------------------- - -typedef struct { - char *os_name; - char *os_id; - char *os_version; - char *kernel_name; - char *kernel_version; -} stream_encoded_t; - -#include "compression.h" - -// Thread-local storage -// Metric transmission: collector threads asynchronously fill the buffer, sender thread uses it. - -typedef enum __attribute__((packed)) { - STREAM_TRAFFIC_TYPE_REPLICATION = 0, - STREAM_TRAFFIC_TYPE_FUNCTIONS, - STREAM_TRAFFIC_TYPE_METADATA, - STREAM_TRAFFIC_TYPE_DATA, - STREAM_TRAFFIC_TYPE_DYNCFG, - - // terminator - STREAM_TRAFFIC_TYPE_MAX, -} STREAM_TRAFFIC_TYPE; - -typedef enum __attribute__((packed)) { - SENDER_FLAG_OVERFLOW = (1 << 0), // The buffer has been overflown -} SENDER_FLAGS; - -typedef void (*rrdpush_defer_action_t)(struct sender_state *s, void *data); -typedef void (*rrdpush_defer_cleanup_t)(struct sender_state *s, void *data); - -struct sender_state { - RRDHOST *host; - pid_t tid; // the thread id of the sender, from gettid_cached() - SENDER_FLAGS flags; - int timeout; - int default_port; - uint32_t reconnect_delay; - char connected_to[CONNECTED_TO_SIZE + 1]; // We don't know which proxy we connect to, passed back from socket.c - size_t begin; - size_t reconnects_counter; - size_t sent_bytes; - size_t sent_bytes_on_this_connection; - size_t send_attempts; - time_t last_traffic_seen_t; - time_t last_state_since_t; // the timestamp of the last state (online/offline) change - size_t not_connected_loops; - // Metrics are collected asynchronously by collector threads calling rrdset_done_push(). This can also trigger - // the lazy creation of the sender thread - both cases (buffer access and thread creation) are guarded here. 
- SPINLOCK spinlock; - struct circular_buffer *buffer; - char read_buffer[PLUGINSD_LINE_MAX + 1]; - ssize_t read_len; - STREAM_CAPABILITIES capabilities; - STREAM_CAPABILITIES disabled_capabilities; - - size_t sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_MAX]; - - int rrdpush_sender_pipe[2]; // collector to sender thread signaling - int rrdpush_sender_socket; - - uint16_t hops; - - struct line_splitter line; - struct compressor_state compressor; - -#ifdef NETDATA_LOG_STREAM_SENDER - FILE *stream_log_fp; -#endif - - NETDATA_SSL ssl; // structure used to encrypt the connection - - struct { - bool shutdown; - STREAM_HANDSHAKE reason; - } exit; - - struct { - DICTIONARY *requests; // de-duplication of replication requests, per chart - time_t oldest_request_after_t; // the timestamp of the oldest replication request - time_t latest_completed_before_t; // the timestamp of the latest replication request - - struct { - size_t pending_requests; // the currently outstanding replication requests - size_t charts_replicating; // the number of unique charts having pending replication requests (on every request one is added and is removed when we finish it - it does not track completion of the replication for this chart) - bool reached_max; // true when the sender buffer should not get more replication responses - } atomic; - - } replication; - - struct { - bool pending_data; - size_t buffer_used_percentage; // the current utilization of the sending buffer - usec_t last_flush_time_ut; // the last time the sender flushed the sending buffer in USEC - time_t last_buffer_recreate_s; // true when the sender buffer should be re-created - } atomic; - - struct { - const char *end_keyword; - BUFFER *payload; - rrdpush_defer_action_t action; - rrdpush_defer_cleanup_t cleanup; - void *action_data; - } defer; - - int parent_using_h2o; -}; - -#define sender_lock(sender) spinlock_lock(&(sender)->spinlock) -#define sender_unlock(sender) spinlock_unlock(&(sender)->spinlock) - 
-#define rrdpush_sender_pipe_has_pending_data(sender) __atomic_load_n(&(sender)->atomic.pending_data, __ATOMIC_RELAXED) -#define rrdpush_sender_pipe_set_pending_data(sender) __atomic_store_n(&(sender)->atomic.pending_data, true, __ATOMIC_RELAXED) -#define rrdpush_sender_pipe_clear_pending_data(sender) __atomic_store_n(&(sender)->atomic.pending_data, false, __ATOMIC_RELAXED) - -#define rrdpush_sender_last_buffer_recreate_get(sender) __atomic_load_n(&(sender)->atomic.last_buffer_recreate_s, __ATOMIC_RELAXED) -#define rrdpush_sender_last_buffer_recreate_set(sender, value) __atomic_store_n(&(sender)->atomic.last_buffer_recreate_s, value, __ATOMIC_RELAXED) - -#define rrdpush_sender_replication_buffer_full_set(sender, value) __atomic_store_n(&((sender)->replication.atomic.reached_max), value, __ATOMIC_SEQ_CST) -#define rrdpush_sender_replication_buffer_full_get(sender) __atomic_load_n(&((sender)->replication.atomic.reached_max), __ATOMIC_SEQ_CST) - -#define rrdpush_sender_set_buffer_used_percent(sender, value) __atomic_store_n(&((sender)->atomic.buffer_used_percentage), value, __ATOMIC_RELAXED) -#define rrdpush_sender_get_buffer_used_percent(sender) __atomic_load_n(&((sender)->atomic.buffer_used_percentage), __ATOMIC_RELAXED) - -#define rrdpush_sender_set_flush_time(sender) __atomic_store_n(&((sender)->atomic.last_flush_time_ut), now_realtime_usec(), __ATOMIC_RELAXED) -#define rrdpush_sender_get_flush_time(sender) __atomic_load_n(&((sender)->atomic.last_flush_time_ut), __ATOMIC_RELAXED) - -#define rrdpush_sender_replicating_charts(sender) __atomic_load_n(&((sender)->replication.atomic.charts_replicating), __ATOMIC_RELAXED) -#define rrdpush_sender_replicating_charts_plus_one(sender) __atomic_add_fetch(&((sender)->replication.atomic.charts_replicating), 1, __ATOMIC_RELAXED) -#define rrdpush_sender_replicating_charts_minus_one(sender) __atomic_sub_fetch(&((sender)->replication.atomic.charts_replicating), 1, __ATOMIC_RELAXED) -#define 
rrdpush_sender_replicating_charts_zero(sender) __atomic_store_n(&((sender)->replication.atomic.charts_replicating), 0, __ATOMIC_RELAXED) - -#define rrdpush_sender_pending_replication_requests(sender) __atomic_load_n(&((sender)->replication.atomic.pending_requests), __ATOMIC_RELAXED) -#define rrdpush_sender_pending_replication_requests_plus_one(sender) __atomic_add_fetch(&((sender)->replication.atomic.pending_requests), 1, __ATOMIC_RELAXED) -#define rrdpush_sender_pending_replication_requests_minus_one(sender) __atomic_sub_fetch(&((sender)->replication.atomic.pending_requests), 1, __ATOMIC_RELAXED) -#define rrdpush_sender_pending_replication_requests_zero(sender) __atomic_store_n(&((sender)->replication.atomic.pending_requests), 0, __ATOMIC_RELAXED) - -/* -typedef enum { - STREAM_NODE_INSTANCE_FEATURE_CLOUD_ONLINE = (1 << 0), - STREAM_NODE_INSTANCE_FEATURE_VIRTUAL_HOST = (1 << 1), - STREAM_NODE_INSTANCE_FEATURE_HEALTH_ENABLED = (1 << 2), - STREAM_NODE_INSTANCE_FEATURE_ML_SELF = (1 << 3), - STREAM_NODE_INSTANCE_FEATURE_ML_RECEIVED = (1 << 4), - STREAM_NODE_INSTANCE_FEATURE_SSL = (1 << 5), -} STREAM_NODE_INSTANCE_FEATURES; - -typedef struct stream_node_instance { - uuid_t uuid; - STRING *agent; - STREAM_NODE_INSTANCE_FEATURES features; - uint32_t hops; - - // receiver information on that agent - int32_t capabilities; - uint32_t local_port; - uint32_t remote_port; - STRING *local_ip; - STRING *remote_ip; -} STREAM_NODE_INSTANCE; -*/ - -struct parser; - -struct receiver_state { - RRDHOST *host; - pid_t tid; - ND_THREAD *thread; - int fd; - char *key; - char *hostname; - char *registry_hostname; - char *machine_guid; - char *os; - char *timezone; // Unused? 
- char *abbrev_timezone; - int32_t utc_offset; - char *client_ip; // Duplicated in pluginsd - char *client_port; // Duplicated in pluginsd - char *program_name; // Duplicated in pluginsd - char *program_version; - struct rrdhost_system_info *system_info; - STREAM_CAPABILITIES capabilities; - time_t last_msg_t; - time_t connected_since_s; - - struct buffered_reader reader; - - uint16_t hops; - - struct { - bool shutdown; // signal the streaming parser to exit - STREAM_HANDSHAKE reason; - } exit; - - struct { - RRD_MEMORY_MODE mode; - int history; - int update_every; - int health_enabled; // CONFIG_BOOLEAN_YES, CONFIG_BOOLEAN_NO, CONFIG_BOOLEAN_AUTO - time_t alarms_delay; - uint32_t alarms_history; - int rrdpush_enabled; - const char *rrdpush_api_key; // DONT FREE - it is allocated in appconfig - const char *rrdpush_send_charts_matching; // DONT FREE - it is allocated in appconfig - bool rrdpush_enable_replication; - time_t rrdpush_seconds_to_replicate; - time_t rrdpush_replication_step; - const char *rrdpush_destination; // DONT FREE - it is allocated in appconfig - unsigned int rrdpush_compression; - STREAM_CAPABILITIES compression_priorities[COMPRESSION_ALGORITHM_MAX]; - } config; - - NETDATA_SSL ssl; - - time_t replication_first_time_t; - - struct decompressor_state decompressor; -/* - struct { - uint32_t count; - STREAM_NODE_INSTANCE *array; - } instances; -*/ - - // The parser pointer is safe to read and use, only when having the host receiver lock. - // Without this lock, the data pointed by the pointer may vanish randomly. - // Also, since the receiver sets it when it starts, it should be read with - // an atomic read. 
- struct parser *parser; - -#ifdef ENABLE_H2O - void *h2o_ctx; -#endif -}; - -#ifdef ENABLE_H2O -#define is_h2o_rrdpush(x) ((x)->h2o_ctx != NULL) -#define unless_h2o_rrdpush(x) if(!is_h2o_rrdpush(x)) -#endif - -struct rrdpush_destinations { - STRING *destination; - bool ssl; - uint32_t attempts; - time_t since; - time_t postpone_reconnection_until; - STREAM_HANDSHAKE reason; - - struct rrdpush_destinations *prev; - struct rrdpush_destinations *next; -}; - -extern unsigned int default_rrdpush_enabled; -extern unsigned int default_rrdpush_compression_enabled; -extern const char *default_rrdpush_destination; -extern const char *default_rrdpush_api_key; -extern const char *default_rrdpush_send_charts_matching; -extern bool default_rrdpush_enable_replication; -extern time_t default_rrdpush_seconds_to_replicate; -extern time_t default_rrdpush_replication_step; -extern unsigned int remote_clock_resync_iterations; - -void rrdpush_destinations_init(RRDHOST *host); -void rrdpush_destinations_free(RRDHOST *host); - -BUFFER *sender_start(struct sender_state *s); -void sender_commit(struct sender_state *s, BUFFER *wb, STREAM_TRAFFIC_TYPE type); -int rrdpush_init(); -bool rrdpush_receiver_needs_dbengine(); -int configured_as_parent(); - -typedef struct rrdset_stream_buffer { - STREAM_CAPABILITIES capabilities; - bool v2; - bool begin_v2_added; - time_t wall_clock_time; - uint64_t rrdset_flags; // RRDSET_FLAGS - time_t last_point_end_time_s; - BUFFER *wb; -} RRDSET_STREAM_BUFFER; - -RRDSET_STREAM_BUFFER rrdset_push_metric_initialize(RRDSET *st, time_t wall_clock_time); -void rrdset_push_metrics_v1(RRDSET_STREAM_BUFFER *rsb, RRDSET *st); -void rrdset_push_metrics_finished(RRDSET_STREAM_BUFFER *rsb, RRDSET *st); -void rrddim_push_metrics_v2(RRDSET_STREAM_BUFFER *rsb, RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, SN_FLAGS flags); - -bool rrdset_push_chart_definition_now(RRDSET *st); -void *rrdpush_sender_thread(void *ptr); -void rrdpush_send_host_labels(RRDHOST *host); 
-void rrdpush_send_global_functions(RRDHOST *host); - -int rrdpush_receiver_thread_spawn(struct web_client *w, char *decoded_query_string, void *h2o_ctx); -void rrdpush_sender_thread_stop(RRDHOST *host, STREAM_HANDSHAKE reason, bool wait); - -void rrdpush_sender_send_this_host_variable_now(RRDHOST *host, const RRDVAR_ACQUIRED *rva); -int connect_to_one_of_destinations( - RRDHOST *host, - int default_port, - struct timeval *timeout, - size_t *reconnects_counter, - char *connected_to, - size_t connected_to_size, - struct rrdpush_destinations **destination); - -void rrdpush_signal_sender_to_wake_up(struct sender_state *s); - -void rrdpush_reset_destinations_postpone_time(RRDHOST *host); -const char *stream_handshake_error_to_string(STREAM_HANDSHAKE handshake_error); -void rrdpush_receive_log_status(struct receiver_state *rpt, const char *msg, const char *status, ND_LOG_FIELD_PRIORITY priority); - -void receiver_state_free(struct receiver_state *rpt); -bool stop_streaming_receiver(RRDHOST *host, STREAM_HANDSHAKE reason); - -void sender_thread_buffer_free(void); - -#include "replication.h" - -typedef enum __attribute__((packed)) { - RRDHOST_DB_STATUS_INITIALIZING = 0, - RRDHOST_DB_STATUS_QUERYABLE, -} RRDHOST_DB_STATUS; - -static inline const char *rrdhost_db_status_to_string(RRDHOST_DB_STATUS status) { - switch(status) { - default: - case RRDHOST_DB_STATUS_INITIALIZING: - return "initializing"; - - case RRDHOST_DB_STATUS_QUERYABLE: - return "online"; - } -} - -typedef enum __attribute__((packed)) { - RRDHOST_DB_LIVENESS_STALE = 0, - RRDHOST_DB_LIVENESS_LIVE, -} RRDHOST_DB_LIVENESS; - -static inline const char *rrdhost_db_liveness_to_string(RRDHOST_DB_LIVENESS status) { - switch(status) { - default: - case RRDHOST_DB_LIVENESS_STALE: - return "stale"; - - case RRDHOST_DB_LIVENESS_LIVE: - return "live"; - } -} - -typedef enum __attribute__((packed)) { - RRDHOST_INGEST_STATUS_ARCHIVED = 0, - RRDHOST_INGEST_STATUS_INITIALIZING, - RRDHOST_INGEST_STATUS_REPLICATING, - 
RRDHOST_INGEST_STATUS_ONLINE, - RRDHOST_INGEST_STATUS_OFFLINE, -} RRDHOST_INGEST_STATUS; - -static inline const char *rrdhost_ingest_status_to_string(RRDHOST_INGEST_STATUS status) { - switch(status) { - case RRDHOST_INGEST_STATUS_ARCHIVED: - return "archived"; - - case RRDHOST_INGEST_STATUS_INITIALIZING: - return "initializing"; - - case RRDHOST_INGEST_STATUS_REPLICATING: - return "replicating"; - - case RRDHOST_INGEST_STATUS_ONLINE: - return "online"; - - default: - case RRDHOST_INGEST_STATUS_OFFLINE: - return "offline"; - } -} - -typedef enum __attribute__((packed)) { - RRDHOST_INGEST_TYPE_LOCALHOST = 0, - RRDHOST_INGEST_TYPE_VIRTUAL, - RRDHOST_INGEST_TYPE_CHILD, - RRDHOST_INGEST_TYPE_ARCHIVED, -} RRDHOST_INGEST_TYPE; - -static inline const char *rrdhost_ingest_type_to_string(RRDHOST_INGEST_TYPE type) { - switch(type) { - case RRDHOST_INGEST_TYPE_LOCALHOST: - return "localhost"; - - case RRDHOST_INGEST_TYPE_VIRTUAL: - return "virtual"; - - case RRDHOST_INGEST_TYPE_CHILD: - return "child"; - - default: - case RRDHOST_INGEST_TYPE_ARCHIVED: - return "archived"; - } -} - -typedef enum __attribute__((packed)) { - RRDHOST_STREAM_STATUS_DISABLED = 0, - RRDHOST_STREAM_STATUS_REPLICATING, - RRDHOST_STREAM_STATUS_ONLINE, - RRDHOST_STREAM_STATUS_OFFLINE, -} RRDHOST_STREAMING_STATUS; - -static inline const char *rrdhost_streaming_status_to_string(RRDHOST_STREAMING_STATUS status) { - switch(status) { - case RRDHOST_STREAM_STATUS_DISABLED: - return "disabled"; - - case RRDHOST_STREAM_STATUS_REPLICATING: - return "replicating"; - - case RRDHOST_STREAM_STATUS_ONLINE: - return "online"; - - default: - case RRDHOST_STREAM_STATUS_OFFLINE: - return "offline"; - } -} - -typedef enum __attribute__((packed)) { - RRDHOST_ML_STATUS_DISABLED = 0, - RRDHOST_ML_STATUS_OFFLINE, - RRDHOST_ML_STATUS_RUNNING, -} RRDHOST_ML_STATUS; - -static inline const char *rrdhost_ml_status_to_string(RRDHOST_ML_STATUS status) { - switch(status) { - case RRDHOST_ML_STATUS_RUNNING: - return "online"; - - case 
RRDHOST_ML_STATUS_OFFLINE: - return "offline"; - - default: - case RRDHOST_ML_STATUS_DISABLED: - return "disabled"; - } -} - -typedef enum __attribute__((packed)) { - RRDHOST_ML_TYPE_DISABLED = 0, - RRDHOST_ML_TYPE_SELF, - RRDHOST_ML_TYPE_RECEIVED, -} RRDHOST_ML_TYPE; - -static inline const char *rrdhost_ml_type_to_string(RRDHOST_ML_TYPE type) { - switch(type) { - case RRDHOST_ML_TYPE_SELF: - return "self"; - - case RRDHOST_ML_TYPE_RECEIVED: - return "received"; - - default: - case RRDHOST_ML_TYPE_DISABLED: - return "disabled"; - } -} - -typedef enum __attribute__((packed)) { - RRDHOST_HEALTH_STATUS_DISABLED = 0, - RRDHOST_HEALTH_STATUS_INITIALIZING, - RRDHOST_HEALTH_STATUS_RUNNING, -} RRDHOST_HEALTH_STATUS; - -static inline const char *rrdhost_health_status_to_string(RRDHOST_HEALTH_STATUS status) { - switch(status) { - default: - case RRDHOST_HEALTH_STATUS_DISABLED: - return "disabled"; - - case RRDHOST_HEALTH_STATUS_INITIALIZING: - return "initializing"; - - case RRDHOST_HEALTH_STATUS_RUNNING: - return "online"; - } -} - -typedef enum __attribute__((packed)) { - RRDHOST_DYNCFG_STATUS_UNAVAILABLE = 0, - RRDHOST_DYNCFG_STATUS_AVAILABLE, -} RRDHOST_DYNCFG_STATUS; - -static inline const char *rrdhost_dyncfg_status_to_string(RRDHOST_DYNCFG_STATUS status) { - switch(status) { - default: - case RRDHOST_DYNCFG_STATUS_UNAVAILABLE: - return "unavailable"; - - case RRDHOST_DYNCFG_STATUS_AVAILABLE: - return "online"; - } -} - -typedef struct { - RRDHOST *host; - time_t now; - - struct { - RRDHOST_DYNCFG_STATUS status; - } dyncfg; - - struct { - RRDHOST_DB_STATUS status; - RRDHOST_DB_LIVENESS liveness; - RRD_MEMORY_MODE mode; - time_t first_time_s; - time_t last_time_s; - size_t metrics; - size_t instances; - size_t contexts; - } db; - - struct { - RRDHOST_ML_STATUS status; - RRDHOST_ML_TYPE type; - struct ml_metrics_statistics metrics; - } ml; - - struct { - size_t hops; - RRDHOST_INGEST_TYPE type; - RRDHOST_INGEST_STATUS status; - SOCKET_PEERS peers; - bool ssl; - 
STREAM_CAPABILITIES capabilities; - uint32_t id; - time_t since; - STREAM_HANDSHAKE reason; - - struct { - bool in_progress; - NETDATA_DOUBLE completion; - size_t instances; - } replication; - } ingest; - - struct { - size_t hops; - RRDHOST_STREAMING_STATUS status; - SOCKET_PEERS peers; - bool ssl; - bool compression; - STREAM_CAPABILITIES capabilities; - uint32_t id; - time_t since; - STREAM_HANDSHAKE reason; - - struct { - bool in_progress; - NETDATA_DOUBLE completion; - size_t instances; - } replication; - - size_t sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_MAX]; - } stream; - - struct { - RRDHOST_HEALTH_STATUS status; - struct { - uint32_t undefined; - uint32_t uninitialized; - uint32_t clear; - uint32_t warning; - uint32_t critical; - } alerts; - } health; -} RRDHOST_STATUS; - -void rrdhost_status(RRDHOST *host, time_t now, RRDHOST_STATUS *s); -bool rrdhost_state_cloud_emulation(RRDHOST *host); - -bool rrdpush_compression_initialize(struct sender_state *s); -bool rrdpush_decompression_initialize(struct receiver_state *rpt); -void rrdpush_parse_compression_order(struct receiver_state *rpt, const char *order); -void rrdpush_select_receiver_compression_algorithm(struct receiver_state *rpt); -void rrdpush_compression_deactivate(struct sender_state *s); +#include "sender.h" +#include "receiver.h" +#include "rrdhost-status.h" #include "protocol/commands.h" -#include "stream_path.h" +#include "stream-path.h" #endif //NETDATA_RRDPUSH_H diff --git a/src/streaming/sender_commit.c b/src/streaming/sender-commit.c similarity index 99% rename from src/streaming/sender_commit.c rename to src/streaming/sender-commit.c index 3219569b03d57a..6ff7cb2ba766ed 100644 --- a/src/streaming/sender_commit.c +++ b/src/streaming/sender-commit.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#include "sender_internals.h" +#include "sender-internals.h" static __thread BUFFER *sender_thread_buffer = NULL; static __thread bool sender_thread_buffer_used = false; 
diff --git a/src/streaming/sender_connect.c b/src/streaming/sender-connect.c similarity index 92% rename from src/streaming/sender_connect.c rename to src/streaming/sender-connect.c index 1085cd9013138e..ac5f392a0432f3 100644 --- a/src/streaming/sender_connect.c +++ b/src/streaming/sender-connect.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#include "sender_internals.h" +#include "sender-internals.h" void rrdpush_sender_thread_close_socket(struct sender_state *s) { rrdhost_flag_clear(s->host, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED | RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS); @@ -387,7 +387,7 @@ static int rrdpush_http_upgrade_prelude(RRDHOST *host, struct sender_state *s) { return 1; } -static bool rrdpush_sender_thread_connect_to_parent(RRDHOST *host, int default_port, int timeout, struct sender_state *s) { +static bool sender_send_connection_request(RRDHOST *host, int default_port, int timeout, struct sender_state *s) { struct timeval tv = { .tv_sec = timeout, @@ -638,7 +638,7 @@ bool attempt_to_connect(struct sender_state *state) { state->sent_bytes_on_this_connection = 0; memset(state->sent_bytes_on_this_connection_per_type, 0, sizeof(state->sent_bytes_on_this_connection_per_type)); - if(rrdpush_sender_thread_connect_to_parent(state->host, state->default_port, state->timeout, state)) { + if(sender_send_connection_request(state->host, state->default_port, state->timeout, state)) { // reset the buffer, to properly send charts and metrics rrdpush_sender_on_connect(state->host); @@ -674,3 +674,68 @@ bool attempt_to_connect(struct sender_state *state) { return false; } + +bool rrdpush_sender_connect(struct sender_state *s) { + worker_is_busy(WORKER_SENDER_JOB_CONNECT); + + time_t now_s = now_monotonic_sec(); + rrdpush_sender_cbuffer_recreate_timed(s, now_s, false, true); + rrdpush_sender_execute_commands_cleanup(s); + + rrdhost_flag_clear(s->host, RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS); + s->flags &= ~SENDER_FLAG_OVERFLOW; + s->read_len = 
0; + s->buffer->read = 0; + s->buffer->write = 0; + + if(!attempt_to_connect(s)) + return false; + + if(rrdhost_sender_should_exit(s)) + return false; + + s->last_traffic_seen_t = now_monotonic_sec(); + stream_path_send_to_parent(s->host); + rrdpush_sender_send_claimed_id(s->host); + rrdpush_send_host_labels(s->host); + rrdpush_send_global_functions(s->host); + s->replication.oldest_request_after_t = 0; + + rrdhost_flag_set(s->host, RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS); + + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "STREAM %s [send to %s]: enabling metrics streaming...", + rrdhost_hostname(s->host), s->connected_to); + + return true; +} + +// Either the receiver lost the connection or the host is being destroyed. +// The sender mutex guards thread creation, any spurious data is wiped on reconnection. +void rrdpush_sender_thread_stop(RRDHOST *host, STREAM_HANDSHAKE reason, bool wait) { + if (!host->sender) + return; + + sender_lock(host->sender); + + if(rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN)) { + + host->sender->exit.shutdown = true; + host->sender->exit.reason = reason; + + // signal it to cancel + nd_thread_signal_cancel(host->rrdpush_sender_thread); + } + + sender_unlock(host->sender); + + if(wait) { + sender_lock(host->sender); + while(host->sender->tid) { + sender_unlock(host->sender); + sleep_usec(10 * USEC_PER_MS); + sender_lock(host->sender); + } + sender_unlock(host->sender); + } +} diff --git a/src/streaming/sender-destinations.c b/src/streaming/sender-destinations.c new file mode 100644 index 00000000000000..5e67ca0397f4d6 --- /dev/null +++ b/src/streaming/sender-destinations.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "sender-internals.h" + +void rrdpush_reset_destinations_postpone_time(RRDHOST *host) { + uint32_t wait = (host->sender) ? 
host->sender->reconnect_delay : 5; + time_t now = now_realtime_sec(); + for (struct rrdpush_destinations *d = host->destinations; d; d = d->next) + d->postpone_reconnection_until = now + wait; +} + +void rrdpush_sender_ssl_init(RRDHOST *host) { + static SPINLOCK sp = NETDATA_SPINLOCK_INITIALIZER; + spinlock_lock(&sp); + + if(netdata_ssl_streaming_sender_ctx || !host) { + spinlock_unlock(&sp); + return; + } + + for(struct rrdpush_destinations *d = host->destinations; d ; d = d->next) { + if (d->ssl) { + // we need to initialize SSL + + netdata_ssl_initialize_ctx(NETDATA_SSL_STREAMING_SENDER_CTX); + ssl_security_location_for_context(netdata_ssl_streaming_sender_ctx, stream_conf_ssl_ca_file, stream_conf_ssl_ca_path); + + // stop the loop + break; + } + } + + spinlock_unlock(&sp); +} + +int connect_to_one_of_destinations( + RRDHOST *host, + int default_port, + struct timeval *timeout, + size_t *reconnects_counter, + char *connected_to, + size_t connected_to_size, + struct rrdpush_destinations **destination) +{ + int sock = -1; + + for (struct rrdpush_destinations *d = host->destinations; d; d = d->next) { + time_t now = now_realtime_sec(); + + if(nd_thread_signaled_to_cancel()) + return -1; + + if(d->postpone_reconnection_until > now) + continue; + + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "STREAM %s: connecting to '%s' (default port: %d)...", + rrdhost_hostname(host), string2str(d->destination), default_port); + + if (reconnects_counter) + *reconnects_counter += 1; + + d->since = now; + d->attempts++; + sock = connect_to_this(string2str(d->destination), default_port, timeout); + + if (sock != -1) { + if (connected_to && connected_to_size) + strncpyz(connected_to, string2str(d->destination), connected_to_size); + + *destination = d; + + // move the current item to the end of the list + // without this, this destination will break the loop again and again + // not advancing the destinations to find one that may work + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(host->destinations, 
d, prev, next); + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(host->destinations, d, prev, next); + + break; + } + } + + return sock; +} + +struct destinations_init_tmp { + RRDHOST *host; + struct rrdpush_destinations *list; + int count; +}; + +static bool destinations_init_add_one(char *entry, void *data) { + struct destinations_init_tmp *t = data; + + struct rrdpush_destinations *d = callocz(1, sizeof(struct rrdpush_destinations)); + char *colon_ssl = strstr(entry, ":SSL"); + if(colon_ssl) { + *colon_ssl = '\0'; + d->ssl = true; + } + else + d->ssl = false; + + d->destination = string_strdupz(entry); + + __atomic_add_fetch(&netdata_buffers_statistics.rrdhost_senders, sizeof(struct rrdpush_destinations), __ATOMIC_RELAXED); + + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(t->list, d, prev, next); + + t->count++; + nd_log_daemon(NDLP_INFO, "STREAM: added streaming destination No %d: '%s' to host '%s'", t->count, string2str(d->destination), rrdhost_hostname(t->host)); + + return false; // we return false, so that we will get all defined destinations +} + +void rrdpush_destinations_init(RRDHOST *host) { + if(!host->rrdpush.send.destination) return; + + rrdpush_destinations_free(host); + + struct destinations_init_tmp t = { + .host = host, + .list = NULL, + .count = 0, + }; + + foreach_entry_in_connection_string(host->rrdpush.send.destination, destinations_init_add_one, &t); + + host->destinations = t.list; +} + +void rrdpush_destinations_free(RRDHOST *host) { + while (host->destinations) { + struct rrdpush_destinations *tmp = host->destinations; + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(host->destinations, tmp, prev, next); + string_freez(tmp->destination); + freez(tmp); + __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_senders, sizeof(struct rrdpush_destinations), __ATOMIC_RELAXED); + } + + host->destinations = NULL; +} + diff --git a/src/streaming/sender-destinations.h b/src/streaming/sender-destinations.h new file mode 100644 index 00000000000000..e7c72cef7b3dde --- 
/dev/null +++ b/src/streaming/sender-destinations.h @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SENDER_DESTINATIONS_H +#define NETDATA_SENDER_DESTINATIONS_H + +#include "libnetdata/libnetdata.h" +#include "stream-handshake.h" +#include "database/rrd.h" + +struct rrdpush_destinations { + STRING *destination; + bool ssl; + uint32_t attempts; + time_t since; + time_t postpone_reconnection_until; + STREAM_HANDSHAKE reason; + + struct rrdpush_destinations *prev; + struct rrdpush_destinations *next; +}; + +void rrdpush_sender_ssl_init(RRDHOST *host); + +void rrdpush_reset_destinations_postpone_time(RRDHOST *host); + +void rrdpush_destinations_init(RRDHOST *host); +void rrdpush_destinations_free(RRDHOST *host); + +int connect_to_one_of_destinations( + RRDHOST *host, + int default_port, + struct timeval *timeout, + size_t *reconnects_counter, + char *connected_to, + size_t connected_to_size, + struct rrdpush_destinations **destination); + +#endif //NETDATA_SENDER_DESTINATIONS_H diff --git a/src/streaming/sender_execute.c b/src/streaming/sender-execute.c similarity index 99% rename from src/streaming/sender_execute.c rename to src/streaming/sender-execute.c index 158569cd2b30e6..e180710e90b891 100644 --- a/src/streaming/sender_execute.c +++ b/src/streaming/sender-execute.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#include "sender_internals.h" +#include "sender-internals.h" struct inflight_stream_function { struct sender_state *sender; diff --git a/src/streaming/sender_internals.h b/src/streaming/sender-internals.h similarity index 94% rename from src/streaming/sender_internals.h rename to src/streaming/sender-internals.h index 0db4ac6985b2ae..574369afa9c28b 100644 --- a/src/streaming/sender_internals.h +++ b/src/streaming/sender-internals.h @@ -4,7 +4,7 @@ #define NETDATA_SENDER_INTERNALS_H #include "rrdpush.h" -#include "common.h" +#include "h2o-common.h" #include "aclk/https_client.h" #define 
WORKER_SENDER_JOB_CONNECT 0 @@ -37,10 +37,6 @@ #error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 25 #endif -extern struct config stream_config; -extern const char *netdata_ssl_ca_path; -extern const char *netdata_ssl_ca_file; - bool attempt_to_connect(struct sender_state *state); void rrdpush_sender_on_connect(RRDHOST *host); void rrdpush_sender_after_connect(RRDHOST *host); diff --git a/src/streaming/sender.c b/src/streaming/sender.c index 6b33ccd8788410..666409b1c7fcce 100644 --- a/src/streaming/sender.c +++ b/src/streaming/sender.c @@ -1,54 +1,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#include "sender_internals.h" - -static inline void rrdpush_sender_add_host_variable_to_buffer(BUFFER *wb, const RRDVAR_ACQUIRED *rva) { - buffer_sprintf( - wb - , "VARIABLE HOST %s = " NETDATA_DOUBLE_FORMAT "\n" - , rrdvar_name(rva) - , rrdvar2number(rva) - ); - - netdata_log_debug(D_STREAM, "RRDVAR pushed HOST VARIABLE %s = " NETDATA_DOUBLE_FORMAT, rrdvar_name(rva), rrdvar2number(rva)); -} - -void rrdpush_sender_send_this_host_variable_now(RRDHOST *host, const RRDVAR_ACQUIRED *rva) { - if(rrdhost_can_send_definitions_to_parent(host)) { - BUFFER *wb = sender_start(host->sender); - rrdpush_sender_add_host_variable_to_buffer(wb, rva); - sender_commit(host->sender, wb, STREAM_TRAFFIC_TYPE_METADATA); - sender_thread_buffer_free(); - } -} - -struct custom_host_variables_callback { - BUFFER *wb; -}; - -static int rrdpush_sender_thread_custom_host_variables_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdvar_ptr __maybe_unused, void *struct_ptr) { - const RRDVAR_ACQUIRED *rv = (const RRDVAR_ACQUIRED *)item; - struct custom_host_variables_callback *tmp = struct_ptr; - BUFFER *wb = tmp->wb; - - rrdpush_sender_add_host_variable_to_buffer(wb, rv); - return 1; -} - -static void rrdpush_sender_thread_send_custom_host_variables(RRDHOST *host) { - if(rrdhost_can_send_definitions_to_parent(host)) { - BUFFER *wb = sender_start(host->sender); - struct 
custom_host_variables_callback tmp = { - .wb = wb - }; - int ret = rrdvar_walkthrough_read(host->rrdvars, rrdpush_sender_thread_custom_host_variables_callback, &tmp); - (void)ret; - sender_commit(host->sender, wb, STREAM_TRAFFIC_TYPE_METADATA); - sender_thread_buffer_free(); - - netdata_log_debug(D_STREAM, "RRDVAR sent %d VARIABLES", ret); - } -} +#include "sender-internals.h" // resets all the chart, so that their definitions // will be resent to the central netdata @@ -72,7 +24,7 @@ static void rrdpush_sender_thread_reset_all_charts(RRDHOST *host) { rrdhost_sender_replicating_charts_zero(host); } -static void rrdpush_sender_cbuffer_recreate_timed(struct sender_state *s, time_t now_s, bool have_mutex, bool force) { +void rrdpush_sender_cbuffer_recreate_timed(struct sender_state *s, time_t now_s, bool have_mutex, bool force) { static __thread time_t last_reset_time_s = 0; if(!force && now_s - last_reset_time_s < 300) @@ -307,7 +259,7 @@ static void rrdhost_clear_sender___while_having_sender_mutex(RRDHOST *host) { rrdpush_reset_destinations_postpone_time(host); } -static bool rrdhost_sender_should_exit(struct sender_state *s) { +bool rrdhost_sender_should_exit(struct sender_state *s) { if(unlikely(nd_thread_signaled_to_cancel())) { if(!s->exit.reason) s->exit.reason = STREAM_HANDSHAKE_DISCONNECT_SHUTDOWN; @@ -341,30 +293,6 @@ static bool rrdhost_sender_should_exit(struct sender_state *s) { return false; } -void rrdpush_initialize_ssl_ctx(RRDHOST *host __maybe_unused) { - static SPINLOCK sp = NETDATA_SPINLOCK_INITIALIZER; - spinlock_lock(&sp); - - if(netdata_ssl_streaming_sender_ctx || !host) { - spinlock_unlock(&sp); - return; - } - - for(struct rrdpush_destinations *d = host->destinations; d ; d = d->next) { - if (d->ssl) { - // we need to initialize SSL - - netdata_ssl_initialize_ctx(NETDATA_SSL_STREAMING_SENDER_CTX); - ssl_security_location_for_context(netdata_ssl_streaming_sender_ctx, netdata_ssl_ca_file, netdata_ssl_ca_path); - - // stop the loop - break; - } - 
} - - spinlock_unlock(&sp); -} - static bool stream_sender_log_capabilities(BUFFER *wb, void *ptr) { struct sender_state *state = ptr; if(!state) @@ -461,7 +389,7 @@ void *rrdpush_sender_thread(void *ptr) { return NULL; } - rrdpush_initialize_ssl_ctx(s->host); + rrdpush_sender_ssl_init(s->host); netdata_log_info("STREAM %s [send]: thread created (task id %d)", rrdhost_hostname(s->host), gettid_cached()); @@ -477,10 +405,10 @@ void *rrdpush_sender_thread(void *ptr) { s->reconnect_delay = (unsigned int)appconfig_get_duration_seconds( &stream_config, CONFIG_SECTION_STREAM, "reconnect delay", 5); - remote_clock_resync_iterations = (unsigned int)appconfig_get_number( + stream_conf_initial_clock_resync_iterations = (unsigned int)appconfig_get_number( &stream_config, CONFIG_SECTION_STREAM, "initial clock resync iterations", - remote_clock_resync_iterations); // TODO: REMOVE FOR SLEW / GAPFILLING + stream_conf_initial_clock_resync_iterations); // TODO: REMOVE FOR SLEW / GAPFILLING s->parent_using_h2o = appconfig_get_boolean( &stream_config, CONFIG_SECTION_STREAM, "parent using h2o", false); @@ -511,43 +439,11 @@ void *rrdpush_sender_thread(void *ptr) { // The connection attempt blocks (after which we use the socket in nonblocking) if(unlikely(s->rrdpush_sender_socket == -1)) { - if(was_connected) { + if(was_connected) rrdpush_sender_on_disconnect(s->host); - was_connected = false; - } - - worker_is_busy(WORKER_SENDER_JOB_CONNECT); - - now_s = now_monotonic_sec(); - rrdpush_sender_cbuffer_recreate_timed(s, now_s, false, true); - rrdpush_sender_execute_commands_cleanup(s); - - rrdhost_flag_clear(s->host, RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS); - s->flags &= ~SENDER_FLAG_OVERFLOW; - s->read_len = 0; - s->buffer->read = 0; - s->buffer->write = 0; - - if(!attempt_to_connect(s)) - continue; - - if(rrdhost_sender_should_exit(s)) - break; - - now_s = s->last_traffic_seen_t = now_monotonic_sec(); - stream_path_send_to_parent(s->host); - 
rrdpush_sender_send_claimed_id(s->host); - rrdpush_send_host_labels(s->host); - rrdpush_send_global_functions(s->host); - s->replication.oldest_request_after_t = 0; - was_connected = true; - - rrdhost_flag_set(s->host, RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS); - - nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM %s [send to %s]: enabling metrics streaming...", - rrdhost_hostname(s->host), s->connected_to); + was_connected = rrdpush_sender_connect(s); + now_s = s->last_traffic_seen_t; continue; } @@ -755,3 +651,21 @@ void *rrdpush_sender_thread(void *ptr) { return NULL; } + +void rrdpush_sender_thread_spawn(RRDHOST *host) { + sender_lock(host->sender); + + if(!rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN)) { + char tag[NETDATA_THREAD_TAG_MAX + 1]; + snprintfz(tag, NETDATA_THREAD_TAG_MAX, THREAD_TAG_STREAM_SENDER "[%s]", rrdhost_hostname(host)); + + host->rrdpush_sender_thread = nd_thread_create(tag, NETDATA_THREAD_OPTION_DEFAULT, + rrdpush_sender_thread, (void *)host->sender); + if(!host->rrdpush_sender_thread) + nd_log_daemon(NDLP_ERR, "STREAM %s [send]: failed to create new thread for client.", rrdhost_hostname(host)); + else + rrdhost_flag_set(host, RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN); + } + + sender_unlock(host->sender); +} diff --git a/src/streaming/sender.h b/src/streaming/sender.h new file mode 100644 index 00000000000000..94d104f5f021d6 --- /dev/null +++ b/src/streaming/sender.h @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SENDER_H +#define NETDATA_SENDER_H + +#include "libnetdata/libnetdata.h" + +#define CONNECTED_TO_SIZE 100 + +#define CBUFFER_INITIAL_SIZE (16 * 1024) +#define THREAD_BUFFER_INITIAL_SIZE (CBUFFER_INITIAL_SIZE / 2) + +typedef enum __attribute__((packed)) { + STREAM_TRAFFIC_TYPE_REPLICATION = 0, + STREAM_TRAFFIC_TYPE_FUNCTIONS, + STREAM_TRAFFIC_TYPE_METADATA, + STREAM_TRAFFIC_TYPE_DATA, + STREAM_TRAFFIC_TYPE_DYNCFG, + + // terminator + STREAM_TRAFFIC_TYPE_MAX, +} STREAM_TRAFFIC_TYPE; + +typedef 
enum __attribute__((packed)) { + SENDER_FLAG_OVERFLOW = (1 << 0), // The buffer has been overflown +} SENDER_FLAGS; + +typedef struct { + char *os_name; + char *os_id; + char *os_version; + char *kernel_name; + char *kernel_version; +} stream_encoded_t; + +#include "stream-handshake.h" +#include "stream-capabilities.h" +#include "stream-conf.h" +#include "stream-compression/compression.h" + +#include "sender-destinations.h" + +typedef void (*rrdpush_defer_action_t)(struct sender_state *s, void *data); +typedef void (*rrdpush_defer_cleanup_t)(struct sender_state *s, void *data); + +struct sender_state { + RRDHOST *host; + pid_t tid; // the thread id of the sender, from gettid_cached() + SENDER_FLAGS flags; + int timeout; + int default_port; + uint32_t reconnect_delay; + char connected_to[CONNECTED_TO_SIZE + 1]; // We don't know which proxy we connect to, passed back from socket.c + size_t begin; + size_t reconnects_counter; + size_t sent_bytes; + size_t sent_bytes_on_this_connection; + size_t send_attempts; + time_t last_traffic_seen_t; + time_t last_state_since_t; // the timestamp of the last state (online/offline) change + size_t not_connected_loops; + // Metrics are collected asynchronously by collector threads calling rrdset_done_push(). This can also trigger + // the lazy creation of the sender thread - both cases (buffer access and thread creation) are guarded here. 
+ SPINLOCK spinlock; + struct circular_buffer *buffer; + char read_buffer[PLUGINSD_LINE_MAX + 1]; + ssize_t read_len; + STREAM_CAPABILITIES capabilities; + STREAM_CAPABILITIES disabled_capabilities; + + size_t sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_MAX]; + + int rrdpush_sender_pipe[2]; // collector to sender thread signaling + int rrdpush_sender_socket; + + uint16_t hops; + + struct line_splitter line; + struct compressor_state compressor; + +#ifdef NETDATA_LOG_STREAM_SENDER + FILE *stream_log_fp; +#endif + + NETDATA_SSL ssl; // structure used to encrypt the connection + + struct { + bool shutdown; + STREAM_HANDSHAKE reason; + } exit; + + struct { + DICTIONARY *requests; // de-duplication of replication requests, per chart + time_t oldest_request_after_t; // the timestamp of the oldest replication request + time_t latest_completed_before_t; // the timestamp of the latest replication request + + struct { + size_t pending_requests; // the currently outstanding replication requests + size_t charts_replicating; // the number of unique charts having pending replication requests (on every request one is added and is removed when we finish it - it does not track completion of the replication for this chart) + bool reached_max; // true when the sender buffer should not get more replication responses + } atomic; + + } replication; + + struct { + bool pending_data; + size_t buffer_used_percentage; // the current utilization of the sending buffer + usec_t last_flush_time_ut; // the last time the sender flushed the sending buffer in USEC + time_t last_buffer_recreate_s; // true when the sender buffer should be re-created + } atomic; + + struct { + const char *end_keyword; + BUFFER *payload; + rrdpush_defer_action_t action; + rrdpush_defer_cleanup_t cleanup; + void *action_data; + } defer; + + bool parent_using_h2o; +}; + +#define sender_lock(sender) spinlock_lock(&(sender)->spinlock) +#define sender_unlock(sender) spinlock_unlock(&(sender)->spinlock) + 
+#define rrdpush_sender_pipe_has_pending_data(sender) __atomic_load_n(&(sender)->atomic.pending_data, __ATOMIC_RELAXED) +#define rrdpush_sender_pipe_set_pending_data(sender) __atomic_store_n(&(sender)->atomic.pending_data, true, __ATOMIC_RELAXED) +#define rrdpush_sender_pipe_clear_pending_data(sender) __atomic_store_n(&(sender)->atomic.pending_data, false, __ATOMIC_RELAXED) + +#define rrdpush_sender_last_buffer_recreate_get(sender) __atomic_load_n(&(sender)->atomic.last_buffer_recreate_s, __ATOMIC_RELAXED) +#define rrdpush_sender_last_buffer_recreate_set(sender, value) __atomic_store_n(&(sender)->atomic.last_buffer_recreate_s, value, __ATOMIC_RELAXED) + +#define rrdpush_sender_replication_buffer_full_set(sender, value) __atomic_store_n(&((sender)->replication.atomic.reached_max), value, __ATOMIC_SEQ_CST) +#define rrdpush_sender_replication_buffer_full_get(sender) __atomic_load_n(&((sender)->replication.atomic.reached_max), __ATOMIC_SEQ_CST) + +#define rrdpush_sender_set_buffer_used_percent(sender, value) __atomic_store_n(&((sender)->atomic.buffer_used_percentage), value, __ATOMIC_RELAXED) +#define rrdpush_sender_get_buffer_used_percent(sender) __atomic_load_n(&((sender)->atomic.buffer_used_percentage), __ATOMIC_RELAXED) + +#define rrdpush_sender_set_flush_time(sender) __atomic_store_n(&((sender)->atomic.last_flush_time_ut), now_realtime_usec(), __ATOMIC_RELAXED) +#define rrdpush_sender_get_flush_time(sender) __atomic_load_n(&((sender)->atomic.last_flush_time_ut), __ATOMIC_RELAXED) + +#define rrdpush_sender_replicating_charts(sender) __atomic_load_n(&((sender)->replication.atomic.charts_replicating), __ATOMIC_RELAXED) +#define rrdpush_sender_replicating_charts_plus_one(sender) __atomic_add_fetch(&((sender)->replication.atomic.charts_replicating), 1, __ATOMIC_RELAXED) +#define rrdpush_sender_replicating_charts_minus_one(sender) __atomic_sub_fetch(&((sender)->replication.atomic.charts_replicating), 1, __ATOMIC_RELAXED) +#define 
rrdpush_sender_replicating_charts_zero(sender) __atomic_store_n(&((sender)->replication.atomic.charts_replicating), 0, __ATOMIC_RELAXED) + +#define rrdpush_sender_pending_replication_requests(sender) __atomic_load_n(&((sender)->replication.atomic.pending_requests), __ATOMIC_RELAXED) +#define rrdpush_sender_pending_replication_requests_plus_one(sender) __atomic_add_fetch(&((sender)->replication.atomic.pending_requests), 1, __ATOMIC_RELAXED) +#define rrdpush_sender_pending_replication_requests_minus_one(sender) __atomic_sub_fetch(&((sender)->replication.atomic.pending_requests), 1, __ATOMIC_RELAXED) +#define rrdpush_sender_pending_replication_requests_zero(sender) __atomic_store_n(&((sender)->replication.atomic.pending_requests), 0, __ATOMIC_RELAXED) + +BUFFER *sender_start(struct sender_state *s); +void sender_commit(struct sender_state *s, BUFFER *wb, STREAM_TRAFFIC_TYPE type); + +void *rrdpush_sender_thread(void *ptr); +void rrdpush_sender_thread_stop(RRDHOST *host, STREAM_HANDSHAKE reason, bool wait); + +void sender_thread_buffer_free(void); + +void rrdpush_signal_sender_to_wake_up(struct sender_state *s); + +bool rrdpush_sender_connect(struct sender_state *s); +void rrdpush_sender_cbuffer_recreate_timed(struct sender_state *s, time_t now_s, bool have_mutex, bool force); +bool rrdhost_sender_should_exit(struct sender_state *s); +void rrdpush_sender_thread_spawn(RRDHOST *host); + +#include "replication.h" + +#endif //NETDATA_SENDER_H diff --git a/src/streaming/stream_capabilities.c b/src/streaming/stream-capabilities.c similarity index 100% rename from src/streaming/stream_capabilities.c rename to src/streaming/stream-capabilities.c diff --git a/src/streaming/stream_capabilities.h b/src/streaming/stream-capabilities.h similarity index 100% rename from src/streaming/stream_capabilities.h rename to src/streaming/stream-capabilities.h diff --git a/src/streaming/compression_brotli.c b/src/streaming/stream-compression/brotli.c similarity index 99% rename from 
src/streaming/compression_brotli.c rename to src/streaming/stream-compression/brotli.c index cf52f3bca4b95d..c2c09cdc53cd1e 100644 --- a/src/streaming/compression_brotli.c +++ b/src/streaming/stream-compression/brotli.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#include "compression_brotli.h" +#include "brotli.h" #ifdef ENABLE_BROTLI #include diff --git a/src/streaming/compression_brotli.h b/src/streaming/stream-compression/brotli.h similarity index 100% rename from src/streaming/compression_brotli.h rename to src/streaming/stream-compression/brotli.h diff --git a/src/streaming/compression.c b/src/streaming/stream-compression/compression.c similarity index 99% rename from src/streaming/compression.c rename to src/streaming/stream-compression/compression.c index 448f423533429c..3c99306563fd67 100644 --- a/src/streaming/compression.c +++ b/src/streaming/stream-compression/compression.c @@ -2,18 +2,18 @@ #include "compression.h" -#include "compression_gzip.h" +#include "gzip.h" #ifdef ENABLE_LZ4 -#include "compression_lz4.h" +#include "lz4.h" #endif #ifdef ENABLE_ZSTD -#include "compression_zstd.h" +#include "zstd.h" #endif #ifdef ENABLE_BROTLI -#include "compression_brotli.h" +#include "brotli.h" #endif int rrdpush_compression_levels[COMPRESSION_ALGORITHM_MAX] = { diff --git a/src/streaming/compression.h b/src/streaming/stream-compression/compression.h similarity index 93% rename from src/streaming/compression.h rename to src/streaming/stream-compression/compression.h index 285fb2cf6c9f94..37f589b8562e9e 100644 --- a/src/streaming/compression.h +++ b/src/streaming/stream-compression/compression.h @@ -1,10 +1,10 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#include "rrdpush.h" - #ifndef NETDATA_RRDPUSH_COMPRESSION_H #define NETDATA_RRDPUSH_COMPRESSION_H 1 +#include "libnetdata/libnetdata.h" + // signature MUST end with a newline #if COMPRESSION_MAX_MSG_SIZE >= (COMPRESSION_MAX_CHUNK - COMPRESSION_MAX_OVERHEAD) @@ -172,4 +172,12 @@ static inline 
size_t rrdpush_decompressor_get(struct decompressor_state *state, // ---------------------------------------------------------------------------- +#include "../rrdpush.h" + +bool rrdpush_compression_initialize(struct sender_state *s); +bool rrdpush_decompression_initialize(struct receiver_state *rpt); +void rrdpush_parse_compression_order(struct receiver_state *rpt, const char *order); +void rrdpush_select_receiver_compression_algorithm(struct receiver_state *rpt); +void rrdpush_compression_deactivate(struct sender_state *s); + #endif // NETDATA_RRDPUSH_COMPRESSION_H 1 diff --git a/src/streaming/compression_gzip.c b/src/streaming/stream-compression/gzip.c similarity index 99% rename from src/streaming/compression_gzip.c rename to src/streaming/stream-compression/gzip.c index c4ef3af05ee2d3..d63e9afbe3e197 100644 --- a/src/streaming/compression_gzip.c +++ b/src/streaming/stream-compression/gzip.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#include "compression_gzip.h" +#include "gzip.h" #include void rrdpush_compressor_init_gzip(struct compressor_state *state) { diff --git a/src/streaming/compression_gzip.h b/src/streaming/stream-compression/gzip.h similarity index 100% rename from src/streaming/compression_gzip.h rename to src/streaming/stream-compression/gzip.h diff --git a/src/streaming/compression_lz4.c b/src/streaming/stream-compression/lz4.c similarity index 99% rename from src/streaming/compression_lz4.c rename to src/streaming/stream-compression/lz4.c index f5174134eb3ee8..2841921539c495 100644 --- a/src/streaming/compression_lz4.c +++ b/src/streaming/stream-compression/lz4.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#include "compression_lz4.h" +#include "lz4.h" #ifdef ENABLE_LZ4 #include "lz4.h" diff --git a/src/streaming/compression_lz4.h b/src/streaming/stream-compression/lz4.h similarity index 100% rename from src/streaming/compression_lz4.h rename to src/streaming/stream-compression/lz4.h diff --git 
a/src/streaming/compression_zstd.c b/src/streaming/stream-compression/zstd.c similarity index 99% rename from src/streaming/compression_zstd.c rename to src/streaming/stream-compression/zstd.c index dabc044f7f5fc1..0ce27c0d3b2c58 100644 --- a/src/streaming/compression_zstd.c +++ b/src/streaming/stream-compression/zstd.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#include "compression_zstd.h" +#include "zstd.h" #ifdef ENABLE_ZSTD #include diff --git a/src/streaming/compression_zstd.h b/src/streaming/stream-compression/zstd.h similarity index 100% rename from src/streaming/compression_zstd.h rename to src/streaming/stream-compression/zstd.h diff --git a/src/streaming/stream-conf.c b/src/streaming/stream-conf.c new file mode 100644 index 00000000000000..8fc9e081965ee1 --- /dev/null +++ b/src/streaming/stream-conf.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "stream-conf.h" + +struct config stream_config = APPCONFIG_INITIALIZER; + +bool stream_conf_send_enabled = false; +bool stream_conf_compression_enabled = true; +bool stream_conf_replication_enabled = true; + +const char *stream_conf_send_destination = NULL; +const char *stream_conf_send_api_key = NULL; +const char *stream_conf_send_charts_matching = "*"; + +time_t stream_conf_replication_period = 86400; +time_t stream_conf_replication_step = 600; + +const char *stream_conf_ssl_ca_path = NULL; +const char *stream_conf_ssl_ca_file = NULL; + +// to have the remote netdata re-sync the charts +// to its current clock, we send for this many +// iterations a BEGIN line without microseconds +// this is for the first iterations of each chart +unsigned int stream_conf_initial_clock_resync_iterations = 60; + +static void stream_conf_load() { + errno_clear(); + char *filename = filename_from_path_entry_strdupz(netdata_configured_user_config_dir, "stream.conf"); + if(!appconfig_load(&stream_config, filename, 0, NULL)) { + nd_log_daemon(NDLP_NOTICE, "CONFIG: cannot load user 
config '%s'. Will try stock config.", filename); + freez(filename); + + filename = filename_from_path_entry_strdupz(netdata_configured_stock_config_dir, "stream.conf"); + if(!appconfig_load(&stream_config, filename, 0, NULL)) + nd_log_daemon(NDLP_NOTICE, "CONFIG: cannot load stock config '%s'. Running with internal defaults.", filename); + } + + freez(filename); + + appconfig_move(&stream_config, + CONFIG_SECTION_STREAM, "timeout seconds", + CONFIG_SECTION_STREAM, "timeout"); + + appconfig_move(&stream_config, + CONFIG_SECTION_STREAM, "reconnect delay seconds", + CONFIG_SECTION_STREAM, "reconnect delay"); + + appconfig_move_everywhere(&stream_config, "default memory mode", "db"); + appconfig_move_everywhere(&stream_config, "memory mode", "db"); + appconfig_move_everywhere(&stream_config, "db mode", "db"); + appconfig_move_everywhere(&stream_config, "default history", "retention"); + appconfig_move_everywhere(&stream_config, "history", "retention"); + appconfig_move_everywhere(&stream_config, "default proxy enabled", "proxy enabled"); + appconfig_move_everywhere(&stream_config, "default proxy destination", "proxy destination"); + appconfig_move_everywhere(&stream_config, "default proxy api key", "proxy api key"); + appconfig_move_everywhere(&stream_config, "default proxy send charts matching", "proxy send charts matching"); + appconfig_move_everywhere(&stream_config, "default health log history", "health log retention"); + appconfig_move_everywhere(&stream_config, "health log history", "health log retention"); + appconfig_move_everywhere(&stream_config, "seconds to replicate", "replication period"); + appconfig_move_everywhere(&stream_config, "seconds per replication step", "replication step"); + appconfig_move_everywhere(&stream_config, "default postpone alarms on connect seconds", "postpone alerts on connect"); + appconfig_move_everywhere(&stream_config, "postpone alarms on connect seconds", "postpone alerts on connect"); +} + +bool 
stream_conf_receiver_needs_dbengine(void) { + return stream_conf_needs_dbengine(&stream_config); +} + +bool stream_conf_init() { + // -------------------------------------------------------------------- + // load stream.conf + stream_conf_load(); + + stream_conf_send_enabled = + appconfig_get_boolean(&stream_config, CONFIG_SECTION_STREAM, "enabled", stream_conf_send_enabled); + + stream_conf_send_destination = + appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "destination", ""); + + stream_conf_send_api_key = + appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "api key", ""); + + stream_conf_send_charts_matching = + appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "send charts matching", stream_conf_send_charts_matching); + + stream_conf_replication_enabled = + config_get_boolean(CONFIG_SECTION_DB, "enable replication", stream_conf_replication_enabled); + + stream_conf_replication_period = + config_get_duration_seconds(CONFIG_SECTION_DB, "replication period", stream_conf_replication_period); + + stream_conf_replication_step = + config_get_duration_seconds(CONFIG_SECTION_DB, "replication step", stream_conf_replication_step); + + rrdhost_free_orphan_time_s = + config_get_duration_seconds(CONFIG_SECTION_DB, "cleanup orphan hosts after", rrdhost_free_orphan_time_s); + + stream_conf_compression_enabled = + appconfig_get_boolean(&stream_config, CONFIG_SECTION_STREAM, + "enable compression", stream_conf_compression_enabled); + + rrdpush_compression_levels[COMPRESSION_ALGORITHM_BROTLI] = (int)appconfig_get_number( + &stream_config, CONFIG_SECTION_STREAM, "brotli compression level", + rrdpush_compression_levels[COMPRESSION_ALGORITHM_BROTLI]); + + rrdpush_compression_levels[COMPRESSION_ALGORITHM_ZSTD] = (int)appconfig_get_number( + &stream_config, CONFIG_SECTION_STREAM, "zstd compression level", + rrdpush_compression_levels[COMPRESSION_ALGORITHM_ZSTD]); + + rrdpush_compression_levels[COMPRESSION_ALGORITHM_LZ4] = (int)appconfig_get_number( + &stream_config, 
CONFIG_SECTION_STREAM, "lz4 compression acceleration", + rrdpush_compression_levels[COMPRESSION_ALGORITHM_LZ4]); + + rrdpush_compression_levels[COMPRESSION_ALGORITHM_GZIP] = (int)appconfig_get_number( + &stream_config, CONFIG_SECTION_STREAM, "gzip compression level", + rrdpush_compression_levels[COMPRESSION_ALGORITHM_GZIP]); + + if(stream_conf_send_enabled && (!stream_conf_send_destination || !*stream_conf_send_destination || !stream_conf_send_api_key || !*stream_conf_send_api_key)) { + nd_log_daemon(NDLP_WARNING, "STREAM [send]: cannot enable sending thread - information is missing."); + stream_conf_send_enabled = false; + } + + netdata_ssl_validate_certificate_sender = !appconfig_get_boolean(&stream_config, CONFIG_SECTION_STREAM, "ssl skip certificate verification", !netdata_ssl_validate_certificate); + + if(!netdata_ssl_validate_certificate_sender) + nd_log_daemon(NDLP_NOTICE, "SSL: streaming senders will skip SSL certificates verification."); + + stream_conf_ssl_ca_path = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "CApath", NULL); + stream_conf_ssl_ca_file = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "CAfile", NULL); + + return stream_conf_send_enabled; +} + +bool stream_conf_configured_as_parent() { + return stream_conf_has_uuid_section(&stream_config); +} diff --git a/src/streaming/stream-conf.h b/src/streaming/stream-conf.h new file mode 100644 index 00000000000000..da7a8812350517 --- /dev/null +++ b/src/streaming/stream-conf.h @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_STREAM_CONF_H +#define NETDATA_STREAM_CONF_H + +#include "libnetdata/libnetdata.h" +#include "daemon/common.h" + +extern bool stream_conf_send_enabled; +extern bool stream_conf_compression_enabled; +extern bool stream_conf_replication_enabled; + +extern const char *stream_conf_send_destination; +extern const char *stream_conf_send_api_key; +extern const char *stream_conf_send_charts_matching; +extern time_t 
stream_conf_replication_period; +extern time_t stream_conf_replication_step; +extern unsigned int stream_conf_initial_clock_resync_iterations; + +extern struct config stream_config; +extern const char *stream_conf_ssl_ca_path; +extern const char *stream_conf_ssl_ca_file; + +bool stream_conf_init(); +bool stream_conf_receiver_needs_dbengine(); +bool stream_conf_configured_as_parent(); + +#endif //NETDATA_STREAM_CONF_H diff --git a/src/streaming/stream-handshake.c b/src/streaming/stream-handshake.c new file mode 100644 index 00000000000000..e338df950f1fc8 --- /dev/null +++ b/src/streaming/stream-handshake.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrdpush.h" + +static struct { + STREAM_HANDSHAKE err; + const char *str; +} handshake_errors[] = { + { STREAM_HANDSHAKE_OK_V3, "CONNECTED" }, + { STREAM_HANDSHAKE_OK_V2, "CONNECTED" }, + { STREAM_HANDSHAKE_OK_V1, "CONNECTED" }, + { STREAM_HANDSHAKE_NEVER, "" }, + { STREAM_HANDSHAKE_ERROR_BAD_HANDSHAKE, "BAD HANDSHAKE" }, + { STREAM_HANDSHAKE_ERROR_LOCALHOST, "LOCALHOST" }, + { STREAM_HANDSHAKE_ERROR_ALREADY_CONNECTED, "ALREADY CONNECTED" }, + { STREAM_HANDSHAKE_ERROR_DENIED, "DENIED" }, + { STREAM_HANDSHAKE_ERROR_SEND_TIMEOUT, "SEND TIMEOUT" }, + { STREAM_HANDSHAKE_ERROR_RECEIVE_TIMEOUT, "RECEIVE TIMEOUT" }, + { STREAM_HANDSHAKE_ERROR_INVALID_CERTIFICATE, "INVALID CERTIFICATE" }, + { STREAM_HANDSHAKE_ERROR_SSL_ERROR, "SSL ERROR" }, + { STREAM_HANDSHAKE_ERROR_CANT_CONNECT, "CANT CONNECT" }, + { STREAM_HANDSHAKE_BUSY_TRY_LATER, "BUSY TRY LATER" }, + { STREAM_HANDSHAKE_INTERNAL_ERROR, "INTERNAL ERROR" }, + { STREAM_HANDSHAKE_INITIALIZATION, "REMOTE IS INITIALIZING" }, + { STREAM_HANDSHAKE_DISCONNECT_HOST_CLEANUP, "DISCONNECTED HOST CLEANUP" }, + { STREAM_HANDSHAKE_DISCONNECT_STALE_RECEIVER, "DISCONNECTED STALE RECEIVER" }, + { STREAM_HANDSHAKE_DISCONNECT_SHUTDOWN, "DISCONNECTED SHUTDOWN REQUESTED" }, + { STREAM_HANDSHAKE_DISCONNECT_NETDATA_EXIT, "DISCONNECTED NETDATA EXIT" }, + { 
STREAM_HANDSHAKE_DISCONNECT_PARSER_EXIT, "DISCONNECTED PARSE ENDED" }, + {STREAM_HANDSHAKE_DISCONNECT_UNKNOWN_SOCKET_READ_ERROR, "DISCONNECTED UNKNOWN SOCKET READ ERROR" }, + { STREAM_HANDSHAKE_DISCONNECT_PARSER_FAILED, "DISCONNECTED PARSE ERROR" }, + { STREAM_HANDSHAKE_DISCONNECT_RECEIVER_LEFT, "DISCONNECTED RECEIVER LEFT" }, + { STREAM_HANDSHAKE_DISCONNECT_ORPHAN_HOST, "DISCONNECTED ORPHAN HOST" }, + { STREAM_HANDSHAKE_NON_STREAMABLE_HOST, "NON STREAMABLE HOST" }, + { STREAM_HANDSHAKE_DISCONNECT_NOT_SUFFICIENT_READ_BUFFER, "DISCONNECTED NOT SUFFICIENT READ BUFFER" }, + {STREAM_HANDSHAKE_DISCONNECT_SOCKET_EOF, "DISCONNECTED SOCKET EOF" }, + {STREAM_HANDSHAKE_DISCONNECT_SOCKET_READ_FAILED, "DISCONNECTED SOCKET READ FAILED" }, + {STREAM_HANDSHAKE_DISCONNECT_SOCKET_READ_TIMEOUT, "DISCONNECTED SOCKET READ TIMEOUT" }, + { 0, NULL }, +}; + +const char *stream_handshake_error_to_string(STREAM_HANDSHAKE handshake_error) { + if(handshake_error >= STREAM_HANDSHAKE_OK_V1) + // handshake_error is the whole version / capabilities number + return "CONNECTED"; + + for(size_t i = 0; handshake_errors[i].str ; i++) { + if(handshake_error == handshake_errors[i].err) + return handshake_errors[i].str; + } + + return "UNKNOWN"; +} diff --git a/src/streaming/stream-handshake.h b/src/streaming/stream-handshake.h new file mode 100644 index 00000000000000..9b66cab97c730a --- /dev/null +++ b/src/streaming/stream-handshake.h @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_STREAM_HANDSHAKE_H +#define NETDATA_STREAM_HANDSHAKE_H + +#define HTTP_HEADER_SIZE 8192 + +#define STREAMING_PROTOCOL_VERSION "1.1" +#define START_STREAMING_PROMPT_V1 "Hit me baby, push them over..." +#define START_STREAMING_PROMPT_V2 "Hit me baby, push them over and bring the host labels..." 
+#define START_STREAMING_PROMPT_VN "Hit me baby, push them over with the version=" + +#define START_STREAMING_ERROR_SAME_LOCALHOST "Don't hit me baby, you are trying to stream my localhost back" +#define START_STREAMING_ERROR_ALREADY_STREAMING "This GUID is already streaming to this server" +#define START_STREAMING_ERROR_NOT_PERMITTED "You are not permitted to access this. Check the logs for more info." +#define START_STREAMING_ERROR_BUSY_TRY_LATER "The server is too busy now to accept this request. Try later." +#define START_STREAMING_ERROR_INTERNAL_ERROR "The server encountered an internal error. Try later." +#define START_STREAMING_ERROR_INITIALIZATION "The server is initializing. Try later." + +#define RRDPUSH_STATUS_CONNECTED "CONNECTED" +#define RRDPUSH_STATUS_ALREADY_CONNECTED "ALREADY CONNECTED" +#define RRDPUSH_STATUS_DISCONNECTED "DISCONNECTED" +#define RRDPUSH_STATUS_RATE_LIMIT "RATE LIMIT TRY LATER" +#define RRDPUSH_STATUS_INITIALIZATION_IN_PROGRESS "INITIALIZATION IN PROGRESS RETRY LATER" +#define RRDPUSH_STATUS_INTERNAL_SERVER_ERROR "INTERNAL SERVER ERROR DROPPING CONNECTION" +#define RRDPUSH_STATUS_DUPLICATE_RECEIVER "DUPLICATE RECEIVER DROPPING CONNECTION" +#define RRDPUSH_STATUS_CANT_REPLY "CANT REPLY DROPPING CONNECTION" +#define RRDPUSH_STATUS_NO_HOSTNAME "NO HOSTNAME PERMISSION DENIED" +#define RRDPUSH_STATUS_NO_API_KEY "NO API KEY PERMISSION DENIED" +#define RRDPUSH_STATUS_INVALID_API_KEY "INVALID API KEY PERMISSION DENIED" +#define RRDPUSH_STATUS_NO_MACHINE_GUID "NO MACHINE GUID PERMISSION DENIED" +#define RRDPUSH_STATUS_MACHINE_GUID_DISABLED "MACHINE GUID DISABLED PERMISSION DENIED" +#define RRDPUSH_STATUS_INVALID_MACHINE_GUID "INVALID MACHINE GUID PERMISSION DENIED" +#define RRDPUSH_STATUS_API_KEY_DISABLED "API KEY DISABLED PERMISSION DENIED" +#define RRDPUSH_STATUS_NOT_ALLOWED_IP "NOT ALLOWED IP PERMISSION DENIED" +#define RRDPUSH_STATUS_LOCALHOST "LOCALHOST PERMISSION DENIED" +#define RRDPUSH_STATUS_PERMISSION_DENIED "PERMISSION DENIED" 
+#define RRDPUSH_STATUS_BAD_HANDSHAKE "BAD HANDSHAKE" +#define RRDPUSH_STATUS_TIMEOUT "TIMEOUT" +#define RRDPUSH_STATUS_CANT_UPGRADE_CONNECTION "CANT UPGRADE CONNECTION" +#define RRDPUSH_STATUS_SSL_ERROR "SSL ERROR" +#define RRDPUSH_STATUS_INVALID_SSL_CERTIFICATE "INVALID SSL CERTIFICATE" +#define RRDPUSH_STATUS_CANT_ESTABLISH_SSL_CONNECTION "CANT ESTABLISH SSL CONNECTION" + +typedef enum { + STREAM_HANDSHAKE_OK_V3 = 3, // v3+ + STREAM_HANDSHAKE_OK_V2 = 2, // v2 + STREAM_HANDSHAKE_OK_V1 = 1, // v1 + STREAM_HANDSHAKE_NEVER = 0, // never tried to connect + STREAM_HANDSHAKE_ERROR_BAD_HANDSHAKE = -1, + STREAM_HANDSHAKE_ERROR_LOCALHOST = -2, + STREAM_HANDSHAKE_ERROR_ALREADY_CONNECTED = -3, + STREAM_HANDSHAKE_ERROR_DENIED = -4, + STREAM_HANDSHAKE_ERROR_SEND_TIMEOUT = -5, + STREAM_HANDSHAKE_ERROR_RECEIVE_TIMEOUT = -6, + STREAM_HANDSHAKE_ERROR_INVALID_CERTIFICATE = -7, + STREAM_HANDSHAKE_ERROR_SSL_ERROR = -8, + STREAM_HANDSHAKE_ERROR_CANT_CONNECT = -9, + STREAM_HANDSHAKE_BUSY_TRY_LATER = -10, + STREAM_HANDSHAKE_INTERNAL_ERROR = -11, + STREAM_HANDSHAKE_INITIALIZATION = -12, + STREAM_HANDSHAKE_DISCONNECT_HOST_CLEANUP = -13, + STREAM_HANDSHAKE_DISCONNECT_STALE_RECEIVER = -14, + STREAM_HANDSHAKE_DISCONNECT_SHUTDOWN = -15, + STREAM_HANDSHAKE_DISCONNECT_NETDATA_EXIT = -16, + STREAM_HANDSHAKE_DISCONNECT_PARSER_EXIT = -17, + STREAM_HANDSHAKE_DISCONNECT_UNKNOWN_SOCKET_READ_ERROR = -18, + STREAM_HANDSHAKE_DISCONNECT_PARSER_FAILED = -19, + STREAM_HANDSHAKE_DISCONNECT_RECEIVER_LEFT = -20, + STREAM_HANDSHAKE_DISCONNECT_ORPHAN_HOST = -21, + STREAM_HANDSHAKE_NON_STREAMABLE_HOST = -22, + STREAM_HANDSHAKE_DISCONNECT_NOT_SUFFICIENT_READ_BUFFER = -23, + STREAM_HANDSHAKE_DISCONNECT_SOCKET_EOF = -24, + STREAM_HANDSHAKE_DISCONNECT_SOCKET_READ_FAILED = -25, + STREAM_HANDSHAKE_DISCONNECT_SOCKET_READ_TIMEOUT = -26, + STREAM_HANDSHAKE_ERROR_HTTP_UPGRADE = -27, + +} STREAM_HANDSHAKE; + +const char *stream_handshake_error_to_string(STREAM_HANDSHAKE handshake_error); + +#endif 
//NETDATA_STREAM_HANDSHAKE_H diff --git a/src/streaming/stream_path.c b/src/streaming/stream-path.c similarity index 99% rename from src/streaming/stream_path.c rename to src/streaming/stream-path.c index 8f2c2889950d50..7aad9a0bfdbc3f 100644 --- a/src/streaming/stream_path.c +++ b/src/streaming/stream-path.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#include "stream_path.h" +#include "stream-path.h" #include "rrdpush.h" #include "plugins.d/pluginsd_internals.h" diff --git a/src/streaming/stream_path.h b/src/streaming/stream-path.h similarity index 98% rename from src/streaming/stream_path.h rename to src/streaming/stream-path.h index 8a3035e6618bef..6dc323bddc104a 100644 --- a/src/streaming/stream_path.h +++ b/src/streaming/stream-path.h @@ -3,7 +3,7 @@ #ifndef NETDATA_STREAM_PATH_H #define NETDATA_STREAM_PATH_H -#include "stream_capabilities.h" +#include "stream-capabilities.h" #define STREAM_PATH_JSON_MEMBER "streaming_path" diff --git a/src/web/api/v1/api_v1_info.c b/src/web/api/v1/api_v1_info.c index 58a4a76f2cc2e0..2395cea5987252 100644 --- a/src/web/api/v1/api_v1_info.c +++ b/src/web/api/v1/api_v1_info.c @@ -161,7 +161,7 @@ static int web_client_api_request_v1_info_fill_buffer(RRDHOST *host, BUFFER *wb) buffer_json_member_add_uint64(wb, "page-cache-size", default_rrdeng_page_cache_mb); #endif // ENABLE_DBENGINE buffer_json_member_add_boolean(wb, "web-enabled", web_server_mode != WEB_SERVER_MODE_NONE); - buffer_json_member_add_boolean(wb, "stream-enabled", default_rrdpush_enabled); + buffer_json_member_add_boolean(wb, "stream-enabled", stream_conf_send_enabled); buffer_json_member_add_boolean(wb, "stream-compression", host->sender && host->sender->compressor.initialized); diff --git a/src/web/server/h2o/http_server.c b/src/web/server/h2o/http_server.c index dc83f40fcce684..0fc65b35096551 100644 --- a/src/web/server/h2o/http_server.c +++ b/src/web/server/h2o/http_server.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-3.0-or-later #include 
"daemon/common.h" -#include "streaming/common.h" +#include "streaming/h2o-common.h" #include "http_server.h" #pragma GCC diagnostic push diff --git a/src/web/server/h2o/streaming.c b/src/web/server/h2o/rrdpush.c similarity index 99% rename from src/web/server/h2o/streaming.c rename to src/web/server/h2o/rrdpush.c index fbe3f8050bb76b..515ec8fd49a3b8 100644 --- a/src/web/server/h2o/streaming.c +++ b/src/web/server/h2o/rrdpush.c @@ -4,7 +4,7 @@ #include "streaming.h" #include "connlist.h" #include "h2o_utils.h" -#include "streaming/common.h" +#include "streaming/h2o-common.h" static int pending_write_reqs = 0;