diff --git a/src/daemon/dlt-daemon.c b/src/daemon/dlt-daemon.c index f0d5a1657..9b320dbc8 100644 --- a/src/daemon/dlt-daemon.c +++ b/src/daemon/dlt-daemon.c @@ -1549,14 +1549,25 @@ int main(int argc, char *argv[]) #ifdef DLT_SYSTEMD_WATCHDOG_ENABLE { char *watchdogUSec = getenv("WATCHDOG_USEC"); - int watchdogTimeoutSeconds = 0; + // set a sensible default, in case the environment variable is not set + int watchdogTimeoutSeconds = 30; dlt_log(LOG_DEBUG, "Systemd watchdog initialization\n"); - if (watchdogUSec) + if (watchdogUSec) { + // WATCHDOG_USEC is the timeout in micrsoseconds + // divide this by 2*10^6 to get the interval in seconds + // 2 * because we notify systemd after half the timeout watchdogTimeoutSeconds = atoi(watchdogUSec) / 2000000; + } + + if (watchdogTimeoutSeconds == 0) { + dlt_log(LOG_WARNING, "Watchdog timeout is too small, need at least 1s, setting 1s timeout\n"); + watchdogTimeoutSeconds = 1; + } daemon.watchdog_trigger_interval = watchdogTimeoutSeconds; + daemon.watchdog_last_trigger_time = 0U; create_timer_fd(&daemon_local, watchdogTimeoutSeconds, watchdogTimeoutSeconds, @@ -3152,6 +3163,17 @@ int dlt_daemon_process_user_messages(DltDaemon *daemon, /* look through buffer as long as data is in there */ while ((receiver->bytesRcvd >= min_size) && run_loop) { +#ifdef DLT_SYSTEMD_WATCHDOG_ENABLE + /* this loop may be running long, so we have to exit it at some point to be able to + * to process other events, like feeding the watchdog + */ + bool watchdog_triggered= dlt_daemon_trigger_systemd_watchdog_if_necessary(daemon); + if (watchdog_triggered) { + dlt_vlog(LOG_WARNING, "%s yields due to watchdog.\n", __func__); + run_loop = 0; // exit loop in next iteration + } +#endif + dlt_daemon_process_user_message_func func = NULL; offset = 0; @@ -3798,12 +3820,9 @@ int dlt_daemon_process_user_message_log(DltDaemon *daemon, while (1) { #ifdef DLT_SYSTEMD_WATCHDOG_ENABLE - const unsigned int uptime = dlt_uptime(); - if ((uptime - start_time) / 10000 > daemon->watchdog_trigger_interval) { - dlt_vlog(LOG_WARNING, - "spent already 1 watchdog trigger interval in %s, yielding to process other events.\n", __func__); - if (sd_notify(0, "WATCHDOG=1") < 0) - dlt_vlog(LOG_CRIT, "Could not reset systemd watchdog from %s\n", __func__); + bool watchdog_triggered = dlt_daemon_trigger_systemd_watchdog_if_necessary(daemon); + if (watchdog_triggered) { + dlt_vlog(LOG_WARNING, "%s yields due to watchdog.\n", __func__); break; } #endif @@ -4162,7 +4181,7 @@ int dlt_daemon_send_ringbuffer_to_client(DltDaemon *daemon, DltDaemonLocal *daem while ((length = dlt_buffer_copy(&(daemon->client_ringbuffer), data, sizeof(data))) > 0) { #ifdef DLT_SYSTEMD_WATCHDOG_ENABLE - dlt_daemon_trigger_systemd_watchdog_if_necessary(&curr_time, daemon->watchdog_trigger_interval); + dlt_daemon_trigger_systemd_watchdog_if_necessary(daemon); #endif if ((ret = diff --git a/src/daemon/dlt_daemon_client.c b/src/daemon/dlt_daemon_client.c index 5b6fd5bcc..26f7fdd15 100644 --- a/src/daemon/dlt_daemon_client.c +++ b/src/daemon/dlt_daemon_client.c @@ -110,7 +110,7 @@ static int dlt_daemon_client_send_all_multiple(DltDaemon *daemon, int verbose) { int sent = 0; - unsigned int i = 0; + nfds_t i = 0; int ret = 0; DltConnection *temp = NULL; int type_mask = @@ -125,6 +125,13 @@ static int dlt_daemon_client_send_all_multiple(DltDaemon *daemon, for (i = 0; i < daemon_local->pEvent.nfds; i++) { +#ifdef DLT_SYSTEMD_WATCHDOG_ENABLE + bool watchdog_triggered = dlt_daemon_trigger_systemd_watchdog_if_necessary(daemon); + if (watchdog_triggered) { + dlt_vlog(LOG_WARNING, "%s notified watchdog, processed %lu/%lu fds already.\n", + __func__, i, daemon_local->pEvent.nfds); + } +#endif temp = dlt_event_handler_find_connection(&(daemon_local->pEvent), daemon_local->pEvent.pfd[i].fd); @@ -152,7 +159,7 @@ static int dlt_daemon_client_send_all_multiple(DltDaemon *daemon, if (ret != DLT_DAEMON_ERROR_OK) dlt_vlog(LOG_WARNING, "%s: send dlt message failed\n", __func__); else - /* If sent to at least one client, + /* If sent to at least one client, * then do not store in ring buffer */ sent = 1; @@ -2398,8 +2405,7 @@ int dlt_daemon_process_systemd_timer(DltDaemon *daemon, daemon->received_message_since_last_watchdog_interval = 0; #endif - if (sd_notify(0, "WATCHDOG=1") < 0) - dlt_log(LOG_CRIT, "Could not reset systemd watchdog\n"); + dlt_daemon_trigger_systemd_watchdog_if_necessary(daemon); dlt_log(LOG_DEBUG, "Timer watchdog\n"); diff --git a/src/daemon/dlt_daemon_common.c b/src/daemon/dlt_daemon_common.c index f3aa277b9..86018ba48 100644 --- a/src/daemon/dlt_daemon_common.c +++ b/src/daemon/dlt_daemon_common.c @@ -2002,14 +2002,24 @@ void dlt_daemon_change_state(DltDaemon *daemon, DltDaemonState newState) } #ifdef DLT_SYSTEMD_WATCHDOG_ENABLE -void dlt_daemon_trigger_systemd_watchdog_if_necessary(unsigned int* last_trigger_time, unsigned int watchdog_trigger_interval) { - unsigned int uptime = dlt_uptime(); - if ((uptime - *last_trigger_time) / 10000 >= watchdog_trigger_interval) { - if (sd_notify(0, "WATCHDOG=1") < 0) - dlt_vlog(LOG_WARNING, "%s: Could not reset systemd watchdog\n", __func__); - *last_trigger_time = uptime; +bool dlt_daemon_trigger_systemd_watchdog_if_necessary(DltDaemon *daemon) { + if (daemon->watchdog_trigger_interval == 0) { + return false; } + + const unsigned int uptime_seconds = dlt_uptime() / 10000; + const unsigned int seconds_since_last_trigger = uptime_seconds - daemon->watchdog_last_trigger_time; + if (seconds_since_last_trigger < daemon->watchdog_trigger_interval) { + return false; + } + if (sd_notify(0, "WATCHDOG=1") < 0) + dlt_vlog(LOG_WARNING, "%s: Could not reset systemd watchdog\n", __func__); + else + daemon->watchdog_last_trigger_time = uptime_seconds; + + return true; } + #endif #ifdef DLT_TRACE_LOAD_CTRL_ENABLE diff --git a/src/daemon/dlt_daemon_common.h b/src/daemon/dlt_daemon_common.h index fc91ee934..c46151387 100644 --- a/src/daemon/dlt_daemon_common.h +++ b/src/daemon/dlt_daemon_common.h @@ -207,6 +207,7 @@ typedef struct #endif #ifdef DLT_SYSTEMD_WATCHDOG_ENABLE unsigned int watchdog_trigger_interval; /* watchdog trigger interval in [s] */ + unsigned int watchdog_last_trigger_time; /* when the watchdog was last triggered in [s] */ #endif #ifdef DLT_LOG_LEVEL_APP_CONFIG DltDaemonContextLogSettings *app_id_log_level_settings; /**< Settings for app id specific log levels */ @@ -606,7 +607,12 @@ void dlt_daemon_control_reset_to_factory_default(DltDaemon *daemon, void dlt_daemon_change_state(DltDaemon *daemon, DltDaemonState newState); #ifdef DLT_SYSTEMD_WATCHDOG_ENABLE -void dlt_daemon_trigger_systemd_watchdog_if_necessary(unsigned int* last_trigger_time, unsigned int watchdog_trigger_interval); +/** + * Trigger the systemd watchdog when the timeout has been reached + * @param daemon pointer to dlt daemon structure + * @return true if the watchdog has been triggered + */ +bool dlt_daemon_trigger_systemd_watchdog_if_necessary(DltDaemon *daemon); #endif # ifdef __cplusplus diff --git a/src/daemon/dlt_daemon_event_handler.c b/src/daemon/dlt_daemon_event_handler.c index 0dec007bf..5fdefd953 100644 --- a/src/daemon/dlt_daemon_event_handler.c +++ b/src/daemon/dlt_daemon_event_handler.c @@ -268,7 +268,9 @@ int dlt_daemon_handle_event(DltEventHandler *pEvent, return -1; } #ifdef DLT_SYSTEMD_WATCHDOG_ENABLE - dlt_daemon_trigger_systemd_watchdog_if_necessary(&start_time, daemon->watchdog_trigger_interval); + // no need to yield here, it will be called in a loop anyways. + // therefore we also do not log. + dlt_daemon_trigger_systemd_watchdog_if_necessary(daemon); #endif }