From 4052c22e5d9dff071d461b07e69a08b7f183ca0a Mon Sep 17 00:00:00 2001 From: Vitaly Zhuravlev Date: Sat, 2 Nov 2024 14:34:27 +0000 Subject: [PATCH 1/5] windows: Use instance label for hostname in hostname panel --- windows-observ-lib/panels.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/windows-observ-lib/panels.libsonnet b/windows-observ-lib/panels.libsonnet index 8170502fb..2803ed446 100644 --- a/windows-observ-lib/panels.libsonnet +++ b/windows-observ-lib/panels.libsonnet @@ -363,7 +363,7 @@ local utils = commonlib.utils; targets=[t.osInfo], description="System's hostname." ) - { options+: { reduceOptions+: { fields: '/^hostname$/' } } }, + { options+: { reduceOptions+: { fields: '/^instance$/' } } }, networkErrorsAndDroppedPerSec: commonlib.panels.network.timeSeries.errors.new( 'Network errors and dropped packets', From aa2cdc3a68843903288f9d96ee92674aff67bfa6 Mon Sep 17 00:00:00 2001 From: Vitaly Zhuravlev Date: Tue, 5 Nov 2024 14:52:49 +0000 Subject: [PATCH 2/5] Fix NTP delay units (thanks to linter) --- windows-observ-lib/panels.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/windows-observ-lib/panels.libsonnet b/windows-observ-lib/panels.libsonnet index 2803ed446..8df745311 100644 --- a/windows-observ-lib/panels.libsonnet +++ b/windows-observ-lib/panels.libsonnet @@ -224,7 +224,7 @@ local utils = commonlib.utils; Time offset: Absolute time offset between the system clock and the chosen time source, in seconds. ||| ) - + g.panel.timeSeries.standardOptions.withUnit('seconds') + + g.panel.timeSeries.standardOptions.withUnit('s') + g.panel.timeSeries.standardOptions.withNoValue('No data. Please check that "time" collector is enabled.'), cpuCount: commonlib.panels.cpu.stat.count.new(targets=[t.cpuCount]), cpuUsageTs: commonlib.panels.cpu.timeSeries.utilization.new(targets=[t.cpuUsage]), From aacf86d369efa9e926660fbe96a6abf7146e4465 Mon Sep 17 00:00:00 2001 From: Vitaly Zhuravlev Date: Tue, 5 Nov 2024 14:57:59 +0000 Subject: [PATCH 3/5] Add units to panels --- windows-observ-lib/panels.libsonnet | 31 +++++++++++++++++------------ 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/windows-observ-lib/panels.libsonnet b/windows-observ-lib/panels.libsonnet index 8df745311..6b436c612 100644 --- a/windows-observ-lib/panels.libsonnet +++ b/windows-observ-lib/panels.libsonnet @@ -187,7 +187,8 @@ local utils = commonlib.utils; A high number of context switches or interrupts can indicate that the system is overloaded or that there are problems with specific devices or processes. ||| - ), + ) + + g.panel.timeSeries.standardOptions.withUnit('short'), systemExceptions: commonlib.panels.generic.timeSeries.base.new( 'System calls and exceptions', @@ -195,14 +196,16 @@ local utils = commonlib.utils; t.windowsSystemExceptions, t.windowsSystemCalls, ], - ), + ) + + g.panel.timeSeries.standardOptions.withUnit('short'), systemThreads: commonlib.panels.generic.timeSeries.base.new( 'System threads', targets=[ t.windowsSystemThreads, ], - ), + ) + + g.panel.timeSeries.standardOptions.withUnit('short'), timeNtpStatus: commonlib.panels.system.statusHistory.ntp.new( 'NTP status', @@ -242,17 +245,19 @@ local utils = commonlib.utils; CPU usage by different modes. ||| ), - cpuQueue: commonlib.panels.generic.timeSeries.base.new( - 'CPU average queue size', - targets=[t.cpuQueue], - description=||| - The CPU average queue size in Windows, often referred to as the "Processor Queue Length" or "CPU Queue Length," is a metric that measures the number of threads or tasks waiting to be processed by the central processing unit (CPU) at a given moment. - It is an essential performance indicator that reflects the workload and responsiveness of the CPU. - When the CPU queue length is high, it indicates that there are more tasks in line for processing than the CPU can handle immediately. + cpuQueue: + commonlib.panels.generic.timeSeries.base.new( + 'CPU average queue size', + targets=[t.cpuQueue], + description=||| + The CPU average queue size in Windows, often referred to as the "Processor Queue Length" or "CPU Queue Length," is a metric that measures the number of threads or tasks waiting to be processed by the central processing unit (CPU) at a given moment. + It is an essential performance indicator that reflects the workload and responsiveness of the CPU. + When the CPU queue length is high, it indicates that there are more tasks in line for processing than the CPU can handle immediately. - This can lead to system slowdowns, decreased responsiveness, and potential performance issues. High CPU queue lengths are often associated with CPU saturation, where the CPU is struggling to keep up with the demands placed on it. - ||| - ), + This can lead to system slowdowns, decreased responsiveness, and potential performance issues. High CPU queue lengths are often associated with CPU saturation, where the CPU is struggling to keep up with the demands placed on it. + ||| + ) + + g.panel.timeSeries.standardOptions.withUnit('short'), memoryTotalBytes: commonlib.panels.memory.stat.total.new(targets=[t.memoryTotalBytes]), memoryPageTotalBytes: commonlib.panels.memory.stat.total.new( From f0fd592b957a890182fd7736cdf1120e9e97d71e Mon Sep 17 00:00:00 2001 From: Vitaly Zhuravlev Date: Tue, 5 Nov 2024 15:02:07 +0000 Subject: [PATCH 4/5] Add none units to cpu count to pass linter --- common-lib/common/panels/cpu/stat/count.libsonnet | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/common-lib/common/panels/cpu/stat/count.libsonnet b/common-lib/common/panels/cpu/stat/count.libsonnet index 915060b49..4b9a9ed77 100644 --- a/common-lib/common/panels/cpu/stat/count.libsonnet +++ b/common-lib/common/panels/cpu/stat/count.libsonnet @@ -16,6 +16,7 @@ base { stylize(allLayers=true): (if allLayers then super.stylize() else {}) - + generic.info.stylize(allLayers=false), + + generic.info.stylize(allLayers=false) + + g.panel.stat.standardOptions.withUnit('none'), } From 55ffc29382cf1c7adfdae8b322e54ec311ea93e8 Mon Sep 17 00:00:00 2001 From: Vitaly Zhuravlev Date: Wed, 6 Nov 2024 12:19:46 +0000 Subject: [PATCH 5/5] Update win mixin --- .../mixin.libsonnet | 9 +-- windows-mixin/mixin.libsonnet | 6 +- windows-observ-lib/config.libsonnet | 42 ++++++++++++++ windows-observ-lib/main.libsonnet | 58 +++---------------- windows-observ-lib/mixin.libsonnet | 10 ++++ 5 files changed, 63 insertions(+), 62 deletions(-) create mode 100644 windows-observ-lib/config.libsonnet create mode 100644 windows-observ-lib/mixin.libsonnet diff --git a/windows-active-directory-mixin/mixin.libsonnet b/windows-active-directory-mixin/mixin.libsonnet index 23a9c558b..938a47671 100644 --- a/windows-active-directory-mixin/mixin.libsonnet +++ b/windows-active-directory-mixin/mixin.libsonnet @@ -3,16 +3,13 @@ local alerts = import './alerts/alerts.libsonnet'; local g = import './g.libsonnet'; local var = g.dashboard.variable; local activedirectorymixin = - windowsobservlib.new( - filteringSelector='job=~"integrations/windows_exporter"', - uid='active-directory', - groupLabels=['job'], - instanceLabels=['instance'], - ) + windowsobservlib.new() { config+: { enableADDashboard: true, + groupLabels: ['job'], + uid: 'active-directory', }, } diff --git a/windows-mixin/mixin.libsonnet b/windows-mixin/mixin.libsonnet index 2bae55c62..0aea86ab7 100644 --- a/windows-mixin/mixin.libsonnet +++ b/windows-mixin/mixin.libsonnet @@ -4,11 +4,7 @@ local winlib = import 'windows-observ-lib/main.libsonnet'; local config = (import 'config.libsonnet')._config; { local windows = - winlib.new( - dashboardNamePrefix=config.dashboardNamePrefix, - uid=config.uid, - filteringSelector=config.filteringSelector, - ) + winlib.new() + { config+: config, diff --git a/windows-observ-lib/config.libsonnet b/windows-observ-lib/config.libsonnet new file mode 100644 index 000000000..8ff0a6e0b --- /dev/null +++ b/windows-observ-lib/config.libsonnet @@ -0,0 +1,42 @@ +{ + // any modular library should include as inputs: + // 'dashboardNamePrefix' - Use as prefix for all Dashboards and (optional) rule groups + // 'filteringSelector' - Static selector to apply to ALL dashboard variables of type query, panel queries, alerts and recording rules. + // 'groupLabels' - one or more labels that can be used to identify 'group' of instances. In simple cases, can be 'job' or 'cluster'. + // 'instanceLabels' - one or more labels that can be used to identify single entity of instances. In simple cases, can be 'instance' or 'pod'. + // 'uid' - UID to prefix all dashboards original uids + groupLabels: ['job'], + instanceLabels: ['instance'], + filteringSelector: 'job=~".*windows.*"', + dashboardTags: ['windows'], + uid: 'windows', + dashboardNamePrefix: '', + + // optional + ignoreVolumes: 'HarddiskVolume.*', + alertsCPUThresholdWarning: '90', + alertMemoryUsageThresholdCritical: '90', + alertDiskUsageThresholdCritical: '90', + dashboardPeriod: 'now-1h', + dashboardTimezone: 'default', + dashboardRefresh: '1m', + + // optional Windows AD + alertsHighPendingReplicationOperations: 50, // count + alertsHighReplicationSyncRequestFailures: 0, // count + alertsHighPasswordChanges: 25, // count + alertsMetricsDownJobName: 'integrations/windows_exporter', + enableADDashboard: false, + + // logs lib related + enableLokiLogs: true, + extraLogLabels: ['channel', 'source', 'keywords', 'level'], + logsVolumeGroupBy: 'level', + showLogsVolume: true, + logsExtraFilters: + ||| + | label_format timestamp="{{__timestamp__}}" + | drop channel_extracted,source_extracted,computer_extracted,level_extracted,keywords_extracted + | line_format `{{ if eq "[[instance]]" ".*" }}{{ alignLeft 25 .instance}}|{{end}}{{alignLeft 12 .channel }}| {{ alignLeft 25 .source}}| {{ .message }}` + |||, +} diff --git a/windows-observ-lib/main.libsonnet b/windows-observ-lib/main.libsonnet index 1ee5a7a7a..df7145a01 100644 --- a/windows-observ-lib/main.libsonnet +++ b/windows-observ-lib/main.libsonnet @@ -1,4 +1,5 @@ local alerts = import './alerts.libsonnet'; +local config = import './config.libsonnet'; local dashboards = import './dashboards.libsonnet'; local datasources = import './datasources.libsonnet'; local g = import './g.libsonnet'; @@ -7,58 +8,10 @@ local targets = import './targets.libsonnet'; local commonlib = import 'common-lib/common/main.libsonnet'; { - new( - filteringSelector, - groupLabels=['job'], - instanceLabels=['instance'], - dashboardNamePrefix='', - dashboardTags=[uid], - uid, - ): { + new(): { local this = self, - config: { - // any modular library should include as inputs: - // 'dashboardNamePrefix' - Use as prefix for all Dashboards and (optional) rule groups - // 'filteringSelector' - Static selector to apply to ALL dashboard variables of type query, panel queries, alerts and recording rules. - // 'groupLabels' - one or more labels that can be used to identify 'group' of instances. In simple cases, can be 'job' or 'cluster'. - // 'instanceLabels' - one or more labels that can be used to identify single entity of instances. In simple cases, can be 'instance' or 'pod'. - // 'uid' - UID to prefix all dashboards original uids - groupLabels: groupLabels, - instanceLabels: instanceLabels, - filteringSelector: filteringSelector, - dashboardTags: dashboardTags, - uid: uid, - dashboardNamePrefix: dashboardNamePrefix, - - // optional - ignoreVolumes: 'HarddiskVolume.*', - alertsCPUThresholdWarning: '90', - alertMemoryUsageThresholdCritical: '90', - alertDiskUsageThresholdCritical: '90', - dashboardPeriod: 'now-1h', - dashboardTimezone: 'default', - dashboardRefresh: '1m', - - // optional Windows AD - alertsHighPendingReplicationOperations: 50, // count - alertsHighReplicationSyncRequestFailures: 0, // count - alertsHighPasswordChanges: 25, // count - alertsMetricsDownJobName: 'integrations/windows_exporter', - enableADDashboard: false, - - // logs lib related - enableLokiLogs: true, - extraLogLabels: ['channel', 'source', 'keywords', 'level'], - logsVolumeGroupBy: 'level', - showLogsVolume: true, - logsExtraFilters: - ||| - | label_format timestamp="{{__timestamp__}}" - | drop channel_extracted,source_extracted,computer_extracted,level_extracted,keywords_extracted - | line_format `{{ if eq "[[instance]]" ".*" }}{{ alignLeft 25 .instance}}|{{end}}{{alignLeft 12 .channel }}| {{ alignLeft 25 .source}}| {{ .message }}` - |||, - }, + config: config, grafana: { variables: commonlib.variables.new( filteringSelector=this.config.filteringSelector, @@ -74,7 +27,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; reboot: commonlib.annotations.reboot.new( title='Reboot', target=this.grafana.targets.reboot, - instanceLabels=std.join(',', instanceLabels), + instanceLabels=std.join(',', this.config.instanceLabels), ) + commonlib.annotations.base.withTagKeys(std.join(',', this.config.groupLabels + this.config.instanceLabels)), } @@ -124,5 +77,8 @@ local commonlib = import 'common-lib/common/main.libsonnet'; }, }, + withConfigMixin(config): { + config+: config, + }, } diff --git a/windows-observ-lib/mixin.libsonnet b/windows-observ-lib/mixin.libsonnet new file mode 100644 index 000000000..4e356b0a6 --- /dev/null +++ b/windows-observ-lib/mixin.libsonnet @@ -0,0 +1,10 @@ +local windowslib = import './main.libsonnet'; +{ + _config:: {}, + _windowsib:: + windowslib.new() + + windowslib.withConfigMixin(self._config), + grafanaDashboards+:: self._windowsib.grafana.dashboards, + prometheusAlerts+:: self._windowsib.prometheus.alerts, + prometheusRules+:: self._windowsib.prometheus.recordingRules, +}