Skip to content

Commit

Permalink
Refactor CPU reporting
Browse files Browse the repository at this point in the history
  • Loading branch information
Qubad786 committed Jan 21, 2025
1 parent 4057b3e commit 23059e4
Show file tree
Hide file tree
Showing 11 changed files with 128 additions and 113 deletions.
8 changes: 6 additions & 2 deletions src/freenas/usr/lib/netdata/python.d/cputemp.chart.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,14 @@ def get_data(self):
cpu_temps = {}

data = {}
total_temp = 0
for core, temp in cpu_temps.items():
data[str(core)] = temp
data[f'cpu{core}'] = temp
total_temp += temp

return data or {str(i): 0 for i in range(cpu_info()['core_count'])}
if total_temp:
data['cpu'] = total_temp / len(data.keys())
return data or ({f'cpu{i}': 0 for i in range(cpu_info()['core_count'])} | {'cpu': 0})

def check(self):
try:
Expand Down
21 changes: 12 additions & 9 deletions src/freenas/usr/lib/netdata/python.d/truenas_cpu_usage.chart.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,24 @@
class Service(SimpleService):
def __init__(self, configuration=None, name=None):
SimpleService.__init__(self, configuration=configuration, name=name)
self.old_stats = {}

def check(self):
self.add_cpu_usage_to_charts()
return True

def get_data(self):
return get_cpu_usage()
data, self.old_stats = get_cpu_usage(self.old_stats)
return data

def add_cpu_usage_to_charts(self):
for cpu_name in get_cpu_usage().keys():
self.charts.add_chart([
cpu_name, cpu_name, cpu_name, 'CPU USAGE%',
'cpu.usage',
'Cpu usage',
'line',
])
data, self.old_stats = get_cpu_usage()
self.charts.add_chart([
'cpu', 'cpu', 'cpu', 'CPU USAGE%',
'cpu.usage',
'Cpu usage',
'line',
])

self.charts[cpu_name].add_dimension([cpu_name, 'usage', 'absolute'])
for cpu_name in filter(lambda s: s.startswith('cpu'), data.keys()):
self.charts['cpu'].add_dimension([f'{cpu_name}', f'{cpu_name}', 'absolute'])
12 changes: 1 addition & 11 deletions src/middlewared/middlewared/etc_files/netdata/netdata.conf.mako
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,7 @@
/proc/meminfo = no
/proc/net/dev = yes
/proc/pagetypeinfo = no
# /proc/stat = yes - we keep this uncommented as by default that enables it, for some reason
# if we explicitly set it to yes it still does not has the desired affect and we are not able
# to retrieve system.cpu stats
/proc/stat = no
/proc/uptime = yes
/proc/loadavg = yes
/proc/sys/kernel/random/entropy_avail = no
Expand Down Expand Up @@ -120,11 +118,3 @@

[plugin:cgroups]
enable by default cgroups names matching = !*udev* *

[plugin:proc:/proc/stat]
per cpu core utilization = no
context switches = no
cpu interrupts = no
processes started = no
processes running = no
cpu idle states = no
3 changes: 0 additions & 3 deletions src/middlewared/middlewared/plugins/reporting/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,6 @@ def run_sync(self):
'failed_to_connect': False,
}

# CPU temperature
data['cpu']['temperature_celsius'] = self.middleware.call_sync('reporting.cpu_temperatures') or None

self.send_event('ADDED', fields=data)
time.sleep(interval)

Expand Down
11 changes: 6 additions & 5 deletions src/middlewared/middlewared/plugins/reporting/netdata/graphs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import typing

from middlewared.utils.cpu import cpu_info
from middlewared.utils.disk_temperatures import get_disks_for_temperature_reading

from .graph_base import GraphBase
Expand All @@ -13,12 +14,12 @@ class CPUPlugin(GraphBase):
vertical_label = '%CPU'

def get_chart_name(self, identifier: typing.Optional[str] = None) -> str:
return 'system.cpu'
return 'truenas_cpu_usage.cpu'

def query_parameters(self) -> dict:
return super().query_parameters() | {
'dimensions': 'system|user|idle|softirq|nice|iowait',
}
async def get_identifiers(self) -> typing.Optional[list]:
cpu_usage = [f'cpu{i}' for i in cpu_info()['core']]
cpu_usage.append('cpu')
return cpu_usage


class CPUTempPlugin(GraphBase):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,24 @@
from .utils import safely_retrieve_dimension


def calculate_usage(cpu_stats: dict) -> float:
cp_total = sum(cpu_stats.values())
return ((cp_total - cpu_stats['idle'] - cpu_stats['iowait']) / cp_total) * 100 if cp_total else 0


def get_cpu_stats(netdata_metrics: dict) -> dict:
metric_name = 'system.cpu'
fields = ['user', 'nice', 'system', 'idle', 'iowait', 'irq', 'softirq', 'steal', 'guest', 'guest_nice']
data = {field: safely_retrieve_dimension(netdata_metrics, metric_name, field, 0) for field in fields}
data['aggregated_usage'] = safely_retrieve_dimension(netdata_metrics, 'truenas_cpu_usage.cpu', 'cpu', 0)
data = {
'cpu': {
'usage': safely_retrieve_dimension(
netdata_metrics, f'truenas_cpu_usage.cpu', 'cpu', 0
),
'temp': safely_retrieve_dimension(
netdata_metrics, 'cputemp.temperatures', 'cpu',
) or None
}}
for core_index in range(cpu_info()['core_count']):
data[f'core{core_index}_usage'] = safely_retrieve_dimension(
netdata_metrics, f'truenas_cpu_usage.cpu{core_index}', f'cpu{core_index}', 0
)
data[f'cpu{core_index}'] = {
'usage': safely_retrieve_dimension(
netdata_metrics, f'truenas_cpu_usage.cpu', f'cpu{core_index}', 0
),
'temp': safely_retrieve_dimension(
netdata_metrics, 'cputemp.temperatures', f'cpu{core_index}',
) or None
}

return data
3 changes: 1 addition & 2 deletions src/middlewared/middlewared/plugins/reporting/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ def get_metrics_approximation(
) -> dict:
data = {
1: {
'system.cpu': 10,
'system.clock_sync_state': 1,
'system.clock_status': 2,
'system.clock_sync_offset': 1,
Expand Down Expand Up @@ -122,7 +121,7 @@ def get_metrics_approximation(
'cpu.usage': core_count + 1,

# cputemp
'cputemp.temperatures': core_count,
'cputemp.temperatures': core_count + 1,

# ups
'nut_ups.charge': 1,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@


@pytest.mark.parametrize('disk_count,core_count,interface_count,services_count,vms_count,expected_output', [
(4, 2, 1, 10, 2, {1: 708, 60: 4}),
(1600, 32, 4, 10, 1, {1: 8763, 60: 1600}),
(10, 16, 2, 12, 3, {1: 847, 60: 10}),
(4, 2, 1, 10, 2, {1: 699, 60: 4}),
(1600, 32, 4, 10, 1, {1: 8754, 60: 1600}),
(10, 16, 2, 12, 3, {1: 838, 60: 10}),
])
def test_netdata_metrics_count_approximation(
disk_count, core_count, interface_count, services_count, vms_count, expected_output
Expand All @@ -19,14 +19,14 @@ def test_netdata_metrics_count_approximation(
@pytest.mark.parametrize(
'disk_count,core_count,interface_count,services_count,vms_count,days,'
'bytes_per_point,tier_interval,expected_output', [
(4, 2, 1, 10, 2, 7, 1, 1, 408),
(4, 2, 1, 10, 2, 7, 1, 1, 403),
(4, 2, 1, 10, 1, 7, 4, 60, 25),
(1600, 32, 4, 2, 4, 4, 1, 1, 2928),
(1600, 32, 4, 1, 4, 4, 4, 900, 13),
(10, 16, 2, 12, 1, 3, 1, 1, 185),
(1600, 32, 4, 2, 4, 4, 1, 1, 2925),
(1600, 32, 4, 1, 4, 4, 4, 900, 12),
(10, 16, 2, 12, 1, 3, 1, 1, 183),
(10, 16, 2, 10, 3, 3, 4, 60, 13),
(1600, 32, 4, 12, 3, 18, 1, 1, 13196),
(1600, 32, 4, 12, 1, 18, 4, 900, 58),
(1600, 32, 4, 12, 3, 18, 1, 1, 13183),
(1600, 32, 4, 12, 1, 18, 4, 900, 57),
],
)
def test_netdata_disk_space_approximation(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,52 +8,53 @@


NETDATA_ALL_METRICS = {
'system.cpu': {
'name': 'system.cpu',
'family': 'cpu',
'context': 'system.cpu',
'units': 'percentage',
'last_updated': 1691150349,
'dimensions': {
'guest_nice': {
'name': 'guest_nice',
'value': 0.5375124
},
'guest': {
'name': 'guest',
'value': 0.5375124
},
'steal': {
'name': 'steal',
'value': 0.5275124
'cputemp.temperatures': {
'name': 'cputemp.temperatures',
'family': 'temperature',
'context': 'sensors.temperature',
'units': 'Celsius',
'last_updated': 1737452189,
'dimensions': {
'cpu0': {
'name': 'cpu0',
'value': 22
},
'softirq': {
'name': 'softirq',
'value': 0.5175124
'cpu1': {
'name': 'cpu1',
'value': 21
},
'irq': {
'name': 'irq',
'value': 0.4975124
},
'user': {
'name': 'user',
'value': 0.4975124
'cpu2': {
'name': 'cpu2',
'value': 10
},
'system': {
'name': 'system',
'value': 0.4975124
'cpu': {
'name': 'cpu',
'value': 40
}
}
},
'truenas_cpu_usage.cpu': {
'name': 'truenas_cpu_usage.cpu',
'family': 'cpu.usage',
'context': 'Cpu usage',
'units': 'CPU USAGE%',
'last_updated': 1737452189,
'dimensions': {
'cpu': {
'name': 'cpu',
'value': 4
},
'nice': {
'name': 'nice',
'value': 49.75124
'cpu0': {
'name': 'cpu0',
'value': 2
},
'iowait': {
'name': 'iowait',
'value': 4.75124
'cpu1': {
'name': 'cpu1',
'value': 3
},
'idle': {
'name': 'idle',
'value': 99.0049751
'cpu2': {
'name': 'cpu2',
'value': 4
}
}
},
Expand Down Expand Up @@ -729,13 +730,17 @@ def test_arc_stats():

def test_cpu_stats():
cpu_stat = get_cpu_stats(NETDATA_ALL_METRICS)
total_sum = 0
for metric, value in cpu_stat.items():
if metric == 'usage':
assert value == ((total_sum - cpu_stat['idle'] - cpu_stat['iowait']) / total_sum) * 100
else:
assert value == safely_retrieve_dimension(NETDATA_ALL_METRICS, 'system.cpu', metric, 0)
total_sum += value
assert value == {
'usage': safely_retrieve_dimension(
NETDATA_ALL_METRICS,
f'truenas_cpu_usage.cpu',
f'{metric}', 0
),
'temp': safely_retrieve_dimension(
NETDATA_ALL_METRICS, 'cputemp.temperatures', metric,
) or None
}


def test_disk_stats():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@

def test_memory_stats():
with patch('builtins.open', mock_open(read_data=STAT)):
assert get_cpu_usage() == {
'cpu': calculate_cpu_usage([269140, 3068, 35283, 1952826, 2648, 0, 1978, 1126, 12492, 0]),
'cpu0': calculate_cpu_usage([89904, 989, 11999, 512042, 887, 0, 997, 361, 4042, 0]),
'cpu1': calculate_cpu_usage([93084, 950, 12594, 716357, 845, 0, 604, 369, 4842, 0]),
'cpu2': calculate_cpu_usage([86151, 1129, 10690, 724426, 914, 0, 376, 396, 3608, 0]),

assert get_cpu_usage({cpu: [0] * 9 for cpu in ('cpu_usage', 'cpu0_usage', 'cpu1_usage', 'cpu2_usage')})[0] == {
'cpu': calculate_cpu_usage([269140, 3068, 35283, 1952826, 2648, 0, 1978, 1126, 12492, 0], [0] * 9),
'cpu0': calculate_cpu_usage([89904, 989, 11999, 512042, 887, 0, 997, 361, 4042, 0], [0] * 9),
'cpu1': calculate_cpu_usage([93084, 950, 12594, 716357, 845, 0, 604, 369, 4842, 0], [0] * 9),
'cpu2': calculate_cpu_usage([86151, 1129, 10690, 724426, 914, 0, 376, 396, 3608, 0], [0] * 9),
}
27 changes: 18 additions & 9 deletions src/middlewared/middlewared/utils/metrics/cpu_usage.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,38 @@
def calculate_cpu_usage(cpu_times: list[int]) -> float:
def calculate_cpu_usage(cur_cpu_times: list[int], old_cpu_times: list[int]) -> float:
"""
Calculate CPU usage as a percentage.
Excludes 'idle' and 'iowait' times from the calculation.
Args:
cpu_times (list[int]): List of CPU time values.
cur_cpu_times (list[int]): List of CPU time values.
old_cpu_times (list[int]): List of CPU time values.
Returns:
float: CPU usage percentage, rounded to two decimal places.
"""
total_time = sum(cpu_times)
delta_time = list(map(lambda args: args[0] - args[1], zip(cur_cpu_times, old_cpu_times)))
total_time = sum(delta_time)
if total_time:
idle_time = cpu_times[3] # Idle
iowait_time = cpu_times[4] # I/O Wait
idle_time = delta_time[3] # Idle
iowait_time = delta_time[4] # I/O Wait
active_time = total_time - idle_time - iowait_time
return round((active_time / total_time) * 100, 2)
return 0.0


def get_cpu_usage() -> dict[str, float]:
def get_cpu_usage(old_stats: dict | None = None) -> (dict[str, float], dict[str, list]):
"""
Retrieve CPU usage statistics from /proc/stat.
Returns:
dict[str, float]: Dictionary containing CPU usage percentages for
the aggregate ('cpu') and each individual core ('cpu0', 'cpu1', ...).
"""
# Calculation is inspired by how htop does it
# https://github.com/htop-dev/htop/blob/3a9f468c626b9261dc3a5234fc362303aeb5103d/linux/Platform.c#L320
old_stats = old_stats or {}
cpu_usage_data = {}
cached_values = {}
with open('/proc/stat') as f:
# Process only CPU-related lines
for line in filter(lambda x: x.startswith('cpu'), f):
Expand All @@ -35,7 +41,10 @@ def get_cpu_usage() -> dict[str, float]:
# all cpu cores and the later representing the
# cpu core specific values
core, values = line.split(' ', 1)
cpu_stats = list(map(int, values.strip().split()))
cpu_usage_data[core] = calculate_cpu_usage(
list(map(int, values.strip().split()))
)
return cpu_usage_data
cpu_stats, old_stats.get(core, [0] * len(values))
) if old_stats else 0
cached_values[core] = cpu_stats

return cpu_usage_data, cached_values

0 comments on commit 23059e4

Please sign in to comment.