Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NAS-134387 / 25.10 / Add root datasets information to realtime reporting #15895

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
1 change: 1 addition & 0 deletions src/freenas/usr/lib/netdata/conf.d/python.d.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ truenas_arcstats: yes
truenas_cpu_usage: yes
truenas_disk_stats: yes
truenas_meminfo: yes
truenas_pool: yes
35 changes: 35 additions & 0 deletions src/freenas/usr/lib/netdata/python.d/truenas_pool.chart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from bases.FrameworkServices.SimpleService import SimpleService

from middlewared.utils.metrics.pool_stats import get_pool_dataset_stats


class Service(SimpleService):
    """Netdata SimpleService that charts per-pool root-dataset usage."""

    def __init__(self, configuration=None, name=None):
        SimpleService.__init__(self, configuration=configuration, name=name)
        # Pool usage changes slowly; a 5 minute poll interval is plenty.
        self.update_every = 300

    def check(self):
        # Register the chart and its dimensions up front; this collector
        # is always considered viable.
        self.add_pool_stats_to_charts()
        return True

    def get_data(self):
        collected = {}
        for guid, stats in get_pool_dataset_stats().items():
            collected.update({f'{guid}.{key}': val for key, val in stats.items()})
            # Derived dimension: total capacity = consumed + remaining.
            collected[f'{guid}.total'] = stats['used'] + stats['available']
        return collected

    def add_pool_stats_to_charts(self):
        stats_by_guid = get_pool_dataset_stats()
        self.charts.add_chart([
            'usage', 'usage', 'usage', 'bytes',
            'pool.usage',
            'pool.usage',
            'line',
        ])

        # One available/used/total dimension triple per imported pool,
        # keyed by the pool guid.
        usage_chart = self.charts['usage']
        for guid in stats_by_guid:
            for dimension in ('available', 'used', 'total'):
                usage_chart.add_dimension([f'{guid}.{dimension}', dimension, 'absolute'])
9 changes: 6 additions & 3 deletions src/middlewared/middlewared/plugins/reporting/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
from middlewared.utils.disks import get_disk_names, get_disks_with_identifiers
from middlewared.validators import Range

from .realtime_reporting import get_arc_stats, get_cpu_stats, get_disk_stats, get_interface_stats, get_memory_info

from .realtime_reporting import (
get_arc_stats, get_cpu_stats, get_disk_stats, get_interface_stats, get_memory_info, get_pool_stats,
)

class ReportingRealtimeService(Service):

Expand Down Expand Up @@ -50,6 +51,7 @@ def stats(self, disk_mapping=None):
)
]
),
'pools': get_pool_stats(netdata_metrics),
'failed_to_connect': False,
}

Expand Down Expand Up @@ -108,7 +110,8 @@ class RealtimeEventSource(EventSource):
Int('l2arc_miss_percentage'),
Int('bytes_read_per_second_from_the_l2arc'),
Int('bytes_written_per_second_to_the_l2arc'),
)
),
Dict('pools', additional_attrs=True),
)

def run_sync(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
from .ifstat import get_interface_stats # noqa
from .iostat import get_disk_stats # noqa
from .memory import get_memory_info # noqa
from .pool import get_pool_stats # noqa
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from collections import defaultdict

from middlewared.utils.zfs import query_imported_fast_impl

from .utils import safely_retrieve_dimension


def get_pool_stats(netdata_metrics: dict) -> dict:
    """Re-key the per-pool netdata usage dimensions by pool name.

    Dimension names arrive as ``<pool guid>.<stat>``; the guid is translated
    to the pool's name via the currently-imported pool list.
    """
    imported_pools = query_imported_fast_impl()
    stats = defaultdict(lambda: {'available': None, 'used': None, 'total': None})

    dimensions = safely_retrieve_dimension(netdata_metrics, 'truenas_pool.usage')
    for dimension_name, raw_value in dimensions.items():
        guid, stat_key = dimension_name.split('.')
        # Preserve the original falsy-value fallback: missing/empty payloads
        # are recorded as an empty mapping rather than None.
        stats[imported_pools[guid]['name']][stat_key] = raw_value or {}
    return stats
3 changes: 3 additions & 0 deletions src/middlewared/middlewared/plugins/reporting/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ def calculated_metrics_count(self):
len(self.middleware.call_sync('device.get_disks', False, True)),
cpu_info()['core_count'],
self.middleware.call_sync('interface.query', [], {'count': True}),
self.middleware.call_sync('pool.dataset.query', [], {'count': True, 'extra': {
'properties': [], 'retrieve_children': False,
}}),
len(self.middleware.call_sync('virt.instance.query', [['type', '=', 'VM']])),
len(glob.glob('/sys/fs/cgroup/**/*.service')),
)
Expand Down
5 changes: 4 additions & 1 deletion src/middlewared/middlewared/plugins/reporting/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def get_netdata_state_path() -> str:


def get_metrics_approximation(
disk_count: int, core_count: int, interface_count: int, vms_count: int,
disk_count: int, core_count: int, interface_count: int, pool_count: int, vms_count: int,
systemd_service_count: int, containers_count: typing.Optional[int] = 10,
) -> dict:
data = {
Expand Down Expand Up @@ -120,6 +120,9 @@ def get_metrics_approximation(
# cpu usage, it is core count + 1 with +1 saving aggregated stats
'cpu.usage': core_count + 1,

# pool usage
'truenas_pool.usage': pool_count * 2,

# cputemp
'cputemp.temperatures': core_count + 1,

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,58 +3,59 @@
from middlewared.plugins.reporting.utils import get_metrics_approximation, calculate_disk_space_for_netdata


@pytest.mark.parametrize('disk_count,core_count,interface_count,services_count,vms_count,expected_output', [
(4, 2, 1, 10, 2, {1: 699, 60: 4}),
(1600, 32, 4, 10, 1, {1: 8754, 60: 1600}),
(10, 16, 2, 12, 3, {1: 838, 60: 10}),
@pytest.mark.parametrize('disk_count,core_count,interface_count,pool_count,services_count,vms_count,expected_output', [
(4, 2, 1, 2, 10, 2, {1: 703, 60: 4}),
(1600, 32, 4, 4, 10, 1, {1: 8762, 60: 1600}),
(10, 16, 2, 2, 12, 3, {1: 842, 60: 10}),
])
def test_netdata_metrics_count_approximation(
disk_count, core_count, interface_count, services_count, vms_count, expected_output
disk_count, core_count, interface_count, pool_count, services_count, vms_count, expected_output
):
assert get_metrics_approximation(
disk_count, core_count, interface_count, vms_count, services_count
disk_count, core_count, interface_count, pool_count, vms_count, services_count
) == expected_output


@pytest.mark.parametrize(
'disk_count,core_count,interface_count,services_count,vms_count,days,'
'disk_count,core_count,interface_count,pool_count,services_count,vms_count,days,'
'bytes_per_point,tier_interval,expected_output', [
(4, 2, 1, 10, 2, 7, 1, 1, 403),
(4, 2, 1, 10, 1, 7, 4, 60, 25),
(1600, 32, 4, 2, 4, 4, 1, 1, 2925),
(1600, 32, 4, 1, 4, 4, 4, 900, 12),
(10, 16, 2, 12, 1, 3, 1, 1, 183),
(10, 16, 2, 10, 3, 3, 4, 60, 13),
(1600, 32, 4, 12, 3, 18, 1, 1, 13183),
(1600, 32, 4, 12, 1, 18, 4, 900, 57),
(4, 2, 1, 2, 10, 2, 7, 1, 1, 405),
(4, 2, 1, 2, 10, 1, 7, 4, 60, 25),
(1600, 32, 4, 12, 2, 4, 4, 1, 1, 2933),
(1600, 32, 4, 10, 1, 4, 4, 4, 900, 13),
(10, 16, 2, 2, 12, 1, 3, 1, 1, 184),
(10, 16, 2, 2, 10, 3, 3, 4, 60, 13),
(1600, 32, 4, 4, 12, 3, 18, 1, 1, 13195),
(1600, 32, 4, 4, 12, 1, 18, 4, 900, 58),
],
)
def test_netdata_disk_space_approximation(
disk_count, core_count, interface_count, services_count,
vms_count, days, bytes_per_point, tier_interval, expected_output
disk_count, core_count, interface_count, pool_count, services_count,
vms_count, days, bytes_per_point, tier_interval, expected_output
):
assert calculate_disk_space_for_netdata(get_metrics_approximation(
disk_count, core_count, interface_count, vms_count, services_count
disk_count, core_count, interface_count, pool_count, vms_count, services_count
), days, bytes_per_point, tier_interval) == expected_output


@pytest.mark.parametrize(
'disk_count,core_count,interface_count,services_count,vms_count,days,bytes_per_point,tier_interval', [
(4, 2, 1, 10, 2, 7, 1, 1),
(4, 2, 1, 12, 2, 7, 4, 60),
(1600, 32, 4, 10, 3, 4, 1, 1),
(1600, 32, 4, 12, 3, 4, 4, 900),
(10, 16, 2, 10, 4, 3, 1, 1),
(10, 16, 2, 12, 4, 3, 4, 60),
(1600, 32, 4, 10, 5, 18, 1, 1),
(1600, 32, 4, 12, 5, 18, 4, 900),
'disk_count,core_count,interface_count,pool_count,services_count,vms_count,days,bytes_per_point,tier_interval',
[
(4, 2, 1, 2, 10, 2, 7, 1, 1),
(4, 2, 1, 2, 12, 2, 7, 4, 60),
(1600, 32, 4, 4, 10, 3, 4, 1, 1),
(1600, 32, 4, 4, 12, 3, 4, 4, 900),
(10, 16, 2, 2, 10, 4, 3, 1, 1),
(10, 16, 2, 2, 12, 4, 3, 4, 60),
(1600, 32, 4, 4, 10, 5, 18, 1, 1),
(1600, 32, 4, 4, 12, 5, 18, 4, 900),
],
)
def test_netdata_days_approximation(
disk_count, core_count, interface_count, services_count, vms_count, days, bytes_per_point, tier_interval
):
disk_count, core_count, interface_count, pool_count, services_count, vms_count, days, bytes_per_point,
tier_interval):
metric_intervals = get_metrics_approximation(
disk_count, core_count, interface_count, vms_count, services_count
disk_count, core_count, interface_count, pool_count, vms_count, services_count
)
disk_size = calculate_disk_space_for_netdata(metric_intervals, days, bytes_per_point, tier_interval)
total_metrics = metric_intervals[1] + (metric_intervals[60] / 60)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from middlewared.plugins.reporting.netdata.utils import NETDATA_UPDATE_EVERY
from middlewared.plugins.reporting.realtime_reporting import (
get_arc_stats, get_cpu_stats, get_disk_stats, get_interface_stats, get_memory_info,
get_arc_stats, get_cpu_stats, get_disk_stats, get_interface_stats, get_memory_info, get_pool_stats
)
from middlewared.plugins.reporting.realtime_reporting.utils import normalize_value, safely_retrieve_dimension

Expand Down Expand Up @@ -653,10 +653,51 @@
}
}
},
'truenas_pool.usage': {
'name': 'truenas_pool.usage',
'family': 'pool.usage',
'context': 'pool.usage',
'units': 'bytes',
'last_updated': 1741023952,
'dimensions': {
'boot-pool.available': {
'name': 'available',
'value': 16053874688
},
'boot-pool.used': {
'name': 'used',
'value': 3730903040
},
'tank.available': {
'name': 'available',
'value': 7181352960
},
'tank.used': {
'name': 'used',
'value': 13102555136
}
}
},

}


def test_get_pool_stats():
    # Every per-pool stat reported must match the raw netdata dimension value.
    pool_stats = get_pool_stats(NETDATA_ALL_METRICS)
    for pool in ('tank', 'boot-pool'):
        for stat in ('available', 'used'):
            assert pool_stats[pool][stat] == safely_retrieve_dimension(
                NETDATA_ALL_METRICS, 'truenas_pool.usage', f'{pool}.{stat}'
            )


def test_arc_stats():
arc_stats = get_arc_stats(NETDATA_ALL_METRICS)

Expand Down
19 changes: 19 additions & 0 deletions src/middlewared/middlewared/utils/metrics/pool_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import json
import subprocess

from middlewared.utils.zfs import guid_fast_impl


def get_pool_dataset_stats() -> dict[str, dict]:
    """Return ``{pool guid: {property: value}}`` for each pool's root dataset.

    ``zfs list -d 0`` limits output to the top-level (root) dataset of every
    imported pool; ``--json-int`` keeps used/avail as raw integers.

    Raises:
        subprocess.CalledProcessError: if the ``zfs`` invocation fails.
    """
    proc = subprocess.run(
        ['zfs', 'list', '-o', 'used,avail', '-j', '--json-int', '-d', '0'],
        capture_output=True, text=True, check=True,
    )
    parsed = json.loads(proc.stdout.strip())
    # NOTE(review): consumers of these stats index them by 'available' —
    # assumes the zfs JSON output spells the property name out in full even
    # though 'avail' was requested; confirm against the zfs version shipped.
    return {
        guid_fast_impl(dataset['name']): {
            prop: details['value'] for prop, details in dataset['properties'].items()
        }
        for dataset in parsed['datasets'].values()
    }
Loading