Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NAS-129905 / 24.10 / Mark GPU as critical if it has devices which are in an iommu group which has a critical device #13976

Merged
merged 1 commit into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 7 additions & 31 deletions src/middlewared/middlewared/plugins/vm/pci.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
import collections
import os
import pathlib
import re

from pyudev import Context

from middlewared.schema import accepts, Bool, Dict, Int, List, Ref, returns, Str
from middlewared.service import private, Service, ValidationErrors
from middlewared.utils.gpu import get_gpus, SENSITIVE_PCI_DEVICE_TYPES
from middlewared.utils.gpu import get_gpus
from middlewared.utils.iommu import get_iommu_groups_info
from middlewared.utils.pci import get_pci_device_class, SENSITIVE_PCI_DEVICE_TYPES

from .utils import convert_pci_id_to_vm_pci_slot, get_pci_device_class
from .utils import convert_pci_id_to_vm_pci_slot


RE_DEVICE_NAME = re.compile(r'(\w+):(\w+):(\w+).(\w+)')
RE_DEVICE_PATH = re.compile(r'pci_(\w+)_(\w+)_(\w+)_(\w+)')


Expand All @@ -27,30 +27,6 @@ def iommu_enabled(self):
"""Returns "true" if iommu is enabled, "false" otherwise"""
return os.path.exists('/sys/kernel/iommu_groups')

@private
def get_iommu_groups_info(self):
addresses = collections.defaultdict(list)
final = dict()
try:
for i in pathlib.Path('/sys/kernel/iommu_groups').glob('*/devices/*'):
if not i.is_dir() or not i.parent.parent.name.isdigit() or not RE_DEVICE_NAME.fullmatch(i.name):
continue

iommu_group = int(i.parent.parent.name)
dbs, func = i.name.split('.')
dom, bus, slot = dbs.split(':')
addresses[iommu_group].append({
'domain': f'0x{dom}',
'bus': f'0x{bus}',
'slot': f'0x{slot}',
'function': f'0x{func}',
})
final[i.name] = {'number': iommu_group, 'addresses': addresses[iommu_group]}
except FileNotFoundError:
pass

return final

@private
def get_pci_device_default_data(self):
return {
Expand Down Expand Up @@ -120,7 +96,7 @@ def get_pci_device_details(self, obj, iommu_info):
@private
def get_all_pci_devices_details(self):
result = dict()
iommu_info = self.get_iommu_groups_info()
iommu_info = get_iommu_groups_info()
for i in Context().list_devices(subsystem='pci'):
key = f"pci_{i.sys_name.replace(':', '_').replace('.', '_')}"
result[key] = self.get_pci_device_details(i, iommu_info)
Expand All @@ -129,7 +105,7 @@ def get_all_pci_devices_details(self):
@private
def get_single_pci_device_details(self, pcidev):
result = dict()
iommu_info = self.get_iommu_groups_info()
iommu_info = get_iommu_groups_info()
for i in filter(lambda x: x.sys_name == pcidev, Context().list_devices(subsystem='pci')):
key = f"pci_{i.sys_name.replace(':', '_').replace('.', '_')}"
result[key] = self.get_pci_device_details(i, iommu_info)
Expand Down Expand Up @@ -214,7 +190,7 @@ def get_pci_ids_for_gpu_isolation(self, gpu_pci_id):

verrors.check()

iommu_groups = self.get_iommu_groups_info()
iommu_groups = get_iommu_groups_info()
iommu_groups_mapping_with_group_no = collections.defaultdict(set)
for pci_slot, pci_details in iommu_groups.items():
iommu_groups_mapping_with_group_no[pci_details['number']].add(convert_pci_id_to_vm_pci_slot(pci_slot))
Expand Down
10 changes: 0 additions & 10 deletions src/middlewared/middlewared/plugins/vm/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import contextlib
import os
from xml.etree import ElementTree as etree


Expand Down Expand Up @@ -28,14 +26,6 @@ def convert_pci_id_to_vm_pci_slot(pci_id: str) -> str:
return f'pci_{pci_id.replace(".", "_").replace(":", "_")}'


def get_pci_device_class(pci_path: str) -> str:
with contextlib.suppress(FileNotFoundError):
with open(os.path.join(pci_path, 'class'), 'r') as r:
return r.read().strip()

return ''


def get_vm_nvram_file_name(vm_data: dict) -> str:
return f'{vm_data["id"]}_{vm_data["name"]}_VARS.fd'

Expand Down
159 changes: 159 additions & 0 deletions src/middlewared/middlewared/pytest/unit/plugins/test_gpu_critical.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import textwrap

import pytest
from unittest.mock import patch, MagicMock

from middlewared.utils.gpu import get_gpus


DEVICE_DATA = {
'0000:17:00.0': {
'PCI_ID': '1AF4:1050',
'ID_VENDOR_FROM_DATABASE': 'NVIDIA Corporation',
'ID_PCI_SUBCLASS_FROM_DATABASE': 'VGA compatible controller',
'PCI_SLOT_NAME': '0000:17:00.0',
},
'0000:17:00.1': {
'PCI_ID': '1AF4:1050',
'ID_VENDOR_FROM_DATABASE': 'NVIDIA Corporation',
'ID_PCI_SUBCLASS_FROM_DATABASE': 'Audio device',
'PCI_SLOT_NAME': '0000:17:00.1',
},
'0000:00:1f.4': {
'PCI_ID': '1AF4:1050',
'ID_VENDOR_FROM_DATABASE': 'Intel Corporation',
'ID_PCI_SUBCLASS_FROM_DATABASE': 'SMBus',
'PCI_SLOT_NAME': '0000:00:1f.4',
},
}


@pytest.mark.parametrize(
'ls_pci,gpu_pci_id,child_ids,iommu_group,uses_system_critical_devices,critical_reason',
[
(
textwrap.dedent('''
0000:17:00.0 VGA compatible controller: NVIDIA Corporation TU117GL [T400 4GB] (rev a1)
0000:17:00.1 Audio device: NVIDIA Corporation Device 10fa (rev a1)
'''),
'0000:17:00.0',
['0000:17:00.1'],
{
'0000:17:00.1': {
'number': 9,
'addresses': [],
'critical': False
},
'0000:17:00.0': {
'number': 9,
'addresses': [],
'critical': False
},
},
False,
None
),
(
textwrap.dedent('''
0000:17:00.0 VGA compatible controller: NVIDIA Corporation TU117GL [T400 4GB] (rev a1)
0000:17:00.1 Audio device: NVIDIA Corporation Device 10fa (rev a1)
0000:00:1f.4 SMBus: Intel Corporation C620 Series Chipset Family SMBus (rev 09)
'''),
'0000:17:00.0',
['0000:17:00.1', '0000:00:1f.4'],
{
'0000:17:00.1': {
'number': 9,
'addresses': [],
'critical': False
},
'0000:17:00.0': {
'number': 9,
'addresses': [],
'critical': False
},
'0000:00:1f.4': {
'number': 31,
'addresses': [],
'critical': True
},
},
True,
'Critical devices found: 0000:00:1f.4\nCritical devices found in same IOMMU group: 0000:00:1f.4'
),
(
textwrap.dedent('''
0000:17:00.0 VGA compatible controller: NVIDIA Corporation TU117GL [T400 4GB] (rev a1)
0000:17:00.1 Audio device: NVIDIA Corporation Device 10fa (rev a1)
0000:00:1f.4 SMBus: Intel Corporation C620 Series Chipset Family SMBus (rev 09)
'''),
'0000:17:00.0',
['0000:17:00.1'],
{
'0000:17:00.1': {
'number': 10,
'addresses': [],
'critical': False
},
'0000:17:00.0': {
'number': 9,
'addresses': [],
'critical': False
},
'0000:00:1f.4': {
'number': 9,
'addresses': [],
'critical': True
},
},
True,
'Critical devices found in same IOMMU group: 0000:17:00.0'
),
(
textwrap.dedent('''
0000:17:00.0 VGA compatible controller: NVIDIA Corporation TU117GL [T400 4GB] (rev a1)
0000:17:00.1 Audio device: NVIDIA Corporation Device 10fa (rev a1)
0000:00:1f.4 SMBus: Intel Corporation C620 Series Chipset Family SMBus (rev 09)
'''),
'0000:17:00.0',
['0000:17:00.1'],
{
'0000:17:00.1': {
'number': 9,
'addresses': [],
'critical': False
},
'0000:17:00.0': {
'number': 10,
'addresses': [],
'critical': False
},
'0000:00:1f.4': {
'number': 9,
'addresses': [],
'critical': True
},
},
True,
'Critical devices found in same IOMMU group: 0000:17:00.1'
)
]
)
def test_critical_gpu(
ls_pci, gpu_pci_id, child_ids, iommu_group, uses_system_critical_devices, critical_reason
):
with patch('middlewared.utils.gpu.pyudev.Devices.from_name', MagicMock()) as from_name_mock:
udev_mock = MagicMock()
udev_mock.get = lambda key, default: DEVICE_DATA[gpu_pci_id].get(key, default)
udev_mock.parent.children = [DEVICE_DATA[child_id] for child_id in child_ids]
from_name_mock.return_value = udev_mock
with patch('middlewared.utils.gpu.subprocess.Popen', MagicMock()) as popen_mock:
comm_mock = MagicMock()
comm_mock.returncode = 0
comm_mock.communicate.return_value = ls_pci.strip().encode(), b''
popen_mock.return_value = comm_mock

with patch('middlewared.utils.gpu.get_iommu_groups_info', lambda *args, **kwargs: iommu_group):
gpus = get_gpus()[0]
assert gpus['uses_system_critical_devices'] == uses_system_critical_devices
assert gpus['critical_reason'] == critical_reason
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from pathlib import PosixPath
from unittest.mock import Mock, patch

from middlewared.pytest.unit.middleware import Middleware
from middlewared.plugins.vm.pci import VMDeviceService
from middlewared.utils.iommu import get_iommu_groups_info


DEVICES_PATH = [
Expand Down Expand Up @@ -86,6 +85,6 @@


def test_iommu_groups():
with patch('middlewared.plugins.vm.pci.pathlib.PosixPath.is_dir', Mock(return_value=True)):
with patch('middlewared.plugins.vm.pci.pathlib.Path.glob', Mock(return_value=DEVICES_PATH)):
assert VMDeviceService(Middleware()).get_iommu_groups_info() == IOMMU_GROUPS
with patch('middlewared.utils.iommu.pathlib.PosixPath.is_dir', Mock(return_value=True)):
with patch('middlewared.utils.iommu.pathlib.Path.glob', Mock(return_value=DEVICES_PATH)):
assert get_iommu_groups_info() == IOMMU_GROUPS
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@


def test_get_pci_ids_for_gpu_isolation():
with patch('middlewared.plugins.vm.pci.VMDeviceService.get_iommu_groups_info', Mock(return_value=IOMMU_GROUPS)):
with patch('middlewared.plugins.vm.pci.get_iommu_groups_info', Mock(return_value=IOMMU_GROUPS)):
with patch('middlewared.plugins.vm.pci.get_gpus', Mock(return_value=AVAILABLE_GPUs)):
assert set(VMDeviceService(Middleware()).get_pci_ids_for_gpu_isolation('0000:16:0e.0')) == {
'pci_0000_16_0e_0', 'pci_0000_16_0e_2'
Expand Down
58 changes: 43 additions & 15 deletions src/middlewared/middlewared/utils/gpu.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,27 @@
import collections
import pyudev
import re
import subprocess

from middlewared.service_exception import CallError

from .iommu import get_iommu_groups_info
from .pci import SENSITIVE_PCI_DEVICE_TYPES


RE_PCI_ADDR = re.compile(r'(?P<domain>.*):(?P<bus>.*):(?P<slot>.*)\.')

# get capability classes for relevant pci devices from
# https://github.com/pciutils/pciutils/blob/3d2d69cbc55016c4850ab7333de8e3884ec9d498/lib/header.h#L1429
SENSITIVE_PCI_DEVICE_TYPES = {
'0x0604': 'PCI Bridge',
'0x0601': 'ISA Bridge',
'0x0500': 'RAM memory',
'0x0c05': 'SMBus',
}

def get_critical_devices_in_iommu_group_mapping(iommu_groups: dict) -> dict[str, set[str]]:
iommu_groups_mapping_with_critical_devices = collections.defaultdict(set)
for pci_slot, pci_details in iommu_groups.items():
if pci_details['critical']:
iommu_groups_mapping_with_critical_devices[pci_details['number']].add(pci_slot)

return iommu_groups_mapping_with_critical_devices

def get_gpus():

def get_gpus() -> list:
cp = subprocess.Popen(['lspci', '-D'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = cp.communicate()
if cp.returncode:
Expand All @@ -35,6 +39,9 @@ def get_gpus():
gpu_slots.append((line.strip(), k))
break

iommu_groups = get_iommu_groups_info(get_critical_info=True)
critical_iommu_mapping = get_critical_devices_in_iommu_group_mapping(iommu_groups)

for gpu_line, key in gpu_slots:
addr = gpu_line.split()[0]
addr_re = RE_PCI_ADDR.match(addr)
Expand All @@ -51,17 +58,37 @@ def get_gpus():
vendor = 'AMD'

devices = []
critical = False
critical_reason = None
critical_devices = set()

# So we will try to mark those gpu's as critical which meet following criteria:
# 1) Have a device which belongs to sensitive pci devices group
# 2) Have a device which is in same iommu group as a device which belongs to sensitive pci devices group
if critical_iommu_mapping[iommu_groups.get(addr, {}).get('number')]:
critical_devices_based_on_iommu = {addr}
else:
critical_devices_based_on_iommu = set()

for child in filter(lambda c: all(k in c for k in ('PCI_SLOT_NAME', 'PCI_ID')), gpu_dev.parent.children):
devices.append({
'pci_id': child['PCI_ID'],
'pci_slot': child['PCI_SLOT_NAME'],
'vm_pci_slot': f'pci_{child["PCI_SLOT_NAME"].replace(".", "_").replace(":", "_")}',
})
critical |= any(
k.lower() in child.get('ID_PCI_SUBCLASS_FROM_DATABASE', '').lower()
for k in SENSITIVE_PCI_DEVICE_TYPES.values()
)
for k in SENSITIVE_PCI_DEVICE_TYPES.values():
if k.lower() in child.get('ID_PCI_SUBCLASS_FROM_DATABASE', '').lower():
critical_devices.add(child['PCI_SLOT_NAME'])
break
if critical_iommu_mapping[iommu_groups.get(child['PCI_SLOT_NAME'], {}).get('number')]:
critical_devices_based_on_iommu.add(child['PCI_SLOT_NAME'])

if critical_devices:
critical_reason = f'Critical devices found: {", ".join(critical_devices)}'

if critical_devices_based_on_iommu:
critical_reason = f'{critical_reason}\n' if critical_reason else ''
critical_reason += ('Critical devices found in same IOMMU group: '
f'{", ".join(critical_devices_based_on_iommu)}')

gpus.append({
'addr': {
Expand All @@ -71,7 +98,8 @@ def get_gpus():
'description': gpu_line.split(f'{key}:')[-1].split('(rev')[0].strip(),
'devices': devices,
'vendor': vendor,
'uses_system_critical_devices': critical,
'uses_system_critical_devices': bool(critical_reason),
'critical_reason': critical_reason,
})

return gpus
Loading
Loading