From 4f578acf3c555a3ec0225d3a1cb923676400a7ec Mon Sep 17 00:00:00 2001 From: "Andres D. Molins" Date: Wed, 27 Nov 2024 16:04:10 +0100 Subject: [PATCH] Fix: Added tests to cover that use case --- aleph_message/models/execution/environment.py | 2 +- aleph_message/models/execution/instance.py | 20 ++-- .../tests/messages/instance_gpu_machine.json | 111 ++++++++++++++++++ aleph_message/tests/test_models.py | 47 +++++++- 4 files changed, 169 insertions(+), 11 deletions(-) create mode 100644 aleph_message/tests/messages/instance_gpu_machine.json diff --git a/aleph_message/models/execution/environment.py b/aleph_message/models/execution/environment.py index d1e46ee..3167da7 100644 --- a/aleph_message/models/execution/environment.py +++ b/aleph_message/models/execution/environment.py @@ -191,7 +191,7 @@ class HostRequirements(HashableModel): node: Optional[NodeRequirements] = Field( default=None, description="Required Compute Resource Node properties" ) - gpus: Optional[List[GpuProperties]] = Field( + gpu: Optional[List[GpuProperties]] = Field( default=None, description="GPUs needed to pass-through from the host" ) diff --git a/aleph_message/models/execution/instance.py b/aleph_message/models/execution/instance.py index 35bde24..abe678d 100644 --- a/aleph_message/models/execution/instance.py +++ b/aleph_message/models/execution/instance.py @@ -1,13 +1,11 @@ from __future__ import annotations -from typing import Self - from pydantic import Field, root_validator from aleph_message.models.abstract import HashableModel from .abstract import BaseExecutableContent -from .environment import InstanceEnvironment +from .environment import InstanceEnvironment, HypervisorType from .volume import ParentVolume, PersistentVolumeSizeMib, VolumePersistence @@ -35,9 +33,15 @@ class InstanceContent(BaseExecutableContent): description="Root filesystem of the system, will be booted by the kernel" ) - @root_validator(pre=True) - def check_gpu_requirement(self) -> Self: - if self.requirements 
and self.requirements.gpus: - if self.payment and not self.payment.is_stream: + @root_validator() + def check_gpu_requirement(cls, values): + if values.get("requirements") and values.get("requirements").gpu: + if values.get("payment") and not values.get("payment").is_stream: raise ValueError("Stream payment type is needed for GPU requirement") - return self + + if ( + values.get("environment") + and values.get("environment").hypervisor != HypervisorType.qemu + ): + raise ValueError("GPU option is only supported for QEmu hypervisor") + return values diff --git a/aleph_message/tests/messages/instance_gpu_machine.json b/aleph_message/tests/messages/instance_gpu_machine.json new file mode 100644 index 0000000..0096820 --- /dev/null +++ b/aleph_message/tests/messages/instance_gpu_machine.json @@ -0,0 +1,111 @@ +{ + "_id": { + "$oid": "6080402d7f44efefd611dc1e" + }, + "chain": "ETH", + "sender": "0x9319Ad3B7A8E0eE24f2E639c40D8eD124C5520Ba", + "type": "INSTANCE", + "channel": "Fun-dApps", + "confirmed": true, + "content": { + "address": "0x9319Ad3B7A8E0eE24f2E639c40D8eD124C5520Ba", + "allow_amend": false, + "variables": { + "VM_CUSTOM_VARIABLE": "SOMETHING", + "VM_CUSTOM_VARIABLE_2": "32" + }, + "environment": { + "reproducible": true, + "internet": false, + "aleph_api": false, + "shared_cache": false, + "hypervisor": "qemu" + }, + "resources": { + "vcpus": 1, + "memory": 128, + "seconds": 30 + }, + "payment": { + "type": "superfluid", + "chain": "BASE", + "receiver": "0x9319Ad3B7A8E0eE24f2E639c40D8eD124C5520Ba" + }, + "requirements": { + "cpu": { + "architecture": "x86_64" + }, + "node": { + "node_hash": "4d4db19afca380fdf06ba7f916153d0f740db9de9eee23ad26ba96a90d8a2920" + }, + "gpu": [ + { + "vendor": "NVIDIA", + "device_class": "0300", + "device_name": "NVIDIA H100", + "device_id": "10de:2504" + } + ] + }, + "rootfs": { + "parent": { + "ref": "549ec451d9b099cad112d4aaa2c00ac40fb6729a92ff252ff22eef0b5c3cb613", + "use_latest": true + }, + "persistence": "host", + 
"size_mib": 20000 + }, + "authorized_keys": [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGULT6A41Msmw2KEu0R9MvUjhuWNAsbdeZ0DOwYbt4Qt user@example", + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIH0jqdc5dmt75QhTrWqeHDV9xN8vxbgFyOYs2fuQl7CI", + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDRsrQV1HVrcnskNhyH0may8TG9fHCPawpAi3ZgAWU6V/R7ezvZOHnZdaFeIsOpFbPbt/l67Fur3qniSXllI2kvuh2D4BBJ9PwwlB2sgWzFDF34ADsfLQf+C/vpwrWemEEE91Tpj0dWbnf219i3mZLxy/+5Sv6kUy9YJlzWnDEUbaMAZK2CXrlK90b9Ns7mT82h6h3x9dLF/oCjBAKOSxbH2X+KgsDEZT0soxyluDqKNgKflkav+pvKFyD4J9IWM4j36r80yW+OPGsHqWoWleEhprfNb60RJPwKAYCDiBiSg6wCq5P+kS15O79Ko45wPaYDUwhRoNTcrWeadvTaCZgz9X3KDHgrX6wzdKqzQwtQeabhCaIGLFRMNl1Oy/BR8VozPbIe/mY28IN84An50UYkbve7nOGJucKc4hKxZKEVPpnVpRtIoWGwBJY2fi6C6wy2pBa8UX4C4t9NLJjNQSwFBzYOrphLu3ZW9A+267nogQHGnsJ5xnQ/MXximP3BlwM= user@example" + ], + "volumes": [ + { + "comment": "Python libraries. Read-only since a 'ref' is specified.", + "mount": "/opt/venv", + "ref": "5f31b0706f59404fad3d0bff97ef89ddf24da4761608ea0646329362c662ba51", + "use_latest": false + }, + { + "comment": "Ephemeral storage, read-write but will not persist after the VM stops", + "mount": "/var/cache", + "ephemeral": true, + "size_mib": 5 + }, + { + "comment": "Working data persisted on the VM supervisor, not available on other nodes", + "mount": "/var/lib/sqlite", + "name": "sqlite-data", + "persistence": "host", + "size_mib": 10 + }, + { + "comment": "Working data persisted on the Aleph network. 
New VMs will try to use the latest version of this volume, with no guarantee against conflicts", + "mount": "/var/lib/statistics", + "name": "statistics", + "persistence": "store", + "size_mib": 10 + }, + { + "comment": "Raw drive to use by a process, do not mount it", + "name": "raw-data", + "persistence": "host", + "size_mib": 10 + } + ], + "replaces": "0x9319Ad3B7A8E0eE24f2E639c40D8eD124C5520Ba", + "time": 1619017773.8950517 + }, + "item_type": "inline", + "signature": "0x372da8230552b8c3e65c05b31a0ff3a24666d66c575f8e11019f62579bf48c2b7fe2f0bbe907a2a5bf8050989cdaf8a59ff8a1cbcafcdef0656c54279b4aa0c71b", + "size": 749, + "time": 1619017773.8950577, + "confirmations": [ + { + "chain": "ETH", + "height": 12284734, + "hash": "0x67f2f3cde5e94e70615c92629c70d22dc959a118f46e9411b29659c2fce87cdc" + } + ] +} diff --git a/aleph_message/tests/test_models.py b/aleph_message/tests/test_models.py index 78cdf04..5226d62 100644 --- a/aleph_message/tests/test_models.py +++ b/aleph_message/tests/test_models.py @@ -25,8 +25,9 @@ create_message_from_json, create_new_message, parse_message, + PaymentType, ) -from aleph_message.models.execution.environment import AMDSEVPolicy +from aleph_message.models.execution.environment import AMDSEVPolicy, HypervisorType from aleph_message.tests.download_messages import MESSAGES_STORAGE_PATH console = Console(color_system="windows") @@ -172,7 +173,7 @@ def test_validation_on_confidential_options(): path = Path(__file__).parent / "messages/instance_confidential_machine.json" message_dict = json.loads(path.read_text()) # Patch the hypervisor to be something other than QEmu - message_dict["content"]["environment"]["hypervisor"] = "firecracker" + message_dict["content"]["environment"]["hypervisor"] = HypervisorType.firecracker try: _ = create_new_message(message_dict, factory=InstanceMessage) raise AssertionError("An exception should have been raised before this point.") @@ -184,6 +185,48 @@ def test_validation_on_confidential_options(): ) +def 
test_instance_message_machine_with_gpu_options(): + path = Path(__file__).parent / "messages/instance_gpu_machine.json" + message = create_message_from_file(path, factory=InstanceMessage) + + assert isinstance(message, InstanceMessage) + assert hash(message.content) + assert message.content.payment.type == PaymentType.superfluid + assert message.content.environment.hypervisor == HypervisorType.qemu + + +def test_validation_on_gpu_payment_options(): + """Ensure that a gpu option is only allowed for stream payments.""" + path = Path(__file__).parent / "messages/instance_gpu_machine.json" + message_dict = json.loads(path.read_text()) + # Patch the payment type to be something other than a stream payment + message_dict["content"]["payment"]["type"] = "hold" + try: + _ = create_new_message(message_dict, factory=InstanceMessage) + raise AssertionError("An exception should have been raised before this point.") + except ValidationError as e: + assert e.errors()[0]["loc"] == ("content", "__root__") + assert ( + e.errors()[0]["msg"] == "Stream payment type is needed for GPU requirement" + ) + + +def test_validation_on_gpu_hypervisor_options(): + """Ensure that a gpu option is only allowed for Qemu hypervisor.""" + path = Path(__file__).parent / "messages/instance_gpu_machine.json" + message_dict = json.loads(path.read_text()) + # Patch the hypervisor to be something other than QEmu + message_dict["content"]["environment"]["hypervisor"] = HypervisorType.firecracker + try: + _ = create_new_message(message_dict, factory=InstanceMessage) + raise AssertionError("An exception should have been raised before this point.") + except ValidationError as e: + assert e.errors()[0]["loc"] == ("content", "__root__") + assert ( + e.errors()[0]["msg"] == "GPU option is only supported for QEmu hypervisor" + ) + + def test_message_machine_port_mapping(): message_dict = { "chain": "ETH",