Fix: Added tests to cover that use case

aleph-im · Nov 27, 2024 · 4f578ac · 4f578ac
1 parent 1769024
commit 4f578ac
Show file tree

Hide file tree

Showing 4 changed files with 169 additions and 11 deletions.
diff --git a/aleph_message/models/execution/environment.py b/aleph_message/models/execution/environment.py
@@ -191,7 +191,7 @@ class HostRequirements(HashableModel):
     node: Optional[NodeRequirements] = Field(
         default=None, description="Required Compute Resource Node properties"
     )
-    gpus: Optional[List[GpuProperties]] = Field(
+    gpu: Optional[List[GpuProperties]] = Field(
         default=None, description="GPUs needed to pass-through from the host"
     )
 

diff --git a/aleph_message/models/execution/instance.py b/aleph_message/models/execution/instance.py
@@ -1,13 +1,11 @@
 from __future__ import annotations
 
-from typing import Self
-
 from pydantic import Field, root_validator
 
 from aleph_message.models.abstract import HashableModel
 
 from .abstract import BaseExecutableContent
-from .environment import InstanceEnvironment
+from .environment import InstanceEnvironment, HypervisorType
 from .volume import ParentVolume, PersistentVolumeSizeMib, VolumePersistence
 
 
@@ -35,9 +33,15 @@ class InstanceContent(BaseExecutableContent):
         description="Root filesystem of the system, will be booted by the kernel"
     )
 
-    @root_validator(pre=True)
-    def check_gpu_requirement(self) -> Self:
-        if self.requirements and self.requirements.gpus:
-            if self.payment and not self.payment.is_stream:
+    @root_validator()
+    def check_gpu_requirement(cls, values):
+        if values.get("requirements") and values.get("requirements").gpu:
+            if values.get("payment") and not values.get("payment").is_stream:
                 raise ValueError("Stream payment type is needed for GPU requirement")
-        return self
+
+            if (
+                values.get("environment")
+                and values.get("environment").hypervisor != HypervisorType.qemu
+            ):
+                raise ValueError("GPU option is only supported for QEmu hypervisor")
+        return values
diff --git a/aleph_message/tests/messages/instance_gpu_machine.json b/aleph_message/tests/messages/instance_gpu_machine.json
@@ -0,0 +1,111 @@
+{
+    "_id": {
+        "$oid": "6080402d7f44efefd611dc1e"
+    },
+    "chain": "ETH",
+    "sender": "0x9319Ad3B7A8E0eE24f2E639c40D8eD124C5520Ba",
+    "type": "INSTANCE",
+    "channel": "Fun-dApps",
+    "confirmed": true,
+    "content": {
+        "address": "0x9319Ad3B7A8E0eE24f2E639c40D8eD124C5520Ba",
+        "allow_amend": false,
+        "variables": {
+            "VM_CUSTOM_VARIABLE": "SOMETHING",
+            "VM_CUSTOM_VARIABLE_2": "32"
+        },
+        "environment": {
+            "reproducible": true,
+            "internet": false,
+            "aleph_api": false,
+            "shared_cache": false,
+            "hypervisor": "qemu"
+        },
+        "resources": {
+            "vcpus": 1,
+            "memory": 128,
+            "seconds": 30
+        },
+        "payment": {
+            "type": "superfluid",
+            "chain": "BASE",
+            "receiver": "0x9319Ad3B7A8E0eE24f2E639c40D8eD124C5520Ba"
+        },
+        "requirements": {
+            "cpu": {
+                "architecture": "x86_64"
+            },
+            "node": {
+                "node_hash": "4d4db19afca380fdf06ba7f916153d0f740db9de9eee23ad26ba96a90d8a2920"
+            },
+            "gpu": [
+                {
+                    "vendor": "NVIDIA",
+                    "device_class": "0300",
+                    "device_name": "NVIDIA H100",
+                    "device_id": "10de:2504"
+                }
+            ]
+        },
+        "rootfs": {
+            "parent": {
+              "ref": "549ec451d9b099cad112d4aaa2c00ac40fb6729a92ff252ff22eef0b5c3cb613",
+              "use_latest": true
+            },
+            "persistence": "host",
+            "size_mib": 20000
+        },
+        "authorized_keys": [
+            "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGULT6A41Msmw2KEu0R9MvUjhuWNAsbdeZ0DOwYbt4Qt user@example",
+            "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIH0jqdc5dmt75QhTrWqeHDV9xN8vxbgFyOYs2fuQl7CI",
+            "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDRsrQV1HVrcnskNhyH0may8TG9fHCPawpAi3ZgAWU6V/R7ezvZOHnZdaFeIsOpFbPbt/l67Fur3qniSXllI2kvuh2D4BBJ9PwwlB2sgWzFDF34ADsfLQf+C/vpwrWemEEE91Tpj0dWbnf219i3mZLxy/+5Sv6kUy9YJlzWnDEUbaMAZK2CXrlK90b9Ns7mT82h6h3x9dLF/oCjBAKOSxbH2X+KgsDEZT0soxyluDqKNgKflkav+pvKFyD4J9IWM4j36r80yW+OPGsHqWoWleEhprfNb60RJPwKAYCDiBiSg6wCq5P+kS15O79Ko45wPaYDUwhRoNTcrWeadvTaCZgz9X3KDHgrX6wzdKqzQwtQeabhCaIGLFRMNl1Oy/BR8VozPbIe/mY28IN84An50UYkbve7nOGJucKc4hKxZKEVPpnVpRtIoWGwBJY2fi6C6wy2pBa8UX4C4t9NLJjNQSwFBzYOrphLu3ZW9A+267nogQHGnsJ5xnQ/MXximP3BlwM= user@example"
+        ],
+        "volumes": [
+            {
+                "comment": "Python libraries. Read-only since a 'ref' is specified.",
+                "mount": "/opt/venv",
+                "ref": "5f31b0706f59404fad3d0bff97ef89ddf24da4761608ea0646329362c662ba51",
+                "use_latest": false
+            },
+            {
+                "comment": "Ephemeral storage, read-write but will not persist after the VM stops",
+                "mount": "/var/cache",
+                "ephemeral": true,
+                "size_mib": 5
+            },
+            {
+                "comment": "Working data persisted on the VM supervisor, not available on other nodes",
+                "mount": "/var/lib/sqlite",
+                "name": "sqlite-data",
+                "persistence": "host",
+                "size_mib": 10
+            },
+            {
+                "comment": "Working data persisted on the Aleph network. New VMs will try to use the latest version of this volume, with no guarantee against conflicts",
+                "mount": "/var/lib/statistics",
+                "name": "statistics",
+                "persistence": "store",
+                "size_mib": 10
+            },
+            {
+                "comment": "Raw drive to use by a process, do not mount it",
+                "name": "raw-data",
+                "persistence": "host",
+                "size_mib": 10
+            }
+        ],
+        "replaces": "0x9319Ad3B7A8E0eE24f2E639c40D8eD124C5520Ba",
+        "time": 1619017773.8950517
+    },
+    "item_type": "inline",
+    "signature": "0x372da8230552b8c3e65c05b31a0ff3a24666d66c575f8e11019f62579bf48c2b7fe2f0bbe907a2a5bf8050989cdaf8a59ff8a1cbcafcdef0656c54279b4aa0c71b",
+    "size": 749,
+    "time": 1619017773.8950577,
+    "confirmations": [
+        {
+            "chain": "ETH",
+            "height": 12284734,
+            "hash": "0x67f2f3cde5e94e70615c92629c70d22dc959a118f46e9411b29659c2fce87cdc"
+        }
+    ]
+}
diff --git a/aleph_message/tests/test_models.py b/aleph_message/tests/test_models.py
@@ -25,8 +25,9 @@
     create_message_from_json,
     create_new_message,
     parse_message,
+    PaymentType,
 )
-from aleph_message.models.execution.environment import AMDSEVPolicy
+from aleph_message.models.execution.environment import AMDSEVPolicy, HypervisorType
 from aleph_message.tests.download_messages import MESSAGES_STORAGE_PATH
 
 console = Console(color_system="windows")
@@ -172,7 +173,7 @@ def test_validation_on_confidential_options():
     path = Path(__file__).parent / "messages/instance_confidential_machine.json"
     message_dict = json.loads(path.read_text())
     # Patch the hypervisor to be something other than QEmu
-    message_dict["content"]["environment"]["hypervisor"] = "firecracker"
+    message_dict["content"]["environment"]["hypervisor"] = HypervisorType.firecracker
     try:
         _ = create_new_message(message_dict, factory=InstanceMessage)
         raise AssertionError("An exception should have been raised before this point.")
@@ -184,6 +185,48 @@ def test_validation_on_confidential_options():
         )
 
 
+def test_instance_message_machine_with_gpu_options():
+    path = Path(__file__).parent / "messages/instance_gpu_machine.json"
+    message = create_message_from_file(path, factory=InstanceMessage)
+
+    assert isinstance(message, InstanceMessage)
+    assert hash(message.content)
+    assert message.content.payment.type == PaymentType.superfluid
+    assert message.content.environment.hypervisor == HypervisorType.qemu
+
+
+def test_validation_on_gpu_payment_options():
+    """Ensure that a gpu option is only allowed for stream payments."""
+    path = Path(__file__).parent / "messages/instance_gpu_machine.json"
+    message_dict = json.loads(path.read_text())
+    # Patch the payment type to "hold" so the stream-payment check must fail
+    message_dict["content"]["payment"]["type"] = "hold"
+    try:
+        _ = create_new_message(message_dict, factory=InstanceMessage)
+        raise AssertionError("An exception should have been raised before this point.")
+    except ValidationError as e:
+        assert e.errors()[0]["loc"] == ("content", "__root__")
+        assert (
+            e.errors()[0]["msg"] == "Stream payment type is needed for GPU requirement"
+        )
+
+
+def test_validation_on_gpu_hypervisor_options():
+    """Ensure that a gpu option is only allowed for Qemu hypervisor."""
+    path = Path(__file__).parent / "messages/instance_gpu_machine.json"
+    message_dict = json.loads(path.read_text())
+    # Patch the hypervisor to be something other than QEmu
+    message_dict["content"]["environment"]["hypervisor"] = HypervisorType.firecracker
+    try:
+        _ = create_new_message(message_dict, factory=InstanceMessage)
+        raise AssertionError("An exception should have been raised before this point.")
+    except ValidationError as e:
+        assert e.errors()[0]["loc"] == ("content", "__root__")
+        assert (
+            e.errors()[0]["msg"] == "GPU option is only supported for QEmu hypervisor"
+        )
+
+
 def test_message_machine_port_mapping():
     message_dict = {
         "chain": "ETH",