Merge pull request #81 from crystian/AMD2
AMD support (Linux only)
crystian authored Jul 21, 2024
2 parents 209dc93 + b8d6a2f commit 65d5f37
Showing 7 changed files with 119 additions and 41 deletions.
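In short, the change hides the NVIDIA (pynvml) and AMD (pyrsmi) probes behind one set of dispatcher methods on `CGPUInfo`, so the monitor keeps working with whichever library initializes. A minimal sketch of the detection order, assuming both libraries are optional installs (a simplification, not the literal code from `general/gpu.py`):

```
def detect_gpu_backend():
    """Return 'nvidia', 'amd', or None, mirroring the pynvml/pyrsmi fallback in gpu.py."""
    try:
        import pynvml
        pynvml.nvmlInit()          # NVIDIA path (unchanged)
        return 'nvidia'
    except Exception:
        pass
    try:
        from pyrsmi import rocml
        rocml.smi_initialize()     # AMD path added by this PR (Linux only)
        return 'amd'
    except Exception:
        return None
```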
12 changes: 4 additions & 8 deletions README.md
@@ -36,7 +36,7 @@ You can configure the refresh rate and which resources to show:
![Monitors](./docs/monitor-settings.png)

> **Notes:**
> - The GPU data is only available when you use CUDA (only NVIDIA cards, sorry AMD users).
> - The GPU data is only available when you use CUDA (only NVIDIA cards, with partial support for AMD cards on Linux).
> - This extension needs ComfyUI 1915 (or higher).
> - The cost of the monitor is low (0.1 to 0.5% of utilization); you can disable it from settings (`Refresh rate` to `0`).
> - Data comes from these libraries:
@@ -512,6 +512,9 @@ You have predefined switches (string, latent, image, conditioning) but you can u

### Crystools

### 1.15.0 (21/07/2024)
- AMD branch merged into the main branch; it should work for AMD users on **Linux**

### 1.14.0 (15/07/2024)
- Tried to use AMD info, but it broke the installation on Windows, so I removed it ¯\_(ツ)_/¯
- AMD branch added; if you use AMD on Linux, you can try it (not tested by me)
@@ -580,13 +583,6 @@ You have predefined switches (string, latent, image, conditioning) but you can u
```
3. Start up ComfyUI.
#### For AMD users
If you are an AMD user on Linux, you can try the AMD branch
(replace the clone command in step 2 with this one):
```
git clone -b AMD https://github.com/crystian/ComfyUI-Crystools.git
```
### Install from manager
Search for `crystools` in the [manager](https://github.com/ltdrdata/ComfyUI-Manager.git) and install it.
2 changes: 1 addition & 1 deletion __init__.py
@@ -2,7 +2,7 @@
@author: Crystian
@title: Crystools
@nickname: Crystools
@version: 1.14.0
@version: 1.15.0
@project: "https://github.com/crystian/ComfyUI-Crystools",
@description: Plugins for multiple uses, mainly for debugging, you need them! IG: https://www.instagram.com/crystian.ia
"""
2 changes: 1 addition & 1 deletion core/version.py
@@ -1 +1 @@
version = "1.14.0"
version = "1.15.0"
137 changes: 109 additions & 28 deletions general/gpu.py
@@ -2,13 +2,17 @@
import pynvml
import comfy.model_management
from ..core import logger
from ctypes import *
from pyrsmi import rocml

class CGPUInfo:
"""
This class is responsible for getting information from GPU (ONLY).
"""
cuda = False
pynvmlLoaded = False
pyamdLoaded = False
anygpuLoaded = False
cudaAvailable = False
torchDevice = 'cpu'
cudaDevice = 'cpu'
@@ -25,32 +29,32 @@ def __init__(self):
try:
pynvml.nvmlInit()
self.pynvmlLoaded = True
logger.info('Pynvml (Nvidia) initialized.')
except Exception as e:
self.pynvmlLoaded = False
logger.error('Could not init pynvml.' + str(e))
logger.error('Could not init pynvml (Nvidia).' + str(e))

if self.pynvmlLoaded and pynvml.nvmlDeviceGetCount() > 0:
self.cudaDevicesFound = pynvml.nvmlDeviceGetCount()
if not self.pynvmlLoaded:
try:
rocml.smi_initialize()
self.pyamdLoaded = True
logger.info('Pyrsmi (AMD) initialized.')
except Exception as e:
logger.error('Could not init pyrsmi (AMD).' + str(e))

self.anygpuLoaded = self.pynvmlLoaded or self.pyamdLoaded

if self.anygpuLoaded and self.deviceGetCount() > 0:
self.cudaDevicesFound = self.deviceGetCount()

logger.info(f"GPU/s:")

# for simulate multiple GPUs (for testing) interchange these comments:
# for deviceIndex in range(3):
# deviceHandle = pynvml.nvmlDeviceGetHandleByIndex(0)
# deviceHandle = pynvml.nvmlDeviceGetHandleByIndex(0)
for deviceIndex in range(self.cudaDevicesFound):
deviceHandle = pynvml.nvmlDeviceGetHandleByIndex(deviceIndex)
gpuName = 'Unknown GPU'
deviceHandle = self.deviceGetHandleByIndex(deviceIndex)

try:
gpuName = pynvml.nvmlDeviceGetName(deviceHandle)
try:
gpuName = gpuName.decode('utf-8', errors='ignore')
except AttributeError as e:
pass

except UnicodeDecodeError as e:
gpuName = 'Unknown GPU (decoding error)'
print(f"UnicodeDecodeError: {e}")
gpuName = self.deviceGetName(deviceHandle, deviceIndex)

logger.info(f"{deviceIndex}) {gpuName}")

@@ -65,7 +69,7 @@ def __init__(self):
self.gpusTemperature.append(True)

self.cuda = True
logger.info(f'NVIDIA Driver: {pynvml.nvmlSystemGetDriverVersion()}')
logger.info(self.systemGetDriverVersion())
else:
logger.warn('No GPU with CUDA detected.')

@@ -107,12 +111,12 @@ def getStatus(self):
else:
gpuType = self.cudaDevice

if self.pynvmlLoaded and self.cuda and self.cudaAvailable:
if self.anygpuLoaded and self.cuda and self.cudaAvailable:
# for simulate multiple GPUs (for testing) interchange these comments:
# for deviceIndex in range(3):
# deviceHandle = pynvml.nvmlDeviceGetHandleByIndex(0)
# deviceHandle = self.deviceGetHandleByIndex(0)
for deviceIndex in range(self.cudaDevicesFound):
deviceHandle = pynvml.nvmlDeviceGetHandleByIndex(deviceIndex)
deviceHandle = self.deviceGetHandleByIndex(deviceIndex)

gpuUtilization = 0
vramPercent = 0
@@ -123,8 +127,7 @@ def getStatus(self):
# GPU Utilization
if self.switchGPU and self.gpusUtilization[deviceIndex]:
try:
utilization = pynvml.nvmlDeviceGetUtilizationRates(deviceHandle)
gpuUtilization = utilization.gpu
gpuUtilization = self.deviceGetUtilizationRates(deviceHandle)
except Exception as e:
if str(e) == "Unknown Error":
logger.error('For some reason, pynvml is not working in a laptop with only battery, try to connect and turn on the monitor')
@@ -137,20 +140,22 @@ def getStatus(self):
# VRAM
if self.switchVRAM and self.gpusVRAM[deviceIndex]:
# Torch or pynvml?, pynvml is more accurate with the system, torch is more accurate with comfyUI
memory = pynvml.nvmlDeviceGetMemoryInfo(deviceHandle)
vramUsed = memory.used
vramTotal = memory.total
memory = self.deviceGetMemoryInfo(deviceHandle)
vramUsed = memory['used']
vramTotal = memory['total']

# device = torch.device(gpuType)
# vramUsed = torch.cuda.memory_allocated(device)
# vramTotal = torch.cuda.get_device_properties(device).total_memory

vramPercent = vramUsed / vramTotal * 100
# check if vramTotal is not zero or None
if vramTotal and vramTotal != 0:
vramPercent = vramUsed / vramTotal * 100

# Temperature
if self.switchTemperature and self.gpusTemperature[deviceIndex]:
try:
gpuTemperature = pynvml.nvmlDeviceGetTemperature(deviceHandle, 0)
gpuTemperature = self.deviceGetTemperature(deviceHandle)
except Exception as e:
logger.error('Could not get GPU temperature. Turning off this feature. ' + str(e))
self.switchTemperature = False
@@ -167,3 +172,79 @@ def getStatus(self):
'device_type': gpuType,
'gpus': gpus,
}

def deviceGetCount(self):
if self.pynvmlLoaded:
return pynvml.nvmlDeviceGetCount()
elif self.pyamdLoaded:
return rocml.smi_get_device_count()
else:
return 0

def deviceGetHandleByIndex(self, index):
if self.pynvmlLoaded:
return pynvml.nvmlDeviceGetHandleByIndex(index)
elif self.pyamdLoaded:
return index
else:
return 0

def deviceGetName(self, deviceHandle, deviceIndex):
if self.pynvmlLoaded:
gpuName = 'Unknown GPU'

try:
gpuName = pynvml.nvmlDeviceGetName(deviceHandle)
try:
gpuName = gpuName.decode('utf-8', errors='ignore')
except AttributeError as e:
pass

except UnicodeDecodeError as e:
gpuName = 'Unknown GPU (decoding error)'
print(f"UnicodeDecodeError: {e}")

return gpuName
elif self.pyamdLoaded:
return rocml.smi_get_device_name(deviceIndex)
else:
return ''

def systemGetDriverVersion(self):
if self.pynvmlLoaded:
return f'NVIDIA Driver: {pynvml.nvmlSystemGetDriverVersion()}'
elif self.pyamdLoaded:
ver_str = create_string_buffer(256)
rocml.rocm_lib.rsmi_version_str_get(0, ver_str, 256)
return f'AMD Driver: {ver_str.value.decode()}'
else:
return 'Driver unknown'

def deviceGetUtilizationRates(self, deviceHandle):
if self.pynvmlLoaded:
return pynvml.nvmlDeviceGetUtilizationRates(deviceHandle).gpu
elif self.pyamdLoaded:
return rocml.smi_get_device_utilization(deviceHandle)
else:
return 0

def deviceGetMemoryInfo(self, deviceHandle):
if self.pynvmlLoaded:
mem = pynvml.nvmlDeviceGetMemoryInfo(deviceHandle)
return {'total': mem.total, 'used': mem.used}
elif self.pyamdLoaded:
mem_used = rocml.smi_get_device_memory_used(deviceHandle)
mem_total = rocml.smi_get_device_memory_total(deviceHandle)
return {'total': mem_total, 'used': mem_used}
else:
return {'total': 1, 'used': 1}

def deviceGetTemperature(self, deviceHandle):
if self.pynvmlLoaded:
return pynvml.nvmlDeviceGetTemperature(deviceHandle, pynvml.NVML_TEMPERATURE_GPU)
elif self.pyamdLoaded:
temp = c_int64(0)
rocml.rocm_lib.rsmi_dev_temp_metric_get(deviceHandle, 1, 0, byref(temp))
return temp.value / 1000
else:
return 0
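
For reference, a rough usage sketch of the dispatchers defined above. `info` is assumed to be a `CGPUInfo` instance that the extension already created; the module's relative imports keep it from being imported standalone, so treat this as illustrative only:

```
# Hypothetical walk over the detected GPUs, using only methods from this diff.
print(info.systemGetDriverVersion())         # 'NVIDIA Driver: ...' or 'AMD Driver: ...'

for i in range(info.deviceGetCount()):
    handle = info.deviceGetHandleByIndex(i)  # NVML handle, or just the index on AMD
    mem = info.deviceGetMemoryInfo(handle)   # {'total': ..., 'used': ...}
    vram_pct = mem['used'] / max(mem['total'], 1) * 100
    print(f"{info.deviceGetName(handle, i)}: "
          f"{info.deviceGetUtilizationRates(handle)}% busy, "
          f"{vram_pct:.1f}% VRAM, "
          f"{info.deviceGetTemperature(handle)} °C")
```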
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -1,9 +1,9 @@
[project]
name = "comfyui-crystools"
description = "With this suit, you can see the resources monitor, progress bar & time elapsed, metadata and compare between two images, compare between two JSONs, show any value to console/display, pipes, and more!\nThis provides better nodes to load/save images, previews, etc, and see \"hidden\" data without loading a new workflow."
version = "1.14.0"
version = "1.15.0"
license = "LICENSE"
dependencies = ["deepdiff", "torch", "numpy", "Pillow>=9.5.0", "pynvml", "py-cpuinfo>=9.0.0"]
dependencies = ["deepdiff", "torch", "numpy", "Pillow>=9.5.0", "pynvml", "py-cpuinfo>=9.0.0", "pyrsmi"]

[project.urls]
Repository = "https://github.com/crystian/ComfyUI-Crystools"
1 change: 1 addition & 0 deletions requirements.txt
@@ -4,4 +4,5 @@ numpy
Pillow>=9.5.0
pynvml
py-cpuinfo>=9.0.0
pyrsmi
piexif
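
The only new runtime dependency is `pyrsmi` (Python bindings for AMD's ROCm SMI). A minimal way for an existing install to pick it up, assuming a standard pip setup:

```
pip install -r requirements.txt
# or only the new AMD dependency:
pip install pyrsmi
```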
2 changes: 1 addition & 1 deletion version
@@ -1 +1 @@
1.14.0
1.15.0
