Merge pull request #81 from crystian/AMD2
AMD support (Linux only)
crystian authored Jul 21, 2024
2 parents 209dc93 + b8d6a2f commit 65d5f37
Showing 7 changed files with 119 additions and 41 deletions.
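In short, the change hides the NVIDIA (pynvml) and AMD (pyrsmi) probes behind one set of dispatcher methods on `CGPUInfo`, so the monitor keeps working with whichever library initializes. A minimal sketch of the detection order, assuming both libraries are optional installs (a simplification, not the literal code from `general/gpu.py`):

```
def detect_gpu_backend():
    """Return 'nvidia', 'amd', or None, mirroring the pynvml/pyrsmi fallback in gpu.py."""
    try:
        import pynvml
        pynvml.nvmlInit()          # NVIDIA path (unchanged)
        return 'nvidia'
    except Exception:
        pass
    try:
        from pyrsmi import rocml
        rocml.smi_initialize()     # AMD path added by this PR (Linux only)
        return 'amd'
    except Exception:
        return None
```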
12 changes: 4 additions & 8 deletions README.md
@@ -36,7 +36,7 @@ You can configure the refresh rate and which resources to show:
![Monitors](./docs/monitor-settings.png)

> **Notes:**
> - The GPU data is only available when you use CUDA (only NVIDIA cards, sorry AMD users).
> - The GPU data is only available when you use CUDA (only NVIDIA cards, with partial support for AMD cards on Linux).
> - This extension needs ComfyUI 1915 (or higher).
> - The cost of the monitor is low (0.1 to 0.5% of utilization); you can disable it from settings (`Refresh rate` to `0`).
> - Data comes from these libraries:
@@ -512,6 +512,9 @@ You have predefined switches (string, latent, image, conditioning) but you can u

### Crystools

### 1.15.0 (21/07/2024)
- AMD branch merged into the main branch; it should work for AMD users on **Linux**

### 1.14.0 (15/07/2024)
- Tried to use AMD info, but it broke the installation on Windows, so I removed it ¯\_(ツ)_/¯
- AMD branch added; if you use AMD on Linux, you can try it (not tested by me)
@@ -580,13 +583,6 @@ You have predefined switches (string, latent, image, conditioning) but you can u
```
3. Start up ComfyUI.
#### For AMD users
If you are an AMD user on Linux, you can try the AMD branch
(replace the clone command in step 2 with this one):
```
git clone -b AMD https://github.com/crystian/ComfyUI-Crystools.git
```
### Install from manager
Search for `crystools` in the [manager](https://github.com/ltdrdata/ComfyUI-Manager.git) and install it.
2 changes: 1 addition & 1 deletion __init__.py
@@ -2,7 +2,7 @@
@author: Crystian
@title: Crystools
@nickname: Crystools
@version: 1.14.0
@version: 1.15.0
@project: "https://github.com/crystian/ComfyUI-Crystools",
@description: Plugins for multiple uses, mainly for debugging, you need them! IG: https://www.instagram.com/crystian.ia
"""
2 changes: 1 addition & 1 deletion core/version.py
@@ -1 +1 @@
version = "1.14.0"
version = "1.15.0"
137 changes: 109 additions & 28 deletions general/gpu.py
@@ -2,13 +2,17 @@
import pynvml
import comfy.model_management
from ..core import logger
from ctypes import *
from pyrsmi import rocml

class CGPUInfo:
"""
This class is responsible for getting information from GPU (ONLY).
"""
cuda = False
pynvmlLoaded = False
pyamdLoaded = False
anygpuLoaded = False
cudaAvailable = False
torchDevice = 'cpu'
cudaDevice = 'cpu'
@@ -25,32 +29,32 @@ def __init__(self):
try:
pynvml.nvmlInit()
self.pynvmlLoaded = True
logger.info('Pynvml (Nvidia) initialized.')
except Exception as e:
self.pynvmlLoaded = False
logger.error('Could not init pynvml.' + str(e))
logger.error('Could not init pynvml (Nvidia).' + str(e))

if self.pynvmlLoaded and pynvml.nvmlDeviceGetCount() > 0:
self.cudaDevicesFound = pynvml.nvmlDeviceGetCount()
if not self.pynvmlLoaded:
try:
rocml.smi_initialize()
self.pyamdLoaded = True
logger.info('Pyrsmi (AMD) initialized.')
except Exception as e:
logger.error('Could not init pyrsmi (AMD).' + str(e))

self.anygpuLoaded = self.pynvmlLoaded or self.pyamdLoaded

if self.anygpuLoaded and self.deviceGetCount() > 0:
self.cudaDevicesFound = self.deviceGetCount()

logger.info(f"GPU/s:")

# for simulate multiple GPUs (for testing) interchange these comments:
# for deviceIndex in range(3):
# deviceHandle = pynvml.nvmlDeviceGetHandleByIndex(0)
# deviceHandle = pynvml.nvmlDeviceGetHandleByIndex(0)
for deviceIndex in range(self.cudaDevicesFound):
deviceHandle = pynvml.nvmlDeviceGetHandleByIndex(deviceIndex)
gpuName = 'Unknown GPU'
deviceHandle = self.deviceGetHandleByIndex(deviceIndex)

try:
gpuName = pynvml.nvmlDeviceGetName(deviceHandle)
try:
gpuName = gpuName.decode('utf-8', errors='ignore')
except AttributeError as e:
pass

except UnicodeDecodeError as e:
gpuName = 'Unknown GPU (decoding error)'
print(f"UnicodeDecodeError: {e}")
gpuName = self.deviceGetName(deviceHandle, deviceIndex)

logger.info(f"{deviceIndex}) {gpuName}")

@@ -65,7 +69,7 @@ def __init__(self):
self.gpusTemperature.append(True)

self.cuda = True
logger.info(f'NVIDIA Driver: {pynvml.nvmlSystemGetDriverVersion()}')
logger.info(self.systemGetDriverVersion())
else:
logger.warn('No GPU with CUDA detected.')

@@ -107,12 +111,12 @@ def getStatus(self):
else:
gpuType = self.cudaDevice

if self.pynvmlLoaded and self.cuda and self.cudaAvailable:
if self.anygpuLoaded and self.cuda and self.cudaAvailable:
# for simulate multiple GPUs (for testing) interchange these comments:
# for deviceIndex in range(3):
# deviceHandle = pynvml.nvmlDeviceGetHandleByIndex(0)
# deviceHandle = self.deviceGetHandleByIndex(0)
for deviceIndex in range(self.cudaDevicesFound):
deviceHandle = pynvml.nvmlDeviceGetHandleByIndex(deviceIndex)
deviceHandle = self.deviceGetHandleByIndex(deviceIndex)

gpuUtilization = 0
vramPercent = 0
@@ -123,8 +127,7 @@ def getStatus(self):
# GPU Utilization
if self.switchGPU and self.gpusUtilization[deviceIndex]:
try:
utilization = pynvml.nvmlDeviceGetUtilizationRates(deviceHandle)
gpuUtilization = utilization.gpu
gpuUtilization = self.deviceGetUtilizationRates(deviceHandle)
except Exception as e:
if str(e) == "Unknown Error":
logger.error('For some reason, pynvml is not working in a laptop with only battery, try to connect and turn on the monitor')
@@ -137,20 +140,22 @@ def getStatus(self):
# VRAM
if self.switchVRAM and self.gpusVRAM[deviceIndex]:
# Torch or pynvml?, pynvml is more accurate with the system, torch is more accurate with comfyUI
memory = pynvml.nvmlDeviceGetMemoryInfo(deviceHandle)
vramUsed = memory.used
vramTotal = memory.total
memory = self.deviceGetMemoryInfo(deviceHandle)
vramUsed = memory['used']
vramTotal = memory['total']

# device = torch.device(gpuType)
# vramUsed = torch.cuda.memory_allocated(device)
# vramTotal = torch.cuda.get_device_properties(device).total_memory

vramPercent = vramUsed / vramTotal * 100
# check if vramTotal is not zero or None
if vramTotal and vramTotal != 0:
vramPercent = vramUsed / vramTotal * 100

# Temperature
if self.switchTemperature and self.gpusTemperature[deviceIndex]:
try:
gpuTemperature = pynvml.nvmlDeviceGetTemperature(deviceHandle, 0)
gpuTemperature = self.deviceGetTemperature(deviceHandle)
except Exception as e:
logger.error('Could not get GPU temperature. Turning off this feature. ' + str(e))
self.switchTemperature = False
@@ -167,3 +172,79 @@ def getStatus(self):
'device_type': gpuType,
'gpus': gpus,
}

def deviceGetCount(self):
if self.pynvmlLoaded:
return pynvml.nvmlDeviceGetCount()
elif self.pyamdLoaded:
return rocml.smi_get_device_count()
else:
return 0

def deviceGetHandleByIndex(self, index):
if self.pynvmlLoaded:
return pynvml.nvmlDeviceGetHandleByIndex(index)
elif self.pyamdLoaded:
return index
else:
return 0

def deviceGetName(self, deviceHandle, deviceIndex):
if self.pynvmlLoaded:
gpuName = 'Unknown GPU'

try:
gpuName = pynvml.nvmlDeviceGetName(deviceHandle)
try:
gpuName = gpuName.decode('utf-8', errors='ignore')
except AttributeError as e:
pass

except UnicodeDecodeError as e:
gpuName = 'Unknown GPU (decoding error)'
print(f"UnicodeDecodeError: {e}")

return gpuName
elif self.pyamdLoaded:
return rocml.smi_get_device_name(deviceIndex)
else:
return ''

def systemGetDriverVersion(self):
if self.pynvmlLoaded:
return f'NVIDIA Driver: {pynvml.nvmlSystemGetDriverVersion()}'
elif self.pyamdLoaded:
ver_str = create_string_buffer(256)
rocml.rocm_lib.rsmi_version_str_get(0, ver_str, 256)
return f'AMD Driver: {ver_str.value.decode()}'
else:
return 'Driver unknown'

def deviceGetUtilizationRates(self, deviceHandle):
if self.pynvmlLoaded:
return pynvml.nvmlDeviceGetUtilizationRates(deviceHandle).gpu
elif self.pyamdLoaded:
return rocml.smi_get_device_utilization(deviceHandle)
else:
return 0

def deviceGetMemoryInfo(self, deviceHandle):
if self.pynvmlLoaded:
mem = pynvml.nvmlDeviceGetMemoryInfo(deviceHandle)
return {'total': mem.total, 'used': mem.used}
elif self.pyamdLoaded:
mem_used = rocml.smi_get_device_memory_used(deviceHandle)
mem_total = rocml.smi_get_device_memory_total(deviceHandle)
return {'total': mem_total, 'used': mem_used}
else:
return {'total': 1, 'used': 1}

def deviceGetTemperature(self, deviceHandle):
if self.pynvmlLoaded:
return pynvml.nvmlDeviceGetTemperature(deviceHandle, pynvml.NVML_TEMPERATURE_GPU)
elif self.pyamdLoaded:
temp = c_int64(0)
rocml.rocm_lib.rsmi_dev_temp_metric_get(deviceHandle, 1, 0, byref(temp))
return temp.value / 1000
else:
return 0
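
For reference, a rough usage sketch of the dispatchers defined above. `info` is assumed to be a `CGPUInfo` instance that the extension already created; the module's relative imports keep it from being imported standalone, so treat this as illustrative only:

```
# Hypothetical walk over the detected GPUs, using only methods from this diff.
print(info.systemGetDriverVersion())         # 'NVIDIA Driver: ...' or 'AMD Driver: ...'

for i in range(info.deviceGetCount()):
    handle = info.deviceGetHandleByIndex(i)  # NVML handle, or just the index on AMD
    mem = info.deviceGetMemoryInfo(handle)   # {'total': ..., 'used': ...}
    vram_pct = mem['used'] / max(mem['total'], 1) * 100
    print(f"{info.deviceGetName(handle, i)}: "
          f"{info.deviceGetUtilizationRates(handle)}% busy, "
          f"{vram_pct:.1f}% VRAM, "
          f"{info.deviceGetTemperature(handle)} °C")
```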
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -1,9 +1,9 @@
[project]
name = "comfyui-crystools"
description = "With this suit, you can see the resources monitor, progress bar & time elapsed, metadata and compare between two images, compare between two JSONs, show any value to console/display, pipes, and more!\nThis provides better nodes to load/save images, previews, etc, and see \"hidden\" data without loading a new workflow."
version = "1.14.0"
version = "1.15.0"
license = "LICENSE"
dependencies = ["deepdiff", "torch", "numpy", "Pillow>=9.5.0", "pynvml", "py-cpuinfo>=9.0.0"]
dependencies = ["deepdiff", "torch", "numpy", "Pillow>=9.5.0", "pynvml", "py-cpuinfo>=9.0.0", "pyrsmi"]

[project.urls]
Repository = "https://github.com/crystian/ComfyUI-Crystools"
1 change: 1 addition & 0 deletions requirements.txt
@@ -4,4 +4,5 @@ numpy
Pillow>=9.5.0
pynvml
py-cpuinfo>=9.0.0
pyrsmi
piexif
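
The only new runtime dependency is `pyrsmi` (Python bindings for AMD's ROCm SMI). A minimal way for an existing install to pick it up, assuming a standard pip setup:

```
pip install -r requirements.txt
# or only the new AMD dependency:
pip install pyrsmi
```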
2 changes: 1 addition & 1 deletion version
@@ -1 +1 @@
1.14.0
1.15.0
