From 70d198b52862fd1f76be23bb3ab3ac9162bb8a95 Mon Sep 17 00:00:00 2001 From: gshaibi Date: Wed, 8 May 2024 13:28:30 +0300 Subject: [PATCH] Add jupyter-notebook component to Makefile and push-all target --- Dockerfile | 3 +++ Makefile | 3 +++ cmd/nvidia-smi/main.go | 23 ++++++++++------------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/Dockerfile b/Dockerfile index 767a646..2f66ca7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,6 +38,9 @@ COPY ./cmd/mig-faker/ ./cmd/mig-faker/ COPY ./internal/ ./internal/ RUN --mount=type=cache,target=/root/.cache/go-build make build COMPONENT=mig-faker +FROM jupyter/minimal-notebook as jupyter-notebook +COPY --from=nvidia-smi-builder /go/src/github.com/run-ai/fake-gpu-operator/bin/nvidia-smi /bin/ + FROM ubuntu as device-plugin COPY --from=device-plugin-builder /go/src/github.com/run-ai/fake-gpu-operator/bin/device-plugin /bin/ COPY --from=nvidia-smi-builder /go/src/github.com/run-ai/fake-gpu-operator/bin/nvidia-smi /bin/ diff --git a/Makefile b/Makefile index a33538a..9f19bb5 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,7 @@ images: make image COMPONENT=status-exporter make image COMPONENT=topology-server make image COMPONENT=mig-faker + make image COMPONENT=jupyter-notebook .PHONY: images push: @@ -42,6 +43,7 @@ push-all: make push COMPONENT=status-exporter make push COMPONENT=topology-server make push COMPONENT=mig-faker + make push COMPONENT=jupyter-notebook .PHONY: push-all restart: @@ -57,6 +59,7 @@ deploy-all: make image push COMPONENT=status-exporter make image push COMPONENT=topology-server make image push COMPONENT=mig-faker + make image push COMPONENT=jupyter-notebook .PHONY: deploy-all image-test: diff --git a/cmd/nvidia-smi/main.go b/cmd/nvidia-smi/main.go index ca05268..c7c94a0 100644 --- a/cmd/nvidia-smi/main.go +++ b/cmd/nvidia-smi/main.go @@ -64,23 +64,20 @@ func getNvidiaSmiArgs() (args nvidiaSmiArgs) { args.GpuTotalMem = int(float64(nodeTopology.GpuMemory) * gpuPortion) var gpuIdx int - if os.Getenv("NVIDIA_VISIBLE_DEVICES") == "" { - // Whole GPU is used - podName := os.Getenv("HOSTNAME") - // Search gpu for the podName - for idx, gpu := range nodeTopology.Gpus { - if gpu.Status.AllocatedBy.Pod == podName { + currentPodName := os.Getenv("HOSTNAME") + currentPodUuid := os.Getenv("POD_UUID") + for idx, gpu := range nodeTopology.Gpus { + if gpu.Status.AllocatedBy.Pod == currentPodName { + gpuIdx = idx + break + } + + for podUuid := range gpu.Status.PodGpuUsageStatus { + if string(podUuid) == currentPodUuid { gpuIdx = idx break } } - } else { - // Shared GPU is used - gpuIdxStr := os.Getenv("NVIDIA_VISIBLE_DEVICES") - gpuIdx, err = strconv.Atoi(gpuIdxStr) - if err != nil { - panic(err) - } } args.GpuIdx = gpuIdx