Commit

fix optimum.quanto version
noskill committed Dec 4, 2024
1 parent 8cc9012 commit 11913a0
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions multigen/loader.py
@@ -105,6 +105,7 @@ def load_pipeline(self, cls: Type[DiffusionPipeline], path, torch_dtype=torch.bf
             quantize(result, dtype=quantize_dtype)
 
         if result.device != device:
+            logger.debug(f"move pipe to {device}")
             result = result.to(dtype=torch_dtype, device=device)
         if result.dtype != torch_dtype:
             result = result.to(dtype=torch_dtype)
@@ -148,6 +149,7 @@ def cache_pipeline(self, pipe: DiffusionPipeline, descriptor: ModelDescriptor):
         item = pipe
         if pipe.device.type == 'cuda':
             device = pipe.device
+            logger.debug("deepcopy pipe from gpu to save it in cpu cache")
             item = cp.deepcopy(pipe.to('cpu'))
             pipe.to(device)
         self._cpu_pipes[descriptor] = item
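
Both added lines are debug-level log statements; the surrounding logic is unchanged. For context, the GPU-to-CPU caching pattern in the second hunk can be summarized in a minimal, self-contained sketch. The helper name cache_on_cpu and the plain dict used as the cache are illustrative assumptions, not the repository's API: the pipeline is moved to CPU, deep-copied into the cache so the cached copy holds no CUDA tensors, and then moved back so the caller's pipeline keeps running on its original device.

import copy as cp
import logging

logger = logging.getLogger(__name__)

def cache_on_cpu(cache: dict, key, pipe):
    # Keep a CPU-resident copy of the pipeline so it can be reused later
    # without holding GPU memory. (Hypothetical helper, mirrors cache_pipeline.)
    item = pipe
    if pipe.device.type == 'cuda':
        device = pipe.device
        logger.debug("deepcopy pipe from gpu to save it in cpu cache")
        # deepcopy after moving to CPU so the cached copy contains no CUDA tensors
        item = cp.deepcopy(pipe.to('cpu'))
        # restore the caller's pipeline to its original CUDA device
        pipe.to(device)
    cache[key] = item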
