Commit

fix optimum.quanto version
noskill committed Dec 4, 2024
1 parent 8cc9012 commit 11913a0
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions multigen/loader.py
@@ -105,6 +105,7 @@ def load_pipeline(self, cls: Type[DiffusionPipeline], path, torch_dtype=torch.bf
             quantize(result, dtype=quantize_dtype)
 
         if result.device != device:
+            logger.debug(f"move pipe to {device}")
             result = result.to(dtype=torch_dtype, device=device)
         if result.dtype != torch_dtype:
             result = result.to(dtype=torch_dtype)
@@ -148,6 +149,7 @@ def cache_pipeline(self, pipe: DiffusionPipeline, descriptor: ModelDescriptor):
         item = pipe
         if pipe.device.type == 'cuda':
             device = pipe.device
+            logger.debug("deepcopy pipe from gpu to save it in cpu cache")
             item = cp.deepcopy(pipe.to('cpu'))
             pipe.to(device)
         self._cpu_pipes[descriptor] = item
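
Both added lines are debug-level log statements; the surrounding logic is unchanged. For context, the GPU-to-CPU caching pattern in the second hunk can be summarized in a minimal, self-contained sketch. The helper name cache_on_cpu and the plain dict used as the cache are illustrative assumptions, not the repository's API: the pipeline is moved to CPU, deep-copied into the cache so the cached copy holds no CUDA tensors, and then moved back so the caller's pipeline keeps running on its original device.

import copy as cp
import logging

logger = logging.getLogger(__name__)

def cache_on_cpu(cache: dict, key, pipe):
    # Keep a CPU-resident copy of the pipeline so it can be reused later
    # without holding GPU memory. (Hypothetical helper, mirrors cache_pipeline.)
    item = pipe
    if pipe.device.type == 'cuda':
        device = pipe.device
        logger.debug("deepcopy pipe from gpu to save it in cpu cache")
        # deepcopy after moving to CPU so the cached copy contains no CUDA tensors
        item = cp.deepcopy(pipe.to('cpu'))
        # restore the caller's pipeline to its original CUDA device
        pipe.to(device)
    cache[key] = item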
