diff --git a/clip_interrogator/clip_interrogator.py b/clip_interrogator/clip_interrogator.py
index 02a9b896..77a902cb 100644
--- a/clip_interrogator/clip_interrogator.py
+++ b/clip_interrogator/clip_interrogator.py
@@ -197,7 +197,7 @@ def generate_caption(self, pil_image: Image) -> str:
     def image_to_features(self, image: Image) -> torch.Tensor:
         self._prepare_clip()
         images = self.clip_preprocess(image).unsqueeze(0).to(self.device)
-        with torch.no_grad(), torch.cuda.amp.autocast():
+        with torch.no_grad(), torch.amp.autocast(device_type='cuda' if self.device == 'cuda' else 'cpu'):
             image_features = self.clip_model.encode_image(images)
             image_features /= image_features.norm(dim=-1, keepdim=True)
         return image_features
@@ -257,7 +257,7 @@ def interrogate(self, image: Image, min_flavors: int=8, max_flavors: int=32, cap
     def rank_top(self, image_features: torch.Tensor, text_array: List[str], reverse: bool=False) -> str:
         self._prepare_clip()
         text_tokens = self.tokenize([text for text in text_array]).to(self.device)
-        with torch.no_grad(), torch.cuda.amp.autocast():
+        with torch.no_grad(), torch.amp.autocast(device_type='cuda' if self.device == 'cuda' else 'cpu'):
             text_features = self.clip_model.encode_text(text_tokens)
             text_features /= text_features.norm(dim=-1, keepdim=True)
             similarity = text_features @ image_features.T
@@ -268,7 +268,7 @@ def rank_top(self, image_features: torch.Tensor, text_array: List[str], reverse: bool=False) -> str:
     def similarity(self, image_features: torch.Tensor, text: str) -> float:
         self._prepare_clip()
         text_tokens = self.tokenize([text]).to(self.device)
-        with torch.no_grad(), torch.cuda.amp.autocast():
+        with torch.no_grad(), torch.amp.autocast(device_type='cuda' if self.device == 'cuda' else 'cpu'):
             text_features = self.clip_model.encode_text(text_tokens)
             text_features /= text_features.norm(dim=-1, keepdim=True)
             similarity = text_features @ image_features.T
@@ -277,7 +277,7 @@ def similarity(self, image_features: torch.Tensor, text: str) -> float:
     def similarities(self, image_features: torch.Tensor, text_array: List[str]) -> List[float]:
         self._prepare_clip()
         text_tokens = self.tokenize([text for text in text_array]).to(self.device)
-        with torch.no_grad(), torch.cuda.amp.autocast():
+        with torch.no_grad(), torch.amp.autocast(device_type='cuda' if self.device == 'cuda' else 'cpu'):
             text_features = self.clip_model.encode_text(text_tokens)
             text_features /= text_features.norm(dim=-1, keepdim=True)
             similarity = text_features @ image_features.T
@@ -319,7 +319,7 @@ def __init__(self, labels:List[str], desc:str, ci: Interrogator):
         chunks = np.array_split(self.labels, max(1, len(self.labels)/config.chunk_size))
         for chunk in tqdm(chunks, desc=f"Preprocessing {desc}" if desc else None, disable=self.config.quiet):
             text_tokens = self.tokenize(chunk).to(self.device)
-            with torch.no_grad(), torch.cuda.amp.autocast():
+            with torch.no_grad(), torch.amp.autocast(device_type='cuda' if self.device == 'cuda' else 'cpu'):
                 text_features = clip_model.encode_text(text_tokens)
                 text_features /= text_features.norm(dim=-1, keepdim=True)
                 text_features = text_features.half().cpu().numpy()
@@ -373,7 +373,7 @@ def _load_cached(self, desc:str, hash:str, sanitized_name:str) -> bool:
     def _rank(self, image_features: torch.Tensor, text_embeds: torch.Tensor, top_count: int=1, reverse: bool=False) -> str:
         top_count = min(top_count, len(text_embeds))
         text_embeds = torch.stack([torch.from_numpy(t) for t in text_embeds]).to(self.device)
-        with torch.cuda.amp.autocast():
+        with torch.amp.autocast(device_type='cuda' if self.device == 'cuda' else 'cpu'):
            similarity = image_features @ text_embeds.T
            if reverse:
                similarity = -similarity
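
For reference, a minimal standalone sketch of the pattern this patch applies at every call site: replacing the deprecated CUDA-only torch.cuda.amp.autocast() context manager with the device-agnostic torch.amp.autocast(device_type=...), which selects CPU autocast (bfloat16 by default) when the model is not on a CUDA device. The encode_normalized helper and its arguments are illustrative, not part of the repository.

    import torch

    def encode_normalized(model: torch.nn.Module, x: torch.Tensor, device: str) -> torch.Tensor:
        # Old form, CUDA-only and deprecated in recent PyTorch releases:
        #     with torch.no_grad(), torch.cuda.amp.autocast():
        # New form, valid on both CUDA and CPU-only builds:
        with torch.no_grad(), torch.amp.autocast(device_type='cuda' if device == 'cuda' else 'cpu'):
            features = model(x.to(device))
        # L2-normalize along the feature dimension, mirroring the encode_* call sites above
        return features / features.norm(dim=-1, keepdim=True)

One consequence of the expression used in the diff: on any non-CUDA device (including 'mps'), autocast falls back to the CPU backend rather than being disabled outright.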