flask-docker

Sleeping

App Files Files Community

ning8429 committed on Oct 26, 2024

Commit

531c3cd

verified ·

1 Parent(s): 4dd8b9b

Update clip_model.py

Browse files

Files changed (1) hide show

clip_model.py +15 -8

clip_model.py CHANGED Viewed

@@ -4,7 +4,7 @@ from PIL import Image
 from transformers import ChineseCLIPProcessor, ChineseCLIPModel
 class ClipModel:
-    def __init__(self, model_name="OFA-Sys/chinese-clip-vit-base-patch16", model_path=None):
         # Set device
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -19,13 +19,20 @@ class ClipModel:
         self.processor = ChineseCLIPProcessor.from_pretrained(model_name)
-    def clip_result(self, image_path, vocab_path='./chiikawa/word_list.txt', top_k=3):
-        # Load image
-        image = Image.open(image_path)
         # Load Chinese vocabulary
         with open(vocab_path, 'r', encoding='utf-8') as f:
-            vocab = [line.strip() for line in f.readlines()]
         # Process images and texts
         batch_size = 16  # Process 16 vocab at a time
@@ -35,8 +42,8 @@ class ClipModel:
         torch.cuda.empty_cache()
         with torch.no_grad():
-            for i in range(0, len(vocab), batch_size):
-                batch_vocab = vocab[i:i + batch_size]
                 inputs = self.processor(
                     text=batch_vocab,
                     images=image,
@@ -56,7 +63,7 @@ class ClipModel:
         # Find top-3 similarities
         top_k_indices = torch.topk(similarity, top_k).indices.tolist()
-        top_k_words = [vocab[idx] for idx in top_k_indices]
         # 6. 輸出最接近的前3名中文詞彙
         return top_k_words

 from transformers import ChineseCLIPProcessor, ChineseCLIPModel
 class ClipModel:
+    def __init__(self, model_name="OFA-Sys/chinese-clip-vit-base-patch16", model_path=None, vocab_path='./chiikawa/word_list.txt'):
         # Set device
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.processor = ChineseCLIPProcessor.from_pretrained(model_name)
+        print("***** Clip Model LOAD DONE *****")
         # Load Chinese vocabulary
         with open(vocab_path, 'r', encoding='utf-8') as f:
+            self.vocab = [line.strip() for line in f.readlines()]
+    def clip_result(self, image_path, top_k=3):
+        """
+        給定圖片路徑，返回最接近的 top_k 詞彙
+        """
+        # Load image
+        image = Image.open(image_path)
+        print(f"===== Clip Model_clip_result : {image_path} ===== ")
         # Process images and texts
         batch_size = 16  # Process 16 vocab at a time
         torch.cuda.empty_cache()
         with torch.no_grad():
+            for i in range(0, len(self.vocab), batch_size):
+                batch_vocab = self.vocab[i:i + batch_size]
                 inputs = self.processor(
                     text=batch_vocab,
                     images=image,
         # Find top-3 similarities
         top_k_indices = torch.topk(similarity, top_k).indices.tolist()
+        top_k_words = [self.vocab[idx] for idx in top_k_indices]
         # 6. 輸出最接近的前3名中文詞彙
         return top_k_words