ImageBind_zeroshot_demo2

Runtime error

Rajagopal committed on Aug 4, 2023

Commit

a48bf14

1 Parent(s): dfb5932

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -82,6 +82,29 @@ def video_text_zeroshot(image, text_list):
     return score_dict
 def inference(
@@ -96,7 +119,7 @@ def inference(
     elif task == "audio-text":
         result = audio_text_zeroshot(audio, text_list)
     elif task == "video-text":
-        result = image_text_zeroshot(image2, text_list)
     else:
         raise NotImplementedError
     return result

     return score_dict
+def doubleimage_text_zeroshot(image, image2, text_list):
+    """Score "|"-separated text labels against one or two images.
+
+    Args:
+        image: First image, forwarded to ``data.load_and_transform_vision_data``
+            (presumably a file path -- confirm against the caller).
+        image2: Optional second image of the same kind. When ``None`` only
+            ``image`` is scored, which matches the previous behavior.
+        text_list: Candidate labels in a single string, separated by ``|``.
+
+    Returns:
+        Dict mapping each label to its softmax score. When two images are
+        given, the score is the mean of the per-image softmax scores.
+    """
+    # ``image2`` used to be accepted but silently ignored; include it when the
+    # caller actually supplied one.
+    image_paths = [image] if image2 is None else [image, image2]
+    labels = [label.strip(" ") for label in text_list.strip(" ").split("|")]
+    inputs = {
+        ModalityType.TEXT: data.load_and_transform_text(labels, device),
+        ModalityType.VISION: data.load_and_transform_vision_data(image_paths, device),
+    }
+    with torch.no_grad():
+        embeddings = model(inputs)
+    # Assumes one similarity row per input image (the original squeezed a
+    # leading dimension of size 1 for the single-image case).
+    per_image_scores = torch.softmax(
+        embeddings[ModalityType.VISION] @ embeddings[ModalityType.TEXT].T, dim=-1
+    )
+    # Averaging over the image dimension keeps the result a flat list with one
+    # score per label; for a single image this equals the old ``squeeze(0)``.
+    scores = per_image_scores.mean(dim=0).tolist()
+    score_dict = dict(zip(labels, scores))
+    return score_dict
 def inference(
     elif task == "audio-text":
         result = audio_text_zeroshot(audio, text_list)
     elif task == "video-text":
+        result = doubleimage_text_zeroshot(image, image2, text_list)
     else:
         raise NotImplementedError
     return result