Spaces:
Build error
style: run linter
Browse files
- app.py +0 -1
- embed_captions.py +4 -2
- embed_images.py +14 -9
- image2text.py +4 -2
- text2image.py +10 -8
- utils.py +12 -7
app.py
CHANGED
@@ -3,7 +3,6 @@ import streamlit as st
 import image2text
 import text2image
 
-
 PAGES = {"Text to Image": text2image, "Image to Text": image2text}
 
 st.sidebar.title("Navigation")
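For orientation, a minimal sketch of how a PAGES dict like this is typically dispatched from the Streamlit sidebar; the selection code is not part of this hunk, and the radio label and model name below are placeholders, not the app's actual code:

import streamlit as st

import image2text
import text2image

PAGES = {"Text to Image": text2image, "Image to Text": image2text}

st.sidebar.title("Navigation")
selection = st.sidebar.radio("Go to", list(PAGES.keys()))  # pick a page by its title
PAGES[selection].app("koclip-base")  # each page module exposes app(model_name)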
embed_captions.py
CHANGED
@@ -1,13 +1,15 @@
-import csv
 import argparse
+import csv
+
 from utils import load_model
 
+
 def main(args):
     caption_txt_path = args.text_path
     f = open(caption_txt_path)
     captions = [sent.strip() for sent in f.readlines()]
 
-    for model_name in ["koclip-base", "koclip-large"]:
+    for model_name in ["koclip-base", "koclip-large"]:
         model, processor = load_model(f"koclip/{model_name}")
         captions_processed = [processor(sent,images=None,return_tensors='jax') for sent in captions]
         vec = [np.asarray(model.get_text_features(**c)) for c in captions_processed]
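The hunk above cuts off before the text vectors are written out. A minimal sketch of a write step that would mirror embed_images.py below; the numpy import and the output path are assumptions, neither appears in this diff:

import csv

import numpy as np


def write_caption_features(vecs, out_path="features/captions.tsv"):
    # vecs: list of 1 x D arrays returned by model.get_text_features(**c)
    with open(out_path, "a+") as f:
        writer = csv.writer(f, delimiter="\t")
        for idx, vec in enumerate(vecs):
            feature = np.asarray(vec).ravel()
            writer.writerow([idx, ",".join(str(x) for x in feature)])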
embed_images.py
CHANGED
@@ -2,20 +2,19 @@ import argparse
 import csv
 import os
 
-from PIL import Image
-
-from utils import load_model
 import jax.numpy as jnp
 from jax import jit
-
+from PIL import Image
 from tqdm import tqdm
 
+from utils import load_model
+
 
 def main(args):
     root = args.image_path
     files = list(os.listdir(root))
     for f in files:
-        assert f[-4:] == ".jpg"
+        assert f[-4:] == ".jpg"
     for model_name in ["koclip-base", "koclip-large"]:
         model, processor = load_model(f"koclip/{model_name}")
         with tqdm(total=len(files)) as pbar:
@@ -24,22 +23,28 @@ def main(args):
                 image_ids = []
                 for idx in range(counter, min(len(files), counter + args.batch_size)):
                     file_ = files[idx]
-                    image = Image.open(os.path.join(root, file_)).convert("RGB")
+                    image = Image.open(os.path.join(root, file_)).convert("RGB")
                     images.append(image)
                     image_ids.append(file_)
 
                 pbar.update(args.batch_size)
                 try:
-                    inputs = processor(text=[""], images=images, return_tensors="jax", padding=True)
+                    inputs = processor(
+                        text=[""], images=images, return_tensors="jax", padding=True
+                    )
                 except:
                     print(image_ids)
                     break
-                inputs["pixel_values"] = jnp.transpose(inputs["pixel_values"], axes=[0, 2, 3, 1])
+                inputs["pixel_values"] = jnp.transpose(
+                    inputs["pixel_values"], axes=[0, 2, 3, 1]
+                )
                 features = model(**inputs).image_embeds
                 with open(os.path.join(args.out_path, f"{model_name}.tsv"), "a+") as f:
                     writer = csv.writer(f, delimiter="\t")
                     for image_id, feature in zip(image_ids, features):
-                        writer.writerow([image_id, ",".join(map(lambda x: str(x), feature))])
+                        writer.writerow(
+                            [image_id, ",".join(map(lambda x: str(x), feature))]
+                        )
 
 
 if __name__ == "__main__":
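For reference, one row of the {model_name}.tsv written above round-trips with the parsing that load_index in utils.py expects; the file name and feature values here are made up:

# A single TSV row: image id, then the feature vector as comma-separated floats.
row = "000000397133.jpg\t0.12,-0.03,0.88"

filename, raw = row.strip().split("\t")
embedding = [float(x) for x in raw.split(",")]

assert filename.endswith(".jpg")
assert embedding == [0.12, -0.03, 0.88]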
image2text.py
CHANGED
@@ -7,6 +7,8 @@ def app(model_name):
     model, processor = load_model(model_name)
 
     st.title("Image to Text")
-    st.markdown("""
+    st.markdown(
+        """
     Some text goes in here.
-    """)
+        """
+    )
text2image.py
CHANGED
@@ -1,21 +1,22 @@
 import os
 
+import matplotlib.pyplot as plt
+import numpy as np
 import streamlit as st
 
-from utils import load_index, load_model
-import numpy as np
-import matplotlib.pyplot as plt
+from utils import load_index, load_model
 
 
 def app(model_name):
-    images_directory = "images/val2017"
-    features_directory = f"features/val2017/{model_name}.tsv"
+    images_directory = "images/val2017"
+    features_directory = f"features/val2017/{model_name}.tsv"
 
     files, index = load_index(features_directory)
-    model, processor = load_model(f"koclip/{model_name}")
+    model, processor = load_model(f"koclip/{model_name}")
 
     st.title("Text to Image Search Engine")
-    st.markdown("""
+    st.markdown(
+        """
     This demonstration explores capability of KoCLIP as a Korean-language Image search engine. Embeddings for each of
     5000 images from [MSCOCO](https://cocodataset.org/#home) 2017 validation set was generated using trained KoCLIP
     vision model. They are ranked based on cosine similarity distance from input Text query embeddings and top 10 images
@@ -27,7 +28,8 @@ def app(model_name):
    Larger model `koclip-large` uses `klue/roberta` as text encoder and bigger `google/vit-large-patch16-224` as image encoder.
 
    Example Queries : 컴퓨터하는 고양이(Cat playing on a computer), 길 위에서 달리는 자동차(Car running on the road),
-    """)
+    """
+    )
 
     query = st.text_input("한글 질문을 적어주세요 (Korean Text Query) :", value="아파트")
     if st.button("질문 (Query)"):
utils.py
CHANGED
@@ -1,26 +1,28 @@
 import nmslib
-import streamlit as st
-from transformers import CLIPProcessor, AutoTokenizer, ViTFeatureExtractor
 import numpy as np
+import streamlit as st
+from transformers import AutoTokenizer, CLIPProcessor, ViTFeatureExtractor
 
 from koclip import FlaxHybridCLIP
 
+
 @st.cache(allow_output_mutation=True)
 def load_index(img_file):
     filenames, embeddings = [], []
     lines = open(img_file, "r")
     for line in lines:
-        cols = line.strip().split("\t")
+        cols = line.strip().split("\t")
         filename = cols[0]
-        embedding = [float(x) for x in cols[1].split(",")]
+        embedding = [float(x) for x in cols[1].split(",")]
         filenames.append(filename)
         embeddings.append(embedding)
     embeddings = np.array(embeddings)
-    index = nmslib.init(method="hnsw", space="cosinesimil")
+    index = nmslib.init(method="hnsw", space="cosinesimil")
     index.addDataPointBatch(embeddings)
-    index.createIndex({"post": 2}, print_progress=True)
+    index.createIndex({"post": 2}, print_progress=True)
     return filenames, index
 
+
 @st.cache(allow_output_mutation=True)
 def load_model(model_name="koclip/koclip-base"):
     assert model_name in {"koclip/koclip-base", "koclip/koclip-large"}
@@ -28,9 +30,12 @@ def load_model(model_name="koclip/koclip-base"):
     processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
     processor.tokenizer = AutoTokenizer.from_pretrained("klue/roberta-large")
     if model_name == "koclip/koclip-large":
-        processor.feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-large-patch16-224")
+        processor.feature_extractor = ViTFeatureExtractor.from_pretrained(
+            "google/vit-large-patch16-224"
+        )
     return model, processor
 
+
 @st.cache(allow_output_mutation=True)
 def load_model_v2(model_name="koclip/koclip"):
     model = FlaxHybridCLIP.from_pretrained(model_name)
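Putting the pieces together, a rough sketch of the query path that text2image.py drives with these helpers. It assumes the standard nmslib knnQuery API, a features file already built by embed_images.py, and reuses one of the example queries above; the paths are placeholders:

import numpy as np

from utils import load_index, load_model

files, index = load_index("features/val2017/koclip-base.tsv")
model, processor = load_model("koclip/koclip-base")

# Embed a Korean text query with the KoCLIP text encoder.
inputs = processor(text=["컴퓨터하는 고양이"], images=None, return_tensors="jax", padding=True)
query_vec = np.asarray(model.get_text_features(**inputs))[0]

# nmslib returns (ids, distances); smaller cosine distance means a closer match.
ids, distances = index.knnQuery(query_vec, k=10)
top_files = [files[i] for i in ids]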