Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -40,7 +40,7 @@ if not precomputed_results:
|
|
40 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
41 |
model = model.to(device)
|
42 |
|
43 |
-
def change_language(lang
|
44 |
# compute text embeddings
|
45 |
labels = babel_imagenet[lang][1]
|
46 |
class_order = list(range(len(labels)))
|
@@ -78,7 +78,7 @@ def select(idx, lang, choice, correct, model_choice, player_score, clip_score, c
|
|
78 |
|
79 |
return correct_text, player_score_text, clip_score_text, player_score, clip_score
|
80 |
|
81 |
-
def prepare(raw_idx, lang, text_embeddings, class_order
|
82 |
# prepares the next question: loads the image and computes the choices
|
83 |
|
84 |
raw_idx = (raw_idx+1) % len(babel_imagenet[lang][0])
|
@@ -94,8 +94,7 @@ def prepare(raw_idx, lang, text_embeddings, class_order, randomize_images):
|
|
94 |
class_idx = lang_class_idxs[idx]
|
95 |
|
96 |
img_idx = 0
|
97 |
-
|
98 |
-
img_idx = np.random.choice(min(len(babelnet_images[class_idx]), max_image_choices))
|
99 |
img_url = babelnet_images[class_idx][img_idx]["url"]
|
100 |
class_labels = babel_imagenet[lang][1] if lang != "EN" else openai_en_classes
|
101 |
|
@@ -107,7 +106,7 @@ def prepare(raw_idx, lang, text_embeddings, class_order, randomize_images):
|
|
107 |
image_features /= image_features.norm(dim=-1, keepdim=True)
|
108 |
except:
|
109 |
gr.Warning("There is a problem with the next class. Skipping it.")
|
110 |
-
return prepare(raw_idx, lang, text_embeddings, class_order
|
111 |
|
112 |
similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
|
113 |
choices = np.argsort(similarity)[-4:].tolist()
|
@@ -135,7 +134,7 @@ def prepare(raw_idx, lang, text_embeddings, class_order, randomize_images):
|
|
135 |
return next_radio, next_image, raw_idx, correct_choice, model_choice, choice_values
|
136 |
|
137 |
|
138 |
-
def reroll(raw_idx, lang, text_embeddings, class_order
|
139 |
# re-prepares the current question (raw_idx is not advanced): loads an image and computes the choices
|
140 |
|
141 |
idx = class_order[raw_idx]
|
@@ -143,8 +142,7 @@ def reroll(raw_idx, lang, text_embeddings, class_order, randomize_images):
|
|
143 |
class_idx = lang_class_idxs[idx]
|
144 |
|
145 |
img_idx = 0
|
146 |
-
|
147 |
-
img_idx = np.random.choice(min(len(babelnet_images[class_idx]), max_image_choices))
|
148 |
img_url = babelnet_images[class_idx][img_idx]["url"]
|
149 |
class_labels = babel_imagenet[lang][1] if lang != "EN" else openai_en_classes
|
150 |
|
@@ -156,7 +154,7 @@ def reroll(raw_idx, lang, text_embeddings, class_order, randomize_images):
|
|
156 |
image_features /= image_features.norm(dim=-1, keepdim=True)
|
157 |
except:
|
158 |
gr.Warning("There is a problem with the next class. Skipping it.")
|
159 |
-
return prepare(raw_idx, lang, text_embeddings, class_order
|
160 |
|
161 |
similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
|
162 |
choices = np.argsort(similarity)[-4:].tolist()
|
@@ -199,16 +197,15 @@ with (gr.Blocks(title="Babel-ImageNet Quiz") as demo):
|
|
199 |
|
200 |
# Title Area
|
201 |
gr.Markdown("""
|
202 |
-
#
|
203 |
|
204 |
-
<small>
|
205 |
|
206 |
""")
|
207 |
# language select dropdown
|
208 |
with gr.Row():
|
209 |
language_select = gr.Dropdown(choices=main_language_values, value="EN", interactive=True, label="Select your language:")
|
210 |
-
|
211 |
-
start_btn = gr.Button(value="Start", variant="primary")
|
212 |
|
213 |
# quiz area
|
214 |
with gr.Row():
|
@@ -217,9 +214,9 @@ with (gr.Blocks(title="Babel-ImageNet Quiz") as demo):
|
|
217 |
width=IMG_WIDTH, height=IMG_WIDTH)
|
218 |
|
219 |
with gr.Column(scale=1):
|
220 |
-
options = gr.Radio(choices=["
|
221 |
# with gr.Row():
|
222 |
-
correct_text = gr.Text("
|
223 |
player_score_text = gr.Text(f"Player score: 0")
|
224 |
clip_score_text = gr.Text(f"mSigLIP score: 0")
|
225 |
reroll_btn = gr.Button(value="Reroll the image (for bad images or errors)")
|
@@ -231,18 +228,18 @@ with (gr.Blocks(title="Babel-ImageNet Quiz") as demo):
|
|
231 |
inputs=[class_idx, language_select, options, correct_choice, model_choice, player_score, clip_score, choices],
|
232 |
outputs=[correct_text, player_score_text, clip_score_text, player_score, clip_score]
|
233 |
).then(fn=prepare,
|
234 |
-
inputs=[class_idx, language_select, text_embeddings, class_order
|
235 |
outputs=[options, image, class_idx, correct_choice, model_choice, choices])
|
236 |
|
237 |
start_btn.click(fn=change_language,
|
238 |
-
inputs=[language_select
|
239 |
outputs=[text_embeddings, class_idx, class_order, correct_text, player_score_text, clip_score_text, player_score, clip_score]
|
240 |
).then(fn=prepare,
|
241 |
-
inputs=[class_idx, language_select, text_embeddings, class_order
|
242 |
outputs=[options, image, class_idx, correct_choice, model_choice, choices])
|
243 |
|
244 |
reroll_btn.click(fn=reroll,
|
245 |
-
inputs=[class_idx, language_select, text_embeddings, class_order
|
246 |
outputs=[options, image, class_idx, correct_choice, model_choice, choices])
|
247 |
|
248 |
|
|
|
40 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
41 |
model = model.to(device)
|
42 |
|
43 |
+
def change_language(lang):
|
44 |
# compute text embeddings
|
45 |
labels = babel_imagenet[lang][1]
|
46 |
class_order = list(range(len(labels)))
|
|
|
78 |
|
79 |
return correct_text, player_score_text, clip_score_text, player_score, clip_score
|
80 |
|
81 |
+
def prepare(raw_idx, lang, text_embeddings, class_order):
|
82 |
# prepares the next question: loads the image and computes the choices
|
83 |
|
84 |
raw_idx = (raw_idx+1) % len(babel_imagenet[lang][0])
|
|
|
94 |
class_idx = lang_class_idxs[idx]
|
95 |
|
96 |
img_idx = 0
|
97 |
+
img_idx = np.random.choice(min(len(babelnet_images[class_idx]), max_image_choices))
|
|
|
98 |
img_url = babelnet_images[class_idx][img_idx]["url"]
|
99 |
class_labels = babel_imagenet[lang][1] if lang != "EN" else openai_en_classes
|
100 |
|
|
|
106 |
image_features /= image_features.norm(dim=-1, keepdim=True)
|
107 |
except:
|
108 |
gr.Warning("There is a problem with the next class. Skipping it.")
|
109 |
+
return prepare(raw_idx, lang, text_embeddings, class_order)
|
110 |
|
111 |
similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
|
112 |
choices = np.argsort(similarity)[-4:].tolist()
|
|
|
134 |
return next_radio, next_image, raw_idx, correct_choice, model_choice, choice_values
|
135 |
|
136 |
|
137 |
+
def reroll(raw_idx, lang, text_embeddings, class_order):
|
138 |
# re-prepares the current question (raw_idx is not advanced): loads an image and computes the choices
|
139 |
|
140 |
idx = class_order[raw_idx]
|
|
|
142 |
class_idx = lang_class_idxs[idx]
|
143 |
|
144 |
img_idx = 0
|
145 |
+
img_idx = np.random.choice(min(len(babelnet_images[class_idx]), max_image_choices))
|
|
|
146 |
img_url = babelnet_images[class_idx][img_idx]["url"]
|
147 |
class_labels = babel_imagenet[lang][1] if lang != "EN" else openai_en_classes
|
148 |
|
|
|
154 |
image_features /= image_features.norm(dim=-1, keepdim=True)
|
155 |
except:
|
156 |
gr.Warning("There is a problem with the next class. Skipping it.")
|
157 |
+
return prepare(raw_idx, lang, text_embeddings, class_order)
|
158 |
|
159 |
similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
|
160 |
choices = np.argsort(similarity)[-4:].tolist()
|
|
|
197 |
|
198 |
# Title Area
|
199 |
gr.Markdown("""
|
200 |
+
# ¿Eres más lista 🤓 que CLIP 🤖?
|
201 |
|
202 |
+
<small>por Gregor Geigle, WüNLP & Computer Vision Lab, University of Würzburg y Both.rocks!</small>
|
203 |
|
204 |
""")
|
205 |
# language select dropdown
|
206 |
with gr.Row():
|
207 |
language_select = gr.Dropdown(choices=main_language_values, value="EN", interactive=True, label="Select your language:")
|
208 |
+
start_btn = gr.Button(value="¡Adelante!", variant="primary")
|
|
|
209 |
|
210 |
# quiz area
|
211 |
with gr.Row():
|
|
|
214 |
width=IMG_WIDTH, height=IMG_WIDTH)
|
215 |
|
216 |
with gr.Column(scale=1):
|
217 |
+
options = gr.Radio(choices=["", "", "", ""], interactive=False, label="Pulsa ¡Adelante! para empezar.")
|
218 |
# with gr.Row():
|
219 |
+
correct_text = gr.Text("Pulsa ¡Adelante! para empezar.")
|
220 |
player_score_text = gr.Text(f"Player score: 0")
|
221 |
clip_score_text = gr.Text(f"mSigLIP score: 0")
|
222 |
reroll_btn = gr.Button(value="Reroll the image (for bad images or errors)")
|
|
|
228 |
inputs=[class_idx, language_select, options, correct_choice, model_choice, player_score, clip_score, choices],
|
229 |
outputs=[correct_text, player_score_text, clip_score_text, player_score, clip_score]
|
230 |
).then(fn=prepare,
|
231 |
+
inputs=[class_idx, language_select, text_embeddings, class_order],
|
232 |
outputs=[options, image, class_idx, correct_choice, model_choice, choices])
|
233 |
|
234 |
start_btn.click(fn=change_language,
|
235 |
+
inputs=[language_select],
|
236 |
outputs=[text_embeddings, class_idx, class_order, correct_text, player_score_text, clip_score_text, player_score, clip_score]
|
237 |
).then(fn=prepare,
|
238 |
+
inputs=[class_idx, language_select, text_embeddings, class_order],
|
239 |
outputs=[options, image, class_idx, correct_choice, model_choice, choices])
|
240 |
|
241 |
reroll_btn.click(fn=reroll,
|
242 |
+
inputs=[class_idx, language_select, text_embeddings, class_order],
|
243 |
outputs=[options, image, class_idx, correct_choice, model_choice, choices])
|
244 |
|
245 |
|