add our app
app.py CHANGED
@@ -1,147 +1,173 @@
-
-import random
-from typing import List, Tuple

-import
 import panel as pn
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-@pn.cache
-def load_processor_model(
-    processor_name: str, model_name: str
-) -> Tuple[CLIPProcessor, CLIPModel]:
-    processor = CLIPProcessor.from_pretrained(processor_name)
-    model = CLIPModel.from_pretrained(model_name)
-    return processor, model
-
-
-async def open_image_url(image_url: str) -> Image:
-    async with aiohttp.ClientSession() as session:
-        async with session.get(image_url) as resp:
-            return Image.open(io.BytesIO(await resp.read()))
-
-
-def get_similarity_scores(class_items: List[str], image: Image) -> List[float]:
-    processor, model = load_processor_model(
-        "openai/clip-vit-base-patch32", "openai/clip-vit-base-patch32"
     )
-
-
-
-
     )
-    outputs = model(**inputs)
-    logits_per_image = outputs.logits_per_image
-    class_likelihoods = logits_per_image.softmax(dim=1).detach().numpy()
-    return class_likelihoods[0]
-
-
-async def process_inputs(class_names: List[str], image_url: str):
-    """
-    High level function that takes in the user inputs and returns the
-    classification results as panel objects.
-    """
-    try:
-        main.disabled = True
-        if not image_url:
-            yield "##### ⚠️ Provide an image URL"
-            return
-
-        yield "##### Fetching image and running model..."
-        try:
-            pil_img = await open_image_url(image_url)
-            img = pn.pane.Image(pil_img, height=400, align="center")
-        except Exception as e:
-            yield f"##### Something went wrong, please try a different URL!"
-            return
-
-        class_items = class_names.split(",")
-        class_likelihoods = get_similarity_scores(class_items, pil_img)
-
-        # build the results column
-        results = pn.Column("##### Here are the results!", img)
-
-        for class_item, class_likelihood in zip(class_items, class_likelihoods):
-            row_label = pn.widgets.StaticText(
-                name=class_item.strip(), value=f"{class_likelihood:.2%}", align="center"
-            )
-            row_bar = pn.indicators.Progress(
-                value=int(class_likelihood * 100),
-                sizing_mode="stretch_width",
-                bar_color="secondary",
-                margin=(0, 10),
-                design=pn.theme.Material,
-            )
-            results.append(pn.Column(row_label, row_bar))
-        yield results
-    finally:
-        main.disabled = False
-
-
-# create widgets
-randomize_url = pn.widgets.Button(name="Randomize URL", align="end")
-
-image_url = pn.widgets.TextInput(
-    name="Image URL to classify",
-    value=pn.bind(random_url, randomize_url),
-)
-class_names = pn.widgets.TextInput(
-    name="Comma separated class names",
-    placeholder="Enter possible class names, e.g. cat, dog",
-    value="cat, dog, parrot",
-)

-
-
-
-
 )

-
-
-pn.
-
-)
-
-# add footer
-footer_row = pn.Row(pn.Spacer(), align="center")
-for icon, url in ICON_URLS.items():
-    href_button = pn.widgets.Button(icon=icon, width=35, height=35)
-    href_button.js_on_click(code=f"window.open('{url}')")
-    footer_row.append(href_button)
-footer_row.append(pn.Spacer())
-
-# create dashboard
-main = pn.WidgetBox(
-    input_widgets,
-    interactive_result,
-    footer_row,
 )

-
-
-
     main=main,
-
-
-).servable(title=title)
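Note on the removed file: the preprocessing call inside get_similarity_scores (between the load_processor_model(...) call and outputs = model(**inputs)) did not survive extraction and is left blank above. Purely as a hypothetical illustration of the usual zero-shot CLIP setup in transformers, reusing the processor, class_items and image names from the deleted code:

    # hypothetical sketch only; not the recovered contents of the missing deleted lines
    inputs = processor(
        text=class_items, images=image, return_tensors="pt", padding=True
    )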
+# app_panel.py - Panel-based CGD Survey Explorer

+import os, io, json, gc
 import panel as pn
+import pandas as pd
+import boto3, torch
+from sentence_transformers import SentenceTransformer, util
+import psycopg2
+
+pn.extension()
+
+# ───────────────────────────────────────────────
+# 1) Data / Embeddings Loaders
+# ───────────────────────────────────────────────
+DB_HOST = os.getenv("DB_HOST")
+DB_PORT = os.getenv("DB_PORT", "5432")
+DB_NAME = os.getenv("DB_NAME")
+DB_USER = os.getenv("DB_USER")
+DB_PASSWORD = os.getenv("DB_PASSWORD")
+
+@pn.cache()
+def get_data():
+    conn = psycopg2.connect(
+        host=DB_HOST, port=DB_PORT,
+        dbname=DB_NAME, user=DB_USER, password=DB_PASSWORD,
+        sslmode="require"
     )
+    df_ = pd.read_sql_query("""
+        SELECT id, country, year, section,
+               question_code, question_text,
+               answer_code, answer_text
+        FROM survey_info;
+    """, conn)
+    conn.close()
+    return df_
+
+df = get_data()
+row_lookup = {row.id: i for i, row in df.iterrows()}
+
+@pn.cache()
+def load_embeddings():
+    BUCKET, KEY = "cgd-embeddings-bucket", "survey_info_embeddings.pt"
+    buf = io.BytesIO()
+    boto3.client("s3").download_fileobj(BUCKET, KEY, buf)
+    buf.seek(0)
+    ckpt = torch.load(buf, map_location="cpu")
+    buf.close(); gc.collect()
+    return ckpt["ids"], ckpt["embeddings"]
+
+ids_list, emb_tensor = load_embeddings()
+
+@pn.cache()
+def get_st_model():
+    return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device="cpu")
+
+# ───────────────────────────────────────────────
+# 2) Widgets
+# ───────────────────────────────────────────────
+country_opts = sorted(df["country"].dropna().unique())
+year_opts = sorted(df["year"].dropna().unique())
+
+w_countries = pn.widgets.MultiSelect(name="Countries", options=country_opts)
+w_years = pn.widgets.MultiSelect(name="Years", options=year_opts)
+w_keyword = pn.widgets.TextInput(name="Keyword Search", placeholder="Search questions or answers")
+w_group = pn.widgets.Checkbox(name="Group by Question Text", value=False)
+
+# Semantic search
+w_semquery = pn.widgets.TextInput(name="Semantic Query")
+w_search_button = pn.widgets.Button(name="Search", button_type="primary", disabled=False)
+
+# ───────────────────────────────────────────────
+# 3) Filtering Logic
+# ───────────────────────────────────────────────
+@pn.depends(w_countries, w_years, w_keyword, w_group)
+def keyword_filter(countries, years, keyword, group):
+    filt = df.copy()
+    if countries:
+        filt = filt[filt["country"].isin(countries)]
+    if years:
+        filt = filt[filt["year"].isin(years)]
+    if keyword:
+        filt = filt[
+            filt["question_text"].str.contains(keyword, case=False, na=False) |
+            filt["answer_text"].str.contains(keyword, case=False, na=False) |
+            filt["question_code"].astype(str).str.contains(keyword, case=False, na=False)
+        ]
+
+    if group:
+        grouped = (
+            filt.groupby("question_text")
+            .agg({
+                "country": lambda x: sorted(set(x)),
+                "year": lambda x: sorted(set(x)),
+                "answer_text": lambda x: list(x)[:3]
+            })
+            .reset_index()
+            .rename(columns={
+                "country": "Countries",
+                "year": "Years",
+                "answer_text": "Sample Answers"
+            })
+        )
+        return pn.pane.DataFrame(grouped, sizing_mode="stretch_width", height=400)
+
+    return pn.pane.DataFrame(
+        filt[["country", "year", "question_text", "answer_text"]],
+        sizing_mode="stretch_width", height=400
     )

+# ───────────────────────────────────────────────
+# 4) Semantic Search Callback
+# ───────────────────────────────────────────────
+def semantic_search(event=None):
+    query = w_semquery.value.strip()
+    if not query:
+        return
+
+    model = get_st_model()
+    q_vec = model.encode(query, convert_to_tensor=True, device="cpu").cpu()
+    sims = util.cos_sim(q_vec, emb_tensor)[0]
+    top_vals, top_idx = torch.topk(sims, k=50)
+
+    sem_ids = [ids_list[i] for i in top_idx.tolist()]
+    sem_rows = df.loc[df["id"].isin(sem_ids)].copy()
+    score_map = dict(zip(sem_ids, top_vals.tolist()))
+    sem_rows["Score"] = sem_rows["id"].map(score_map)
+    sem_rows = sem_rows.sort_values("Score", ascending=False)
+
+    # Get keyword-filtered data
+    keyword_df = keyword_filter(
+        w_countries.value,
+        w_years.value,
+        w_keyword.value,
+        False
+    ).object
+
+    remainder = keyword_df.loc[~keyword_df["id"].isin(sem_ids)].copy()
+    remainder["Score"] = ""
+
+    combined = pd.concat([sem_rows, remainder], ignore_index=True)
+
+    result_pane.object = combined[["Score", "country", "year", "question_text", "answer_text"]]
+
+w_search_button.on_click(semantic_search)
+
+result_pane = pn.pane.DataFrame(height=500, sizing_mode="stretch_width")
+
+# ───────────────────────────────────────────────
+# 5) Layout
+# ───────────────────────────────────────────────
+sidebar = pn.Column(
+    "## Filter Questions",
+    w_countries, w_years, w_keyword, w_group,
+    pn.Spacer(height=20),
+    "## Semantic Search",
+    w_semquery, w_search_button,
+    width=300
 )

+main = pn.Column(
+    pn.pane.Markdown("## CGD Survey Explorer"),
+    pn.Tabs(
+        ("Filtered Results", keyword_filter),
+        ("Semantic Search Results", result_pane),
+    )
 )

+pn.template.FastListTemplate(
+    title="CGD Survey Explorer",
+    sidebar=sidebar,
     main=main,
+    theme_toggle=True,
+).servable()
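The new load_embeddings() assumes the S3 object survey_info_embeddings.pt is a torch checkpoint containing an "ids" list aligned with an "embeddings" tensor. A minimal sketch of how such a checkpoint could be produced offline, assuming the same survey_info rows and the same all-MiniLM-L6-v2 model (the helper name, the choice of text to embed, and the output path are illustrative assumptions, not part of this commit):

    # build_embeddings.py - illustrative helper, not part of this commit
    import torch
    from sentence_transformers import SentenceTransformer

    def build_checkpoint(df, out_path="survey_info_embeddings.pt"):
        model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device="cpu")
        # assumed: embed question and answer text together, one row per survey record
        texts = (df["question_text"].fillna("") + " " + df["answer_text"].fillna("")).tolist()
        emb = model.encode(texts, convert_to_tensor=True, show_progress_bar=True).cpu()
        # "ids" must stay aligned row-for-row with "embeddings" for the app's lookup
        torch.save({"ids": df["id"].tolist(), "embeddings": emb}, out_path)

The resulting file would then be uploaded to the cgd-embeddings-bucket bucket that load_embeddings() reads from.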
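One caveat in the added code: the ungrouped branch of keyword_filter returns a DataFrame pane holding only the country, year, question_text and answer_text columns, while semantic_search later indexes that same frame with keyword_df["id"], which would raise a KeyError. If the intent is to append the non-matching keyword results below the scored semantic hits, a minimal fix (a sketch, not part of the commit) is to keep id in the ungrouped view:

    # keep "id" so semantic_search can exclude rows already covered by the semantic ranking
    return pn.pane.DataFrame(
        filt[["id", "country", "year", "question_text", "answer_text"]],
        sizing_mode="stretch_width", height=400
    )

With the DB_* environment variables set, the app can be previewed locally with panel serve app.py.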