Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -15,6 +15,8 @@ from dotenv import load_dotenv
|
|
15 |
import helpers
|
16 |
from omeka_s_api_client import OmekaSClient, OmekaSClientError
|
17 |
from lancedb_client import LanceDBManager
|
|
|
|
|
18 |
|
19 |
# Load .env for credentials
|
20 |
load_dotenv()
|
@@ -52,7 +54,7 @@ app.layout = html.Div([
|
|
52 |
# Header
|
53 |
dbc.NavbarSimple(
|
54 |
children=[],
|
55 |
-
brand="Omeka S Computer Vision
|
56 |
brand_href="/",
|
57 |
color="light",
|
58 |
dark=False,
|
@@ -70,8 +72,8 @@ app.layout = html.Div([
|
|
70 |
|
71 |
# Tabs
|
72 |
dcc.Tabs(id="data-tabs", value="api", children=[
|
73 |
-
dcc.Tab(label="
|
74 |
-
dcc.Tab(label="
|
75 |
]),
|
76 |
|
77 |
html.Div(id="data-tab-content"),
|
@@ -204,36 +206,6 @@ app.layout = html.Div([
|
|
204 |
html.Div(id="status"),
|
205 |
dcc.Store(id="omeka-client-config", storage_type="session"),
|
206 |
]),
|
207 |
-
|
208 |
-
# Footer
|
209 |
-
html.Footer([
|
210 |
-
html.Hr(),
|
211 |
-
dbc.Container([
|
212 |
-
dbc.Row([
|
213 |
-
dbc.Col([
|
214 |
-
html.Img(src="SmartBibl.IA_Solutions.png", height="50"),
|
215 |
-
html.Small([
|
216 |
-
html.Br(),
|
217 |
-
html.A("Géraldine Geoffroy", href="mailto:[email protected]", className="text-muted")
|
218 |
-
])
|
219 |
-
]),
|
220 |
-
dbc.Col([
|
221 |
-
html.H5("Code source"),
|
222 |
-
html.Ul([
|
223 |
-
html.Li(html.A("Github", href="https://github.com/gegedenice/openalex-explorer", className="text-muted", target="_blank"))
|
224 |
-
])
|
225 |
-
]),
|
226 |
-
dbc.Col([
|
227 |
-
html.H5("Ressources"),
|
228 |
-
html.Ul([
|
229 |
-
html.Li(html.A("Nomic Atlas", href="https://atlas.nomic.ai/", target="_blank", className="text-muted")),
|
230 |
-
html.Li(html.A("Model nomic-embed-text-v1.5", href="https://huggingface.co/nomic-ai/nomic-embed-text-v1.5", target="_blank", className="text-muted")),
|
231 |
-
html.Li(html.A("Model nomic-embed-vision-v1.5", href="https://huggingface.co/nomic-ai/nomic-embed-vision-v1.5", target="_blank", className="text-muted"))
|
232 |
-
])
|
233 |
-
])
|
234 |
-
])
|
235 |
-
])
|
236 |
-
], className="mt-5 p-3 bg-light border-top")
|
237 |
])
|
238 |
|
239 |
# -------------------- UI Callbacks --------------------
|
@@ -248,7 +220,7 @@ def render_tab_content(tab):
|
|
248 |
if tab == "omeka":
|
249 |
return html.Div([
|
250 |
html.Div([
|
251 |
-
html.H5("
|
252 |
# API URL input with full width
|
253 |
dbc.InputGroup([
|
254 |
dbc.Input(
|
@@ -308,7 +280,7 @@ def render_tab_content(tab):
|
|
308 |
], className="border rounded bg-white shadow-sm")
|
309 |
elif tab == "lance":
|
310 |
return html.Div([
|
311 |
-
html.H5("
|
312 |
dbc.Button("Load LanceDB tables", id="load-tables", color="link", size="sm", className="mt-2"),
|
313 |
dcc.Dropdown(id="db-tables-dropdown", placeholder="Select an existing table"),
|
314 |
dbc.Button("Display Table", id="load-data-db", color="success", size="sm", className="mt-2"),
|
@@ -409,14 +381,22 @@ def handle_omeka_data(n_clicks, item_set_id, client_config, table_name):
|
|
409 |
|
410 |
text_embed = helpers.generate_text_embed(df['text'].tolist())
|
411 |
img_embed = helpers.generate_img_embed(df['images_urls'].tolist())
|
412 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
413 |
df["embeddings"] = embeddings.tolist()
|
414 |
|
415 |
-
reducer = umap.UMAP(n_neighbors=15, min_dist=0.1,
|
416 |
umap_embeddings = reducer.fit_transform(embeddings)
|
417 |
df["umap_embeddings"] = umap_embeddings.tolist()
|
418 |
|
419 |
-
clusterer = hdbscan.HDBSCAN(min_cluster_size=10)
|
420 |
cluster_labels = clusterer.fit_predict(umap_embeddings)
|
421 |
df["Cluster"] = cluster_labels
|
422 |
|
@@ -708,7 +688,7 @@ def create_umap_plot(df):
|
|
708 |
paper_bgcolor='white',
|
709 |
height=700,
|
710 |
margin=dict(t=30, b=30, l=30, r=30),
|
711 |
-
showlegend=
|
712 |
legend=dict(
|
713 |
yanchor="top",
|
714 |
y=0.99,
|
|
|
15 |
import helpers
|
16 |
from omeka_s_api_client import OmekaSClient, OmekaSClientError
|
17 |
from lancedb_client import LanceDBManager
|
18 |
+
import torch
|
19 |
+
import torch.nn.functional as F
|
20 |
|
21 |
# Load .env for credentials
|
22 |
load_dotenv()
|
|
|
54 |
# Header
|
55 |
dbc.NavbarSimple(
|
56 |
children=[],
|
57 |
+
brand="Omeka S Computer Vision Assistant",
|
58 |
brand_href="/",
|
59 |
color="light",
|
60 |
dark=False,
|
|
|
72 |
|
73 |
# Tabs
|
74 |
dcc.Tabs(id="data-tabs", value="api", children=[
|
75 |
+
dcc.Tab(label="From Omeka S", value="omeka"),
|
76 |
+
dcc.Tab(label="From LanceDB", value="lance")
|
77 |
]),
|
78 |
|
79 |
html.Div(id="data-tab-content"),
|
|
|
206 |
html.Div(id="status"),
|
207 |
dcc.Store(id="omeka-client-config", storage_type="session"),
|
208 |
]),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
])
|
210 |
|
211 |
# -------------------- UI Callbacks --------------------
|
|
|
220 |
if tab == "omeka":
|
221 |
return html.Div([
|
222 |
html.Div([
|
223 |
+
html.H5("From Omeka S", className="mb-3"),
|
224 |
# API URL input with full width
|
225 |
dbc.InputGroup([
|
226 |
dbc.Input(
|
|
|
280 |
], className="border rounded bg-white shadow-sm")
|
281 |
elif tab == "lance":
|
282 |
return html.Div([
|
283 |
+
html.H5("From LanceDB"),
|
284 |
dbc.Button("Load LanceDB tables", id="load-tables", color="link", size="sm", className="mt-2"),
|
285 |
dcc.Dropdown(id="db-tables-dropdown", placeholder="Select an existing table"),
|
286 |
dbc.Button("Display Table", id="load-data-db", color="success", size="sm", className="mt-2"),
|
|
|
381 |
|
382 |
text_embed = helpers.generate_text_embed(df['text'].tolist())
|
383 |
img_embed = helpers.generate_img_embed(df['images_urls'].tolist())
|
384 |
+
# Convert both embedding sets to torch tensors
|
385 |
+
text_tensor = torch.tensor(text_embed)
|
386 |
+
img_tensor = torch.tensor(img_embed)
|
387 |
+
|
388 |
+
# Weighted sum (0.7 text, 0.3 image), then L2-normalize along dim 1
|
389 |
+
combined = (0.7 * text_tensor + 0.3 * img_tensor)
|
390 |
+
normalized_embeddings = F.normalize(combined, p=2, dim=1)
|
391 |
+
|
392 |
+
embeddings = normalized_embeddings.numpy()
|
393 |
df["embeddings"] = embeddings.tolist()
|
394 |
|
395 |
+
reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, metric="cosine")
|
396 |
umap_embeddings = reducer.fit_transform(embeddings)
|
397 |
df["umap_embeddings"] = umap_embeddings.tolist()
|
398 |
|
399 |
+
clusterer = hdbscan.HDBSCAN(min_cluster_size=10, metric="euclidean")
|
400 |
cluster_labels = clusterer.fit_predict(umap_embeddings)
|
401 |
df["Cluster"] = cluster_labels
|
402 |
|
|
|
688 |
paper_bgcolor='white',
|
689 |
height=700,
|
690 |
margin=dict(t=30, b=30, l=30, r=30),
|
691 |
+
showlegend=True,
|
692 |
legend=dict(
|
693 |
yanchor="top",
|
694 |
y=0.99,
|