Spaces:

Geraldine
/

omeka-s-computer-vision

Sleeping

App Files Files Community

Geraldine committed on Apr 29

Commit

ff06935

verified ·

1 Parent(s): 5bbaba7

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -39

app.py CHANGED Viewed

@@ -15,6 +15,8 @@ from dotenv import load_dotenv
 import helpers
 from omeka_s_api_client import OmekaSClient, OmekaSClientError
 from lancedb_client import LanceDBManager
 # Load .env for credentials
 load_dotenv()
@@ -52,7 +54,7 @@ app.layout = html.Div([
     # Header
     dbc.NavbarSimple(
         children=[],
-        brand="Omeka S Computer Vision Asistant",
         brand_href="/",
         color="light",
         dark=False,
@@ -70,8 +72,8 @@ app.layout = html.Div([
                         # Tabs
                         dcc.Tabs(id="data-tabs", value="api", children=[
-                            dcc.Tab(label="🔍 From Omeka S", value="omeka"),
-                            dcc.Tab(label="📁 From LanceDB", value="lance")
                         ]),
                         html.Div(id="data-tab-content"),
@@ -204,36 +206,6 @@ app.layout = html.Div([
             html.Div(id="status"),
             dcc.Store(id="omeka-client-config", storage_type="session"),
         ]),
-    # Footer
-    html.Footer([
-        html.Hr(),
-        dbc.Container([
-            dbc.Row([
-                dbc.Col([
-                    html.Img(src="SmartBibl.IA_Solutions.png", height="50"),
-                    html.Small([
-                        html.Br(),
-                        html.A("Géraldine Geoffroy", href="mailto:[email protected]", className="text-muted")
-                    ])
-                ]),
-                dbc.Col([
-                    html.H5("Code source"),
-                    html.Ul([
-                        html.Li(html.A("Github", href="https://github.com/gegedenice/openalex-explorer", className="text-muted", target="_blank"))
-                    ])
-                ]),
-                dbc.Col([
-                    html.H5("Ressources"),
-                    html.Ul([
-                        html.Li(html.A("Nomic Atlas", href="https://atlas.nomic.ai/", target="_blank", className="text-muted")),
-                        html.Li(html.A("Model nomic-embed-text-v1.5", href="https://huggingface.co/nomic-ai/nomic-embed-text-v1.5", target="_blank", className="text-muted")),
-                        html.Li(html.A("Model nomic-embed-vision-v1.5", href="https://huggingface.co/nomic-ai/nomic-embed-vision-v1.5", target="_blank", className="text-muted"))
-                    ])
-                ])
-            ])
-        ])
-    ], className="mt-5 p-3 bg-light border-top")
 ])
 # -------------------- UI Callbacks --------------------
@@ -248,7 +220,7 @@ def render_tab_content(tab):
     if tab == "omeka":
         return html.Div([
             html.Div([
-                html.H5("🔍 From Omeka S", className="mb-3"),
                 # API URL input with full width
                 dbc.InputGroup([
                     dbc.Input(
@@ -308,7 +280,7 @@ def render_tab_content(tab):
         ], className="border rounded bg-white shadow-sm")
     elif tab == "lance":
         return html.Div([
-            html.H5("📁 From LanceDB"),
             dbc.Button("Load LanceDB tables", id="load-tables", color="link", size="sm", className="mt-2"),
             dcc.Dropdown(id="db-tables-dropdown", placeholder="Select an existing table"),
             dbc.Button("Display Table", id="load-data-db", color="success", size="sm", className="mt-2"),
@@ -409,14 +381,22 @@ def handle_omeka_data(n_clicks, item_set_id, client_config, table_name):
     text_embed = helpers.generate_text_embed(df['text'].tolist())
     img_embed = helpers.generate_img_embed(df['images_urls'].tolist())
-    embeddings = (text_embed + img_embed) / 2 # Average the embeddings
     df["embeddings"] = embeddings.tolist()
-    reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, n_components=2, metric='cosine', random_state=42)
     umap_embeddings = reducer.fit_transform(embeddings)
     df["umap_embeddings"] = umap_embeddings.tolist()
-    clusterer = hdbscan.HDBSCAN(min_cluster_size=10)
     cluster_labels = clusterer.fit_predict(umap_embeddings)
     df["Cluster"] = cluster_labels
@@ -708,7 +688,7 @@ def create_umap_plot(df):
         paper_bgcolor='white',
         height=700,
         margin=dict(t=30, b=30, l=30, r=30),
-        showlegend=False,
         legend=dict(
             yanchor="top",
             y=0.99,

 import helpers
 from omeka_s_api_client import OmekaSClient, OmekaSClientError
 from lancedb_client import LanceDBManager
+import torch
+import torch.nn.functional as F
 # Load .env for credentials
 load_dotenv()
     # Header
     dbc.NavbarSimple(
         children=[],
+        brand="Omeka S Computer Vision Assistant",
         brand_href="/",
         color="light",
         dark=False,
                         # Tabs
                         dcc.Tabs(id="data-tabs", value="api", children=[
+                            dcc.Tab(label="From Omeka S", value="omeka"),
+                            dcc.Tab(label="From LanceDB", value="lance")
                         ]),
                         html.Div(id="data-tab-content"),
             html.Div(id="status"),
             dcc.Store(id="omeka-client-config", storage_type="session"),
         ]),
 ])
 # -------------------- UI Callbacks --------------------
     if tab == "omeka":
         return html.Div([
             html.Div([
+                html.H5("From Omeka S", className="mb-3"),
                 # API URL input with full width
                 dbc.InputGroup([
                     dbc.Input(
         ], className="border rounded bg-white shadow-sm")
     elif tab == "lance":
         return html.Div([
+            html.H5("From LanceDB"),
             dbc.Button("Load LanceDB tables", id="load-tables", color="link", size="sm", className="mt-2"),
             dcc.Dropdown(id="db-tables-dropdown", placeholder="Select an existing table"),
             dbc.Button("Display Table", id="load-data-db", color="success", size="sm", className="mt-2"),
     text_embed = helpers.generate_text_embed(df['text'].tolist())
     img_embed = helpers.generate_img_embed(df['images_urls'].tolist())
+    # Convert the text and image embeddings to torch tensors
+    text_tensor = torch.tensor(text_embed)
+    img_tensor = torch.tensor(img_embed)
+    # Weighted average (0.7 text, 0.3 image), then L2-normalize along dim 1
+    combined = (0.7 * text_tensor + 0.3 * img_tensor)
+    normalized_embeddings = F.normalize(combined, p=2, dim=1)
+    embeddings = normalized_embeddings.numpy()
     df["embeddings"] = embeddings.tolist()
+    reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, metric="cosine")
     umap_embeddings = reducer.fit_transform(embeddings)
     df["umap_embeddings"] = umap_embeddings.tolist()
+    clusterer = hdbscan.HDBSCAN(min_cluster_size=10, metric="euclidean")
     cluster_labels = clusterer.fit_predict(umap_embeddings)
     df["Cluster"] = cluster_labels
         paper_bgcolor='white',
         height=700,
         margin=dict(t=30, b=30, l=30, r=30),
+        showlegend=True,
         legend=dict(
             yanchor="top",
             y=0.99,