reddgr committed on
Commit e6c7897 · 1 Parent(s): ddea0e0

Add application file

Files changed (1)
app.py +517 -0
app.py ADDED
@@ -0,0 +1,517 @@
import time
start_time = time.time()
from pathlib import Path
from typing import Tuple
import pandas as pd
import gradio as gr
import json

import duckdb
from sentence_transformers import SentenceTransformer
from datasets import load_dataset

USE_DOTENV = False

ROOT = Path(__file__).parent

JSON_PATH = ROOT / "json"
# DATASET_PATH = ROOT / "pkl" / "app_dataset.pkl"
DOTENV_PATH = ROOT.parent.parent / "apis" / ".env"
# DUCKDB_PATH = ROOT / "db" / "sss_vectordb.duckdb"
22
+ from src import front_dataset_handler as fdh, app_utils as utils, semantic_search as ss, env_options
23
+ tokens = env_options.check_env(use_dotenv=USE_DOTENV, dotenv_path=DOTENV_PATH, env_tokens = ["HF_TOKEN"])
24
+ print(f"Libraries loaded. {time.time() - start_time:.2f} seconds.")
25
+ # Carga de modelo de embeddings y conexión a DuckDB
26
+ emb_model = SentenceTransformer("FinLang/finance-embeddings-investopedia", token = tokens.get("HF_TOKEN"))
27
+ # con = duckdb.connect(DUCKDB_PATH)
28
+ print(f"Model loaded. {time.time() - start_time:.2f} seconds.")
29
+ #### CONEXIÓN DUCKDB A HUGGING FACE HUB ####
30
+ print("Initializing DuckDB connection...")
31
+ con = duckdb.connect()
32
+ hf_token = tokens.get("HF_TOKEN")
33
+ ##################################
34
+ masked_hf_token = hf_token[:4] + "*" * (len(hf_token) - 8) + hf_token[-4:]
35
+ print(f"Using Hugging Face token: {masked_hf_token}")
36
+ ##################################
37
+
38
+ hf_token = tokens.get("HF_TOKEN")
39
+ masked_hf_token = hf_token[:4] + "*" * (len(hf_token) - 8) + hf_token[-4:]

dataset_name = "reddgr/swift-stock-screener"

# Register the token as a DuckDB secret so the httpfs extension can
# authenticate reads from hf:// dataset paths:
create_secret_query = f"""
INSTALL httpfs;
LOAD httpfs;
CREATE PERSISTENT SECRET hf_token (
    TYPE huggingface,
    TOKEN '{hf_token}'
);
"""
con.sql(create_secret_query)
print(con.sql("SELECT * FROM duckdb_secrets()").fetchdf())
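
# duckdb_secrets() redacts secret values by default, so the print above
# should not expose the raw token; only the partially masked token is logged.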

create_table_query = f"""
INSTALL vss;
LOAD vss;
SET hnsw_enable_experimental_persistence = true;
CREATE TABLE vector_table AS
SELECT *, embeddings::float[{emb_model.get_sentence_embedding_dimension()}] as embeddings_float
FROM 'hf://datasets/{dataset_name}/data/train-00000-of-00001.parquet';
"""

con.sql(create_table_query)
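
# The cast to a fixed-size float[N] array matters: DuckDB's vss extension
# builds HNSW indexes only over fixed-dimension ARRAY columns, so the
# variable-length embeddings column is materialized as embeddings_float first.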

print("Indexing data for vector search...")
create_index_query = """
CREATE INDEX sss_hnsw_index ON vector_table USING HNSW (embeddings_float) WITH (metric = 'cosine');
"""
con.sql(create_index_query)

print(f"Created search index. {time.time() - start_time:.2f} seconds.")
########################################

# GLOBAL STATE
last_result_df: pd.DataFrame = pd.DataFrame()

last_search_type: str = ""
last_search_query: str = ""
# last_filtros_values: Tuple = ()
last_column_filters: list[tuple[str, str]] = []
last_sort_col_label: str = ""
last_sort_dir: str = ""
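
# Caveat: module-level globals in a Gradio app are shared across all user
# sessions, so concurrent visitors can overwrite each other's last_result_df;
# acceptable for a low-traffic Space, but worth noting.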

# ---------------------------------------------------------------------------
# CONFIG --------------------------------------------------------------------
# ---------------------------------------------------------------------------
app_dataset = load_dataset("reddgr/swift-stock-screener", split="train", token=tokens.get("HF_TOKEN")).to_pandas()

# dh_app = fdh.FrontDatasetHandler(app_dataset=pd.read_pickle(DATASET_PATH))
dh_app = fdh.FrontDatasetHandler(app_dataset=app_dataset)
maestro = dh_app.app_dataset[dh_app.app_dataset['quoteType']=='EQUITY'].copy()
maestro_etf = dh_app.app_dataset[dh_app.app_dataset['quoteType']=='ETF'].copy()

# Read the three column lists in a single pass over the config file:
with open(JSON_PATH / "app_column_config.json", "r") as f:
    column_config = json.load(f)
variables_busq_norm = column_config["variables_busq_norm"]
caracteristicas = column_config["cols_tabla_equity"]
caracteristicas_etf = column_config["cols_tabla_etfs"]

with open(JSON_PATH / "cat_cols.json", "r") as f:
    cat_cols = json.load(f)["cat_cols"]

with open(JSON_PATH / "col_names_map.json", "r") as f:
    rename_columns = json.load(f)["col_names_map"]

with open(JSON_PATH / "gamma_params.json", "r") as f:
    gamma_params = json.load(f)

with open(JSON_PATH / "semantic_search_params.json", "r") as f:
    semantic_search_params = json.load(f)["semantic_search_params"]

# Columns to style in red when negative
neg_display_cols = [rename_columns.get(c, c)
                    for c in ("ret_365", "revenueGrowth")]

# Parameters of the distance distribution function:
shape, loc, scale = gamma_params["shape"], gamma_params["loc"], gamma_params["scale"]
max_dist, precision_cdf = gamma_params["max_dist"], gamma_params["precision_cdf"]
y_cdf, _ = dh_app.configura_distr_prob(shape, loc, scale, max_dist, precision_cdf)

# VSS search parameters:
k = semantic_search_params["k"]
brevity_penalty = semantic_search_params["brevity_penalty"]
reward_for_literal = semantic_search_params["reward_for_literal"]
partial_match_factor = semantic_search_params["partial_match_factor"]
print(f"VSS params: k={k}, brevity_penalty={brevity_penalty}, reward_for_literal={reward_for_literal}, partial_match_factor={partial_match_factor}")

filtros_keys = caracteristicas[2:]

MAX_ROWS = 13000
ROWS_PER_PAGE = 100

# ---------------------------------------------------------------------------
# UI FUNCTIONS ----------------------------------------------------------------
# ---------------------------------------------------------------------------

# Interface-specific helper functions are kept in this module (rather than in app_utils)

def _paginate(df: pd.DataFrame, page: int, per_page: int = ROWS_PER_PAGE) -> Tuple[pd.DataFrame, str]:
    total_pages = max(1, (len(df) + per_page - 1) // per_page)
    page = max(1, min(page, total_pages))
    slice_df = df.iloc[(page-1)*per_page : (page-1)*per_page + per_page]
    slice_df = utils.styler_negative_red(slice_df, cols=neg_display_cols)
    return slice_df, f"Page {page} of {total_pages}"
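
# Example: with 250 result rows and ROWS_PER_PAGE = 100, _paginate(df, 2)
# returns rows 100-199 plus the label "Page 2 of 3"; out-of-range page numbers
# are clamped to [1, total_pages], so _paginate(df, 99) falls back to page 3.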

def search_dynamic(ticker: str, page: int, *filtros_values) -> Tuple[pd.DataFrame, str]:
    global last_result_df

    ticker = ticker.upper().strip()
    if ticker == "":
        last_result_df = pd.DataFrame()
        return pd.DataFrame(), "Page 1 of 1"

    filtros = dict(zip(filtros_keys, filtros_values))

    neighbors_df = dh_app.vecinos_cercanos(
        df=maestro,
        variables_busq=variables_busq_norm,
        caracteristicas=caracteristicas,
        target_ticker=ticker,
        y_cdf=y_cdf,
        precision_cdf=precision_cdf,
        max_dist=max_dist,
        n_neighbors=len(maestro),
        filtros=filtros,
    )

    if isinstance(neighbors_df, str):
        last_result_df = pd.DataFrame()
        return pd.DataFrame(), "Page 1 of 1"

    neighbors_df.reset_index(inplace=True)
    neighbors_df.drop(columns=["distance"], inplace=True)
    neighbors_df = utils.format_results(neighbors_df, rename_columns)

    last_result_df = neighbors_df.head(MAX_ROWS).copy()
    return _paginate(last_result_df, page)
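
# Note: dh_app.vecinos_cercanos appears to signal errors (e.g. an unknown
# ticker) by returning a message string instead of a DataFrame, hence the
# isinstance check above rather than a try/except.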

def search_theme(theme: str, page: int, *filtros_values) -> Tuple[pd.DataFrame, str]:
    global last_result_df
    query = theme.strip()
    if query == "":
        last_result_df = pd.DataFrame()
        return pd.DataFrame(), "Page 1 of 1"

    # Call the search algorithm, which returns a dataframe with k assets:
    result_df = ss.duckdb_vss_local(
        model=emb_model,
        duckdb_connection=con,
        query=query,
        k=k,
        brevity_penalty=brevity_penalty,
        reward_for_literal=reward_for_literal,
        partial_match_factor=partial_match_factor,
        table_name="vector_table",
        embedding_column="embeddings"
    )
    theme_dist = result_df[['ticker', 'distance']].rename(columns={'distance': 'Search dist.'})
    # Join the distances dataframe with the master table, keeping the original columns:
    clean_feats = [c for c in caracteristicas if c != 'ticker']
    # Index by ticker to join the tables:
    maestro_subset = maestro.set_index('ticker')[clean_feats]
    merged = theme_dist.set_index('ticker').join(maestro_subset, how='inner').reset_index()
    # Reorder the columns and append the distance:
    ordered_cols = ['ticker'] + clean_feats + ['Search dist.']
    merged = merged[ordered_cols]
    # Adjust column formats:
    formatted = utils.format_results(merged, rename_columns)
    last_result_df = formatted.head(MAX_ROWS).copy()
    return _paginate(last_result_df, page)
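
# The 'Search dist.' column carries the distance returned by the VSS query;
# given the HNSW index above was built with metric = 'cosine', lower values
# should mean closer semantic matches to the theme query.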

def _compose_summary() -> str:
    parts = []
    if last_search_type == "theme":
        parts.append(f"Theme search for '{last_search_query}'")
    elif last_search_type == "ticker":
        parts.append(f"Ticker search for '{last_search_query}'")
    if last_column_filters:
        fstr = ", ".join(f"{col} = '{val}'" for col, val in last_column_filters)
        parts.append(f"Filters: {fstr}")
    if last_sort_col_label:
        parts.append(f"Sorted by: {last_sort_col_label} ({last_sort_dir})")
    return ". ".join(parts)
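
# Example (illustrative values): a theme search for 'lithium' followed by a
# click-filter on a Sector column would yield
# "Theme search for 'lithium'. Filters: Sector = 'Energy'".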

def search_all(theme: str, ticker: str, page: int) -> tuple[pd.DataFrame, str, str, str, str]:
    global last_search_type, last_search_query, last_column_filters
    last_column_filters.clear()

    if theme.strip():
        last_search_type, last_search_query = "theme", theme.strip()
        df, label = search_theme(theme, page)
        new_ticker, new_theme = "", ""  # clear the search boxes

    elif ticker.strip():
        last_search_type, last_search_query = "ticker", ticker.strip().upper()
        df, label = search_dynamic(ticker, page)
        new_ticker, new_theme = "", ""

    else:
        df, label = _paginate(last_result_df, page)
        new_ticker, new_theme = "", ""

    summary = _compose_summary()
    return df, label, new_ticker, new_theme, summary

def page_change(theme: str, ticker: str, page: int) -> tuple[pd.DataFrame, str, str, str, str]:
    return search_all(theme, ticker, page)

# ---------------------------------------------------------------------------
# SORTING -------------------------------------------------------------------
# ---------------------------------------------------------------------------

def apply_sort(col_label: str, direction: str) -> tuple[pd.DataFrame, str, int, str]:
    global last_sort_col_label, last_sort_dir, last_search_type, last_search_query, last_column_filters, last_result_df

    # record selection and clear previous state
    last_sort_col_label, last_sort_dir = col_label or "", direction or ""
    last_search_type = last_search_query = ""
    last_column_filters.clear()

    # reload raw data
    df_raw = maestro[caracteristicas].head(MAX_ROWS).copy()

    # sort on original data column if specified
    if col_label:
        # reverse lookup original column key
        inv_map = {v: k for k, v in rename_columns.items()}
        orig_col = inv_map.get(col_label, col_label)
        asc = (direction == "Ascending")
        df_raw = df_raw.sort_values(
            by=orig_col,
            ascending=asc,
            na_position='last'
        ).reset_index(drop=True)

    # apply existing formatting helpers
    df_formatted = utils.format_results(df_raw, rename_columns)

    # update global and paginate
    last_result_df = df_formatted.copy()
    slice_df, label = _paginate(last_result_df, 1)
    summary = f"Sorted by: {col_label} ({direction})" if col_label else ""
    return slice_df, label, 1, summary
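
# Note: the inverse lookup assumes rename_columns is one-to-one; if two
# original columns shared a display label, inv_map would silently keep only
# the last mapping.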

def reset_initial() -> tuple[pd.DataFrame, str, int, str, str, str, str]:
    global last_search_type, last_search_query, last_column_filters, last_sort_col_label, last_sort_dir, last_result_df
    last_search_type = last_search_query = ""
    last_column_filters.clear()
    last_sort_col_label = last_sort_dir = ""
    last_result_df = utils.format_results(maestro[caracteristicas].head(MAX_ROWS).copy(), rename_columns)
    slice_df, label = _paginate(last_result_df, 1)
    default_sort = rename_columns.get("marketCap", "marketCap")
    return slice_df, label, 1, "", "", default_sort, ""
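
# The 7-tuple maps onto the reset_button outputs declared below: [output_df,
# pagination_label, page_state, ticker_input, theme_input, sort_col,
# summary_display].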

# ---------------------------------------------------------------------------
# INITIAL DATA ----------------------------------------------------------------
# ---------------------------------------------------------------------------

last_result_df = utils.format_results(maestro[caracteristicas].head(MAX_ROWS).copy(), rename_columns)
_initial_slice, _initial_label = _paginate(last_result_df, 1)

# ---------------------------------------------------------------------------
# UI ------------------------------------------------------------------------
# ---------------------------------------------------------------------------

def _load_html(name: str) -> str:
    return (ROOT / "html" / name).read_text(encoding="utf-8")

html_front_layout = _load_html("front_layout.html")

with gr.Blocks(title="Swift Stock Screener, by Reddgr") as front:
    gr.HTML(html_front_layout)

    # ---------------------- TOP INPUT -------------------------------------
    with gr.Row(equal_height=True):
        theme_input = gr.Textbox(show_label=False, placeholder="Search a theme, e.g. 'lithium'", scale=2)
        ticker_input = gr.Textbox(show_label=False, placeholder="Enter a ticker symbol", scale=1)
        buscar_button = gr.Button("Search")
        gr.HTML("<div></div>")
        reset_button = gr.Button("Reset", elem_classes="small-btn")
        random_button = gr.Button("Random ticker", elem_classes="small-btn")

    # ---------------------- SEARCH SUMMARY --------------------------------
    summary_display = gr.Markdown("", elem_classes="search-spec")

    # ---------------------- DATAFRAME & PAGINATION ------------------------
    output_df = gr.Dataframe(
        value=_initial_slice,
        interactive=False,
        elem_classes="clickable-columns",
        # max_height=500
    )

    # ---------------------- PAGINATION AND SORT CONTROLS ------------------
    with gr.Row():
        btn_prev = gr.Button("Previous", elem_classes="small-btn")
        pagination_label = gr.Markdown(_initial_label)
        btn_next = gr.Button("Next", elem_classes="small-btn")
        gr.Markdown("&nbsp;" * 20)
        # merged sort controls on right
        sort_col = gr.Dropdown(
            choices=[rename_columns.get(c, c) for c in caracteristicas],
            value=None,
            label="Reset and sort by:",
            allow_custom_value=False,
            scale=2,
        )
        sort_dir = gr.Radio(
            choices=["Ascending", "Descending"],
            value="Descending",
            label="",
            scale=1,
        )

    page_state = gr.State(1)

    # ---------------------- EXCLUSION FILTER TOGGLES ----------------------
    # For now we leave this feature out, at least for the stocks table, given
    # the complexity it adds (it is inherited from the mutual fund screener).
    # Potential improvement for when we incorporate the ETF table.
    '''
    with gr.Row():
        toggle_components = [
            gr.Checkbox(value=True, label=rename_columns.get(k, k)) for k in filtros_keys
        ]
    '''

    # ---------------------- HELPERS ---------------------------------------
    def reset_page():
        return 1

    def prev_page(p):
        return max(p - 1, 1)

    def next_page(p):
        return p + 1

    def search_inputs():
        return [theme_input, ticker_input, page_state]

    def random_action() -> tuple[str, int, str]:
        return utils.random_ticker(maestro), 1, ""

    # ---------------------- BINDINGS --------------------------------------
    # search_dynamic -> search_all
    inputs = [theme_input, ticker_input, page_state]

    buscar_button.click(
        search_all,
        inputs=inputs,
        outputs=[output_df, pagination_label, ticker_input, theme_input, summary_display]
    )

    ticker_input.submit(
        reset_page, None, page_state
    ).then(
        search_all,
        inputs=inputs,
        outputs=[output_df, pagination_label, ticker_input, theme_input, summary_display]
    )

    theme_input.submit(
        reset_page, None, page_state
    ).then(
        search_all,
        inputs=inputs,
        outputs=[output_df, pagination_label, ticker_input, theme_input, summary_display]
    )
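
    # Each submit chains two steps: reset_page writes 1 into page_state, then
    # .then() runs search_all, so a fresh query always renders from page 1.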

    random_button.click(
        random_action,
        None,
        [ticker_input, page_state, theme_input]
    ).then(
        search_all,
        inputs=inputs,
        outputs=[output_df, pagination_label, ticker_input, theme_input, summary_display]
    )

    reset_button.click(
        reset_initial,
        None,
        [output_df, pagination_label, page_state, ticker_input, theme_input, sort_col, summary_display]
    )

    btn_prev.click(
        prev_page, page_state, page_state
    ).then(
        page_change,
        inputs=inputs,
        outputs=[output_df, pagination_label, ticker_input, theme_input, summary_display]
    )

    btn_next.click(
        next_page, page_state, page_state
    ).then(
        page_change,
        inputs=inputs,
        outputs=[output_df, pagination_label, ticker_input, theme_input, summary_display]
    )

    sort_col.change(
        apply_sort,
        inputs=[sort_col, sort_dir],
        outputs=[output_df, pagination_label, page_state, summary_display]
    )

    sort_dir.change(
        apply_sort,
        inputs=[sort_col, sort_dir],
        outputs=[output_df, pagination_label, page_state, summary_display]
    )

    # ---------------------- FILTERS BY COLUMN ----------------------------- #
    filterable_columns = [rename_columns.get(c, c) for c in cat_cols]

    def filter_by_column(evt: gr.SelectData) -> tuple[pd.DataFrame, str, int, str]:
        global last_result_df, last_column_filters
        if last_result_df.empty:
            return pd.DataFrame(), "Page 1 of 1", 1, _compose_summary()

        col = last_result_df.columns[evt.index[1]]
        val = evt.value
        last_column_filters.append((col, val))
        filtered = last_result_df[last_result_df[col] == val]
        last_result_df = filtered.copy()
        slice_df, label = _paginate(last_result_df, 1)
        summary = _compose_summary()
        return slice_df, label, 1, summary
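
    # evt.index holds the (row, column) position of the clicked cell, so a
    # click narrows the current results to rows matching the clicked value;
    # filterable_columns is computed above but does not yet restrict which
    # columns respond to clicks.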

    output_df.select(
        filter_by_column,
        outputs=[output_df, pagination_label, page_state, summary_display]
    )

# ---------------------------------------------------------------------------
# LAUNCH --------------------------------------------------------------------
# ---------------------------------------------------------------------------

if __name__ == "__main__":
    front.launch()