Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -26,7 +26,8 @@ from appStore.region_utils import (
|
|
26 |
from appStore.rag_utils import (
|
27 |
highlight_query,
|
28 |
get_rag_answer,
|
29 |
-
compute_title
|
|
|
30 |
)
|
31 |
from appStore.filter_utils import (
|
32 |
parse_budget,
|
@@ -126,14 +127,14 @@ unique_country_names = sorted(country_name_mapping.keys())
|
|
126 |
# Define reset_filters function using session_state
|
127 |
###########################################
|
128 |
def reset_filters():
|
129 |
-
st.session_state["region_filter"] = "All/Not allocated"
|
130 |
-
st.session_state["country_filter"] = "All/Not allocated"
|
131 |
current_year = datetime.now().year
|
132 |
default_start_year = current_year - 4
|
133 |
st.session_state["end_year_range"] = (default_start_year, max_end_year)
|
134 |
-
st.session_state["crs_filter"] = "All/Not allocated"
|
135 |
st.session_state["min_budget"] = min_budget_val
|
136 |
-
st.session_state["client_filter"] = "All/Not allocated"
|
137 |
st.session_state["query"] = ""
|
138 |
st.session_state["show_exact_matches"] = False
|
139 |
st.session_state["page"] = 1
|
@@ -144,17 +145,19 @@ def reset_filters():
|
|
144 |
col1, col2, col3, col4, col5 = st.columns([1, 1, 1, 1, 1])
|
145 |
|
146 |
with col1:
|
147 |
-
region_filter = st.
|
148 |
-
|
|
|
149 |
filtered_country_names = unique_country_names
|
150 |
else:
|
151 |
filtered_country_names = [
|
152 |
name for name, code in country_name_mapping.items()
|
153 |
-
if iso_code_to_sub_region.get(code)
|
154 |
]
|
155 |
|
156 |
with col2:
|
157 |
-
country_filter = st.
|
|
|
158 |
|
159 |
with col3:
|
160 |
current_year = datetime.now().year
|
@@ -169,7 +172,8 @@ with col3:
|
|
169 |
|
170 |
with col4:
|
171 |
crs_options = ["All/Not allocated"] + get_crs_options(client, collection_name)
|
172 |
-
crs_filter = st.
|
|
|
173 |
|
174 |
with col5:
|
175 |
min_budget = st.slider(
|
@@ -187,7 +191,8 @@ col1_2, col2_2, col3_2, col4_2, col5_2 = st.columns(5)
|
|
187 |
|
188 |
with col1_2:
|
189 |
client_options = sorted(project_data["client"].dropna().unique().tolist())
|
190 |
-
client_filter = st.
|
|
|
191 |
with col2_2:
|
192 |
st.empty()
|
193 |
with col3_2:
|
@@ -195,7 +200,6 @@ with col3_2:
|
|
195 |
with col4_2:
|
196 |
st.empty()
|
197 |
with col5_2:
|
198 |
-
# Plain reset button (will be moved to row 3 as well)
|
199 |
st.button("Reset Filters", on_click=reset_filters, key="reset_button_row2")
|
200 |
|
201 |
###########################################
|
@@ -237,6 +241,7 @@ else:
|
|
237 |
semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]
|
238 |
|
239 |
# 2) Filter results based on the user’s selections
|
|
|
240 |
filtered_semantic = filter_results(
|
241 |
semantic_thresholded,
|
242 |
country_filter,
|
@@ -262,10 +267,10 @@ else:
|
|
262 |
get_country_name
|
263 |
)
|
264 |
|
265 |
-
# Additional filter by client
|
266 |
-
if
|
267 |
-
filtered_semantic = [r for r in filtered_semantic if r.payload.get("metadata", {}).get("client", "Unknown Client")
|
268 |
-
filtered_lexical = [r for r in filtered_lexical if r.payload.get("metadata", {}).get("client", "Unknown Client")
|
269 |
|
270 |
# Remove duplicates
|
271 |
filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
|
@@ -277,8 +282,8 @@ else:
|
|
277 |
except (ValueError, TypeError):
|
278 |
return value
|
279 |
|
280 |
-
# --- Reprint Query (
|
281 |
-
st.markdown(f"<div style='text-align:
|
282 |
|
283 |
# 3) Display results
|
284 |
# Lexical Search Results Branch
|
@@ -302,7 +307,7 @@ else:
|
|
302 |
if "page" not in st.session_state:
|
303 |
st.session_state.page = 1
|
304 |
current_page = st.session_state.page
|
305 |
-
# Top pagination widget (right aligned, 1/7 width)
|
306 |
col_pag_top = st.columns([6, 1])[1]
|
307 |
new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top")
|
308 |
st.session_state.page = new_page_top
|
@@ -317,7 +322,7 @@ else:
|
|
317 |
metadata["title"] = compute_title(metadata)
|
318 |
title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
|
319 |
title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
|
320 |
-
# Prepend the result number
|
321 |
st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
|
322 |
|
323 |
objective = metadata.get("objective", "None")
|
@@ -351,20 +356,23 @@ else:
|
|
351 |
new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
|
352 |
crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
|
353 |
|
354 |
-
#
|
355 |
predecessor = metadata.get("predecessor_id", "").strip()
|
356 |
successor = metadata.get("successor_id", "").strip()
|
357 |
extra_line = ""
|
358 |
-
if predecessor:
|
359 |
-
|
360 |
-
|
361 |
-
|
|
|
|
|
|
|
362 |
|
363 |
additional_text = (
|
364 |
f"**Objective:** {highlight_query(objective, var)}<br>"
|
365 |
f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
|
366 |
f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
|
367 |
-
f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b
|
368 |
+ extra_line +
|
369 |
f"<br>**Country:** {country_raw}<br>"
|
370 |
f"**Sector:** {crs_combined}"
|
@@ -393,7 +401,7 @@ else:
|
|
393 |
st.session_state.page = 1
|
394 |
current_page = st.session_state.page
|
395 |
|
396 |
-
# Top pagination widget (right aligned, 1/7 width)
|
397 |
col_pag_top = st.columns([6, 1])[1]
|
398 |
new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top_sem")
|
399 |
st.session_state.page = new_page_top
|
@@ -402,20 +410,19 @@ else:
|
|
402 |
end_index = start_index + page_size
|
403 |
top_results = filtered_semantic_no_dupe[start_index:end_index]
|
404 |
|
405 |
-
# Prominent page info with bold numbers
|
406 |
page_num = f"<b style='color: green;'>{st.session_state.page}</b>" if st.session_state.page != 1 else f"<b>{st.session_state.page}</b>"
|
407 |
total_pages_str = f"<b>{total_pages}</b>"
|
408 |
st.markdown(f"Showing **{len(top_results)}** Semantic Search results (Page {page_num} of {total_pages_str})", unsafe_allow_html=True)
|
409 |
|
410 |
-
# --- RAG Answer (
|
411 |
rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
|
412 |
bullet_lines = []
|
413 |
for line in rag_answer.splitlines():
|
414 |
if line.strip():
|
415 |
-
# Bold any numbers in the line
|
416 |
line_bold = re.sub(r'(\d+)', r'<b>\1</b>', line)
|
417 |
bullet_lines.append(f"<li>{line_bold}</li>")
|
418 |
-
formatted_rag_answer = "<ul style='text-align:
|
419 |
st.markdown(formatted_rag_answer, unsafe_allow_html=True)
|
420 |
st.divider()
|
421 |
|
@@ -424,7 +431,6 @@ else:
|
|
424 |
if "title" not in metadata:
|
425 |
metadata["title"] = compute_title(metadata)
|
426 |
title_clean = re.sub(r'<a.*?>|</a>', '', metadata["title"])
|
427 |
-
# Prepend result number and make title bold
|
428 |
st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
|
429 |
|
430 |
desc_en = metadata.get("description.en", "").strip()
|
@@ -461,16 +467,19 @@ else:
|
|
461 |
predecessor = metadata.get("predecessor_id", "").strip()
|
462 |
successor = metadata.get("successor_id", "").strip()
|
463 |
extra_line = ""
|
464 |
-
if predecessor:
|
465 |
-
|
466 |
-
|
467 |
-
|
|
|
|
|
|
|
468 |
|
469 |
additional_text = (
|
470 |
f"**Objective:** {metadata.get('objective', '')}<br>"
|
471 |
f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
|
472 |
f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
|
473 |
-
f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b
|
474 |
+ extra_line +
|
475 |
f"<br>**Country:** {country_raw}<br>"
|
476 |
f"**Sector:** {crs_combined}"
|
@@ -481,7 +490,7 @@ else:
|
|
481 |
st.markdown(additional_text, unsafe_allow_html=True)
|
482 |
st.divider()
|
483 |
|
484 |
-
# Bottom pagination widget (right aligned, 1/7 width)
|
485 |
col_pag_bot = st.columns([6, 1])[1]
|
486 |
new_page_bot = col_pag_bot.selectbox("Select Page", list(range(1, total_pages + 1)), index=st.session_state.page - 1, key="page_bot_sem")
|
487 |
st.session_state.page = new_page_bot
|
|
|
26 |
from appStore.rag_utils import (
|
27 |
highlight_query,
|
28 |
get_rag_answer,
|
29 |
+
compute_title,
|
30 |
+
format_project_id # <-- Imported formatting function
|
31 |
)
|
32 |
from appStore.filter_utils import (
|
33 |
parse_budget,
|
|
|
127 |
# Define reset_filters function using session_state
|
128 |
###########################################
|
129 |
def reset_filters():
|
130 |
+
st.session_state["region_filter"] = ["All/Not allocated"]
|
131 |
+
st.session_state["country_filter"] = ["All/Not allocated"]
|
132 |
current_year = datetime.now().year
|
133 |
default_start_year = current_year - 4
|
134 |
st.session_state["end_year_range"] = (default_start_year, max_end_year)
|
135 |
+
st.session_state["crs_filter"] = ["All/Not allocated"]
|
136 |
st.session_state["min_budget"] = min_budget_val
|
137 |
+
st.session_state["client_filter"] = ["All/Not allocated"]
|
138 |
st.session_state["query"] = ""
|
139 |
st.session_state["show_exact_matches"] = False
|
140 |
st.session_state["page"] = 1
|
|
|
145 |
col1, col2, col3, col4, col5 = st.columns([1, 1, 1, 1, 1])
|
146 |
|
147 |
with col1:
|
148 |
+
region_filter = st.multiselect("Region", options=["All/Not allocated"] + sorted(unique_sub_regions),
|
149 |
+
default=["All/Not allocated"], key="region_filter")
|
150 |
+
if "All/Not allocated" in region_filter or not region_filter:
|
151 |
filtered_country_names = unique_country_names
|
152 |
else:
|
153 |
filtered_country_names = [
|
154 |
name for name, code in country_name_mapping.items()
|
155 |
+
if iso_code_to_sub_region.get(code) in region_filter
|
156 |
]
|
157 |
|
158 |
with col2:
|
159 |
+
country_filter = st.multiselect("Country", options=["All/Not allocated"] + filtered_country_names,
|
160 |
+
default=["All/Not allocated"], key="country_filter")
|
161 |
|
162 |
with col3:
|
163 |
current_year = datetime.now().year
|
|
|
172 |
|
173 |
with col4:
|
174 |
crs_options = ["All/Not allocated"] + get_crs_options(client, collection_name)
|
175 |
+
crs_filter = st.multiselect("CRS", options=crs_options,
|
176 |
+
default=["All/Not allocated"], key="crs_filter")
|
177 |
|
178 |
with col5:
|
179 |
min_budget = st.slider(
|
|
|
191 |
|
192 |
with col1_2:
|
193 |
client_options = sorted(project_data["client"].dropna().unique().tolist())
|
194 |
+
client_filter = st.multiselect("Client", options=["All/Not allocated"] + client_options,
|
195 |
+
default=["All/Not allocated"], key="client_filter")
|
196 |
with col2_2:
|
197 |
st.empty()
|
198 |
with col3_2:
|
|
|
200 |
with col4_2:
|
201 |
st.empty()
|
202 |
with col5_2:
|
|
|
203 |
st.button("Reset Filters", on_click=reset_filters, key="reset_button_row2")
|
204 |
|
205 |
###########################################
|
|
|
241 |
semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]
|
242 |
|
243 |
# 2) Filter results based on the user’s selections
|
244 |
+
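# (Assumes filter_results treats a selection containing "All/Not allocated" as "no filter"; the filters are now lists from st.multiselect rather than single strings -- TODO confirm filter_results accepts lists)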
|
245 |
filtered_semantic = filter_results(
|
246 |
semantic_thresholded,
|
247 |
country_filter,
|
|
|
267 |
get_country_name
|
268 |
)
|
269 |
|
270 |
+
# Additional filter by client (if "All/Not allocated" is not selected)
|
271 |
+
if "All/Not allocated" not in client_filter:
|
272 |
+
filtered_semantic = [r for r in filtered_semantic if r.payload.get("metadata", {}).get("client", "Unknown Client") in client_filter]
|
273 |
+
filtered_lexical = [r for r in filtered_lexical if r.payload.get("metadata", {}).get("client", "Unknown Client") in client_filter]
|
274 |
|
275 |
# Remove duplicates
|
276 |
filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
|
|
|
282 |
except (ValueError, TypeError):
|
283 |
return value
|
284 |
|
285 |
+
# --- Reprint Query (Left aligned with "Query:") ---
|
286 |
+
st.markdown(f"<div style='text-align: left; font-size:2.1em; font-style: italic; font-weight: bold;'>Query: {var}</div>", unsafe_allow_html=True)
|
287 |
|
288 |
# 3) Display results
|
289 |
# Lexical Search Results Branch
|
|
|
307 |
if "page" not in st.session_state:
|
308 |
st.session_state.page = 1
|
309 |
current_page = st.session_state.page
|
310 |
+
# Top pagination widget (right aligned, occupying 1/7 of page width)
|
311 |
col_pag_top = st.columns([6, 1])[1]
|
312 |
new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top")
|
313 |
st.session_state.page = new_page_top
|
|
|
322 |
metadata["title"] = compute_title(metadata)
|
323 |
title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
|
324 |
title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
|
325 |
+
# Prepend the result number and make title bold
|
326 |
st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
|
327 |
|
328 |
objective = metadata.get("objective", "None")
|
|
|
356 |
new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
|
357 |
crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
|
358 |
|
359 |
+
# Combine Predecessor and Successor (if available) in one row
|
360 |
predecessor = metadata.get("predecessor_id", "").strip()
|
361 |
successor = metadata.get("successor_id", "").strip()
|
362 |
extra_line = ""
|
363 |
+
if predecessor or successor:
|
364 |
+
parts = []
|
365 |
+
if predecessor:
|
366 |
+
parts.append(f"**Predecessor Project:** {format_project_id(predecessor)}")
|
367 |
+
if successor:
|
368 |
+
parts.append(f"**Successor Project:** {format_project_id(successor)}")
|
369 |
+
extra_line = "<br>" + " | ".join(parts)
|
370 |
|
371 |
additional_text = (
|
372 |
f"**Objective:** {highlight_query(objective, var)}<br>"
|
373 |
f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
|
374 |
f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
|
375 |
+
f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b>"
|
376 |
+ extra_line +
|
377 |
f"<br>**Country:** {country_raw}<br>"
|
378 |
f"**Sector:** {crs_combined}"
|
|
|
401 |
st.session_state.page = 1
|
402 |
current_page = st.session_state.page
|
403 |
|
404 |
+
# Top pagination widget (right aligned, occupying 1/7 of page width)
|
405 |
col_pag_top = st.columns([6, 1])[1]
|
406 |
new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top_sem")
|
407 |
st.session_state.page = new_page_top
|
|
|
410 |
end_index = start_index + page_size
|
411 |
top_results = filtered_semantic_no_dupe[start_index:end_index]
|
412 |
|
413 |
+
# Prominent page info with bold numbers (green if current page != 1)
|
414 |
page_num = f"<b style='color: green;'>{st.session_state.page}</b>" if st.session_state.page != 1 else f"<b>{st.session_state.page}</b>"
|
415 |
total_pages_str = f"<b>{total_pages}</b>"
|
416 |
st.markdown(f"Showing **{len(top_results)}** Semantic Search results (Page {page_num} of {total_pages_str})", unsafe_allow_html=True)
|
417 |
|
418 |
+
# --- RAG Answer (Left aligned, bullet points, with bold numbers) ---
|
419 |
rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
|
420 |
bullet_lines = []
|
421 |
for line in rag_answer.splitlines():
|
422 |
if line.strip():
|
|
|
423 |
line_bold = re.sub(r'(\d+)', r'<b>\1</b>', line)
|
424 |
bullet_lines.append(f"<li>{line_bold}</li>")
|
425 |
+
formatted_rag_answer = "<ul style='text-align: left; list-style-position: inside;'>" + "".join(bullet_lines) + "</ul>"
|
426 |
st.markdown(formatted_rag_answer, unsafe_allow_html=True)
|
427 |
st.divider()
|
428 |
|
|
|
431 |
if "title" not in metadata:
|
432 |
metadata["title"] = compute_title(metadata)
|
433 |
title_clean = re.sub(r'<a.*?>|</a>', '', metadata["title"])
|
|
|
434 |
st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
|
435 |
|
436 |
desc_en = metadata.get("description.en", "").strip()
|
|
|
467 |
predecessor = metadata.get("predecessor_id", "").strip()
|
468 |
successor = metadata.get("successor_id", "").strip()
|
469 |
extra_line = ""
|
470 |
+
if predecessor or successor:
|
471 |
+
parts = []
|
472 |
+
if predecessor:
|
473 |
+
parts.append(f"**Predecessor Project:** {format_project_id(predecessor)}")
|
474 |
+
if successor:
|
475 |
+
parts.append(f"**Successor Project:** {format_project_id(successor)}")
|
476 |
+
extra_line = "<br>" + " | ".join(parts)
|
477 |
|
478 |
additional_text = (
|
479 |
f"**Objective:** {metadata.get('objective', '')}<br>"
|
480 |
f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
|
481 |
f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
|
482 |
+
f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b>"
|
483 |
+ extra_line +
|
484 |
f"<br>**Country:** {country_raw}<br>"
|
485 |
f"**Sector:** {crs_combined}"
|
|
|
490 |
st.markdown(additional_text, unsafe_allow_html=True)
|
491 |
st.divider()
|
492 |
|
493 |
+
# Bottom pagination widget (right aligned, occupying 1/7 of page width)
|
494 |
col_pag_bot = st.columns([6, 1])[1]
|
495 |
new_page_bot = col_pag_bot.selectbox("Select Page", list(range(1, total_pages + 1)), index=st.session_state.page - 1, key="page_bot_sem")
|
496 |
st.session_state.page = new_page_bot
|