annikwag committed on
Commit
aa85f38
·
verified ·
1 Parent(s): 2e2d2bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -38
app.py CHANGED
@@ -26,7 +26,8 @@ from appStore.region_utils import (
26
  from appStore.rag_utils import (
27
  highlight_query,
28
  get_rag_answer,
29
- compute_title
 
30
  )
31
  from appStore.filter_utils import (
32
  parse_budget,
@@ -126,14 +127,14 @@ unique_country_names = sorted(country_name_mapping.keys())
126
  # Define reset_filters function using session_state
127
  ###########################################
128
  def reset_filters():
129
- st.session_state["region_filter"] = "All/Not allocated"
130
- st.session_state["country_filter"] = "All/Not allocated"
131
  current_year = datetime.now().year
132
  default_start_year = current_year - 4
133
  st.session_state["end_year_range"] = (default_start_year, max_end_year)
134
- st.session_state["crs_filter"] = "All/Not allocated"
135
  st.session_state["min_budget"] = min_budget_val
136
- st.session_state["client_filter"] = "All/Not allocated"
137
  st.session_state["query"] = ""
138
  st.session_state["show_exact_matches"] = False
139
  st.session_state["page"] = 1
@@ -144,17 +145,19 @@ def reset_filters():
144
  col1, col2, col3, col4, col5 = st.columns([1, 1, 1, 1, 1])
145
 
146
  with col1:
147
- region_filter = st.selectbox("Region", ["All/Not allocated"] + sorted(unique_sub_regions), key="region_filter")
148
- if region_filter == "All/Not allocated":
 
149
  filtered_country_names = unique_country_names
150
  else:
151
  filtered_country_names = [
152
  name for name, code in country_name_mapping.items()
153
- if iso_code_to_sub_region.get(code) == region_filter
154
  ]
155
 
156
  with col2:
157
- country_filter = st.selectbox("Country", ["All/Not allocated"] + filtered_country_names, key="country_filter")
 
158
 
159
  with col3:
160
  current_year = datetime.now().year
@@ -169,7 +172,8 @@ with col3:
169
 
170
  with col4:
171
  crs_options = ["All/Not allocated"] + get_crs_options(client, collection_name)
172
- crs_filter = st.selectbox("CRS", crs_options, key="crs_filter")
 
173
 
174
  with col5:
175
  min_budget = st.slider(
@@ -187,7 +191,8 @@ col1_2, col2_2, col3_2, col4_2, col5_2 = st.columns(5)
187
 
188
  with col1_2:
189
  client_options = sorted(project_data["client"].dropna().unique().tolist())
190
- client_filter = st.selectbox("Client", ["All/Not allocated"] + client_options, key="client_filter")
 
191
  with col2_2:
192
  st.empty()
193
  with col3_2:
@@ -195,7 +200,6 @@ with col3_2:
195
  with col4_2:
196
  st.empty()
197
  with col5_2:
198
- # Plain reset button (will be moved to row 3 as well)
199
  st.button("Reset Filters", on_click=reset_filters, key="reset_button_row2")
200
 
201
  ###########################################
@@ -237,6 +241,7 @@ else:
237
  semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]
238
 
239
  # 2) Filter results based on the user’s selections
 
240
  filtered_semantic = filter_results(
241
  semantic_thresholded,
242
  country_filter,
@@ -262,10 +267,10 @@ else:
262
  get_country_name
263
  )
264
 
265
- # Additional filter by client
266
- if client_filter != "All/Not allocated":
267
- filtered_semantic = [r for r in filtered_semantic if r.payload.get("metadata", {}).get("client", "Unknown Client") == client_filter]
268
- filtered_lexical = [r for r in filtered_lexical if r.payload.get("metadata", {}).get("client", "Unknown Client") == client_filter]
269
 
270
  # Remove duplicates
271
  filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
@@ -277,8 +282,8 @@ else:
277
  except (ValueError, TypeError):
278
  return value
279
 
280
- # --- Reprint Query (Right Aligned with "Query:") ---
281
- st.markdown(f"<div style='text-align: right; font-size:2.1em; font-style: italic; font-weight: bold;'>Query: {var}</div>", unsafe_allow_html=True)
282
 
283
  # 3) Display results
284
  # Lexical Search Results Branch
@@ -302,7 +307,7 @@ else:
302
  if "page" not in st.session_state:
303
  st.session_state.page = 1
304
  current_page = st.session_state.page
305
- # Top pagination widget (right aligned, 1/7 width)
306
  col_pag_top = st.columns([6, 1])[1]
307
  new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top")
308
  st.session_state.page = new_page_top
@@ -317,7 +322,7 @@ else:
317
  metadata["title"] = compute_title(metadata)
318
  title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
319
  title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
320
- # Prepend the result number
321
  st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
322
 
323
  objective = metadata.get("objective", "None")
@@ -351,20 +356,23 @@ else:
351
  new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
352
  crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
353
 
354
- # Insert Predecessor/Successor line if available
355
  predecessor = metadata.get("predecessor_id", "").strip()
356
  successor = metadata.get("successor_id", "").strip()
357
  extra_line = ""
358
- if predecessor:
359
- extra_line += f"<br>**Predecessor Project:** {predecessor}"
360
- if successor:
361
- extra_line += f"<br>**Successor Project:** {successor}"
 
 
 
362
 
363
  additional_text = (
364
  f"**Objective:** {highlight_query(objective, var)}<br>"
365
  f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
366
  f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
367
- f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b><br>"
368
  + extra_line +
369
  f"<br>**Country:** {country_raw}<br>"
370
  f"**Sector:** {crs_combined}"
@@ -393,7 +401,7 @@ else:
393
  st.session_state.page = 1
394
  current_page = st.session_state.page
395
 
396
- # Top pagination widget (right aligned, 1/7 width)
397
  col_pag_top = st.columns([6, 1])[1]
398
  new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top_sem")
399
  st.session_state.page = new_page_top
@@ -402,20 +410,19 @@ else:
402
  end_index = start_index + page_size
403
  top_results = filtered_semantic_no_dupe[start_index:end_index]
404
 
405
- # Prominent page info with bold numbers and green highlight if current page is not 1
406
  page_num = f"<b style='color: green;'>{st.session_state.page}</b>" if st.session_state.page != 1 else f"<b>{st.session_state.page}</b>"
407
  total_pages_str = f"<b>{total_pages}</b>"
408
  st.markdown(f"Showing **{len(top_results)}** Semantic Search results (Page {page_num} of {total_pages_str})", unsafe_allow_html=True)
409
 
410
- # --- RAG Answer (Right aligned, bullet points, bold numbers) ---
411
  rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
412
  bullet_lines = []
413
  for line in rag_answer.splitlines():
414
  if line.strip():
415
- # Bold any numbers in the line
416
  line_bold = re.sub(r'(\d+)', r'<b>\1</b>', line)
417
  bullet_lines.append(f"<li>{line_bold}</li>")
418
- formatted_rag_answer = "<ul style='text-align: right; list-style-position: inside;'>" + "".join(bullet_lines) + "</ul>"
419
  st.markdown(formatted_rag_answer, unsafe_allow_html=True)
420
  st.divider()
421
 
@@ -424,7 +431,6 @@ else:
424
  if "title" not in metadata:
425
  metadata["title"] = compute_title(metadata)
426
  title_clean = re.sub(r'<a.*?>|</a>', '', metadata["title"])
427
- # Prepend result number and make title bold
428
  st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
429
 
430
  desc_en = metadata.get("description.en", "").strip()
@@ -461,16 +467,19 @@ else:
461
  predecessor = metadata.get("predecessor_id", "").strip()
462
  successor = metadata.get("successor_id", "").strip()
463
  extra_line = ""
464
- if predecessor:
465
- extra_line += f"<br>**Predecessor Project:** {predecessor}"
466
- if successor:
467
- extra_line += f"<br>**Successor Project:** {successor}"
 
 
 
468
 
469
  additional_text = (
470
  f"**Objective:** {metadata.get('objective', '')}<br>"
471
  f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
472
  f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
473
- f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b><br>"
474
  + extra_line +
475
  f"<br>**Country:** {country_raw}<br>"
476
  f"**Sector:** {crs_combined}"
@@ -481,7 +490,7 @@ else:
481
  st.markdown(additional_text, unsafe_allow_html=True)
482
  st.divider()
483
 
484
- # Bottom pagination widget (right aligned, 1/7 width)
485
  col_pag_bot = st.columns([6, 1])[1]
486
  new_page_bot = col_pag_bot.selectbox("Select Page", list(range(1, total_pages + 1)), index=st.session_state.page - 1, key="page_bot_sem")
487
  st.session_state.page = new_page_bot
 
26
  from appStore.rag_utils import (
27
  highlight_query,
28
  get_rag_answer,
29
+ compute_title,
30
+ format_project_id # <-- Imported formatting function
31
  )
32
  from appStore.filter_utils import (
33
  parse_budget,
 
127
  # Define reset_filters function using session_state
128
  ###########################################
129
  def reset_filters():
130
+ st.session_state["region_filter"] = ["All/Not allocated"]
131
+ st.session_state["country_filter"] = ["All/Not allocated"]
132
  current_year = datetime.now().year
133
  default_start_year = current_year - 4
134
  st.session_state["end_year_range"] = (default_start_year, max_end_year)
135
+ st.session_state["crs_filter"] = ["All/Not allocated"]
136
  st.session_state["min_budget"] = min_budget_val
137
+ st.session_state["client_filter"] = ["All/Not allocated"]
138
  st.session_state["query"] = ""
139
  st.session_state["show_exact_matches"] = False
140
  st.session_state["page"] = 1
 
145
  col1, col2, col3, col4, col5 = st.columns([1, 1, 1, 1, 1])
146
 
147
  with col1:
148
+ region_filter = st.multiselect("Region", options=["All/Not allocated"] + sorted(unique_sub_regions),
149
+ default=["All/Not allocated"], key="region_filter")
150
+ if "All/Not allocated" in region_filter or not region_filter:
151
  filtered_country_names = unique_country_names
152
  else:
153
  filtered_country_names = [
154
  name for name, code in country_name_mapping.items()
155
+ if iso_code_to_sub_region.get(code) in region_filter
156
  ]
157
 
158
  with col2:
159
+ country_filter = st.multiselect("Country", options=["All/Not allocated"] + filtered_country_names,
160
+ default=["All/Not allocated"], key="country_filter")
161
 
162
  with col3:
163
  current_year = datetime.now().year
 
172
 
173
  with col4:
174
  crs_options = ["All/Not allocated"] + get_crs_options(client, collection_name)
175
+ crs_filter = st.multiselect("CRS", options=crs_options,
176
+ default=["All/Not allocated"], key="crs_filter")
177
 
178
  with col5:
179
  min_budget = st.slider(
 
191
 
192
  with col1_2:
193
  client_options = sorted(project_data["client"].dropna().unique().tolist())
194
+ client_filter = st.multiselect("Client", options=["All/Not allocated"] + client_options,
195
+ default=["All/Not allocated"], key="client_filter")
196
  with col2_2:
197
  st.empty()
198
  with col3_2:
 
200
  with col4_2:
201
  st.empty()
202
  with col5_2:
 
203
  st.button("Reset Filters", on_click=reset_filters, key="reset_button_row2")
204
 
205
  ###########################################
 
241
  semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]
242
 
243
  # 2) Filter results based on the user’s selections
244
+ # (Assuming filter_results can handle a string "All/Not allocated" as meaning "no filter")
245
  filtered_semantic = filter_results(
246
  semantic_thresholded,
247
  country_filter,
 
267
  get_country_name
268
  )
269
 
270
+ # Additional filter by client (if "All/Not allocated" is not selected)
271
+ if "All/Not allocated" not in client_filter:
272
+ filtered_semantic = [r for r in filtered_semantic if r.payload.get("metadata", {}).get("client", "Unknown Client") in client_filter]
273
+ filtered_lexical = [r for r in filtered_lexical if r.payload.get("metadata", {}).get("client", "Unknown Client") in client_filter]
274
 
275
  # Remove duplicates
276
  filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
 
282
  except (ValueError, TypeError):
283
  return value
284
 
285
+ # --- Reprint Query (Left aligned with "Query:") ---
286
+ st.markdown(f"<div style='text-align: left; font-size:2.1em; font-style: italic; font-weight: bold;'>Query: {var}</div>", unsafe_allow_html=True)
287
 
288
  # 3) Display results
289
  # Lexical Search Results Branch
 
307
  if "page" not in st.session_state:
308
  st.session_state.page = 1
309
  current_page = st.session_state.page
310
+ # Top pagination widget (right aligned, occupying 1/7 of page width)
311
  col_pag_top = st.columns([6, 1])[1]
312
  new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top")
313
  st.session_state.page = new_page_top
 
322
  metadata["title"] = compute_title(metadata)
323
  title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
324
  title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
325
+ # Prepend the result number and make title bold
326
  st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
327
 
328
  objective = metadata.get("objective", "None")
 
356
  new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
357
  crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
358
 
359
+ # Combine Predecessor and Successor (if available) in one row
360
  predecessor = metadata.get("predecessor_id", "").strip()
361
  successor = metadata.get("successor_id", "").strip()
362
  extra_line = ""
363
+ if predecessor or successor:
364
+ parts = []
365
+ if predecessor:
366
+ parts.append(f"**Predecessor Project:** {format_project_id(predecessor)}")
367
+ if successor:
368
+ parts.append(f"**Successor Project:** {format_project_id(successor)}")
369
+ extra_line = "<br>" + " | ".join(parts)
370
 
371
  additional_text = (
372
  f"**Objective:** {highlight_query(objective, var)}<br>"
373
  f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
374
  f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
375
+ f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b>"
376
  + extra_line +
377
  f"<br>**Country:** {country_raw}<br>"
378
  f"**Sector:** {crs_combined}"
 
401
  st.session_state.page = 1
402
  current_page = st.session_state.page
403
 
404
+ # Top pagination widget (right aligned, occupying 1/7 of page width)
405
  col_pag_top = st.columns([6, 1])[1]
406
  new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top_sem")
407
  st.session_state.page = new_page_top
 
410
  end_index = start_index + page_size
411
  top_results = filtered_semantic_no_dupe[start_index:end_index]
412
 
413
+ # Prominent page info with bold numbers (green if current page != 1)
414
  page_num = f"<b style='color: green;'>{st.session_state.page}</b>" if st.session_state.page != 1 else f"<b>{st.session_state.page}</b>"
415
  total_pages_str = f"<b>{total_pages}</b>"
416
  st.markdown(f"Showing **{len(top_results)}** Semantic Search results (Page {page_num} of {total_pages_str})", unsafe_allow_html=True)
417
 
418
+ # --- RAG Answer (Left aligned, bullet points, with bold numbers) ---
419
  rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
420
  bullet_lines = []
421
  for line in rag_answer.splitlines():
422
  if line.strip():
 
423
  line_bold = re.sub(r'(\d+)', r'<b>\1</b>', line)
424
  bullet_lines.append(f"<li>{line_bold}</li>")
425
+ formatted_rag_answer = "<ul style='text-align: left; list-style-position: inside;'>" + "".join(bullet_lines) + "</ul>"
426
  st.markdown(formatted_rag_answer, unsafe_allow_html=True)
427
  st.divider()
428
 
 
431
  if "title" not in metadata:
432
  metadata["title"] = compute_title(metadata)
433
  title_clean = re.sub(r'<a.*?>|</a>', '', metadata["title"])
 
434
  st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
435
 
436
  desc_en = metadata.get("description.en", "").strip()
 
467
  predecessor = metadata.get("predecessor_id", "").strip()
468
  successor = metadata.get("successor_id", "").strip()
469
  extra_line = ""
470
+ if predecessor or successor:
471
+ parts = []
472
+ if predecessor:
473
+ parts.append(f"**Predecessor Project:** {format_project_id(predecessor)}")
474
+ if successor:
475
+ parts.append(f"**Successor Project:** {format_project_id(successor)}")
476
+ extra_line = "<br>" + " | ".join(parts)
477
 
478
  additional_text = (
479
  f"**Objective:** {metadata.get('objective', '')}<br>"
480
  f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
481
  f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
482
+ f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b>"
483
  + extra_line +
484
  f"<br>**Country:** {country_raw}<br>"
485
  f"**Sector:** {crs_combined}"
 
490
  st.markdown(additional_text, unsafe_allow_html=True)
491
  st.divider()
492
 
493
+ # Bottom pagination widget (right aligned, occupying 1/7 width)
494
  col_pag_bot = st.columns([6, 1])[1]
495
  new_page_bot = col_pag_bot.selectbox("Select Page", list(range(1, total_pages + 1)), index=st.session_state.page - 1, key="page_bot_sem")
496
  st.session_state.page = new_page_bot