Spaces:

GIZ
/

Development-Project-Synergy-Finder

Sleeping

App Files Files Community

Jan Mühlnikel committed on Mar 18, 2024

Commit

e3302f1

1 Parent(s): 55a6bd8

added country and orga filter

Browse files

Files changed (5) hide show

__pycache__/similarity_page.cpython-310.pyc +0 -0
functions/__pycache__/filter_projects.cpython-310.pyc +0 -0
functions/filter_projects.py +19 -1
modules/filter_modules.py +0 -21
similarity_page.py +40 -2

__pycache__/similarity_page.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/similarity_page.cpython-310.pyc and b/__pycache__/similarity_page.cpython-310.pyc differ

functions/__pycache__/filter_projects.cpython-310.pyc CHANGED Viewed

Binary files a/functions/__pycache__/filter_projects.cpython-310.pyc and b/functions/__pycache__/filter_projects.cpython-310.pyc differ

functions/filter_projects.py CHANGED Viewed

@@ -4,8 +4,11 @@ def contains_code(crs_codes, code_list):
     codes = str(crs_codes).split(';')
     return any(code in code_list for code in codes)
-def filter_projects(df, crs3_list, crs5_list, sdg_str):
     if crs3_list != [] or crs5_list != [] or sdg_str != "":
         if crs3_list and not crs5_list:
             df = df[df['crs_3_code'].apply(lambda x: contains_code(x, crs3_list))]
         elif crs3_list and crs5_list:
@@ -13,9 +16,24 @@ def filter_projects(df, crs3_list, crs5_list, sdg_str):
         elif not crs3_list and crs5_list:
             df = df[df['crs_5_code'].apply(lambda x: contains_code(x, crs5_list))]
         if sdg_str != "":
             df = df[df["sgd_pred_code"] == int(sdg_str)]
         return df

     codes = str(crs_codes).split(';')
     return any(code in code_list for code in codes)
+def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list):
+    # Apply the filters only if at least one of the CRS or SDG selections is non-empty
     if crs3_list != [] or crs5_list != [] or sdg_str != "":
+        # FILTER CRS
         if crs3_list and not crs5_list:
             df = df[df['crs_3_code'].apply(lambda x: contains_code(x, crs3_list))]
         elif crs3_list and crs5_list:
         elif not crs3_list and crs5_list:
             df = df[df['crs_5_code'].apply(lambda x: contains_code(x, crs5_list))]
+        # FILTER SDG
         if sdg_str != "":
             df = df[df["sgd_pred_code"] == int(sdg_str)]
+        # FILTER COUNTRY
+        if country_code_list != []:
+            country_filtered_df = pd.DataFrame()
+            for c in country_code_list:
+                c_df = df[df["country"].str.contains(c, na=False)]
+                country_filtered_df = pd.concat([country_filtered_df, c_df], ignore_index=True)
+            df = country_filtered_df
+        # FILTER ORGANIZATION
+        if orga_code_list != []:
+            df = df[df['orga_abbreviation'].isin(orga_code_list)]
         return df

modules/filter_modules.py DELETED Viewed

@@ -1,21 +0,0 @@
-import pandas as pd
-import streamlit as st
-def country_option(special_cases, country_names):
-    country_option = st.multiselect(
-                'Country / Countries',
-                special_cases + country_names,
-                placeholder="Select"
-                )
-    return country_option
-def orga_option(special_cases, orga_names):
-    orga_list = special_cases + [f"{v[0]} ({k})" for k, v in orga_names.items()]
-    orga_option = st.multiselect(
-                'Development Bank / Organization',
-                orga_list,
-                placeholder="Select"
-                )
-    return orga_option

similarity_page.py CHANGED Viewed

@@ -79,6 +79,18 @@ def getSDG():
     return SDG_NAMES
 # Load Sentence Transformer Model
 @st.cache_resource
 def load_model():
@@ -110,6 +122,8 @@ CRS3_MERGED = getCRS3()
 CRS5_MERGED = getCRS5()
 SDG_NAMES = getSDG()
 model = load_model()
 sentences, embeddings, faiss_index = load_embeddings_and_index()
@@ -153,7 +167,25 @@ def show_page():
     with col2:
-        st.write("x")
     # CRS CODE LIST
@@ -166,8 +198,14 @@ def show_page():
     else:
         sdg_str = ""
     # FILTER DF WITH SELECTED FILTER OPTIONS
-    filtered_df = filter_projects(projects_df, crs3_list, crs5_list, sdg_str)
     # FIND MATCHES
     p1_df, p2_df = calc_matches(filtered_df, projects_df, sim_matrix)

     return SDG_NAMES
+# Load Country Data
+@st.cache_data
+def getCountry():
+    # Read in countries from codelist
+    country_df = pd.read_csv('src/codelists/country_codes_ISO3166-1alpha-2.csv')
+    COUNTRY_CODES = country_df['Alpha-2 code'].tolist()
+    COUNTRY_NAMES = country_df['Country'].tolist()
+    COUNTRY_OPTION_LIST = [f"{COUNTRY_NAMES[i]} ({COUNTRY_CODES[i][-3:-1].upper()})"for i in range(len(COUNTRY_NAMES))]
+    return COUNTRY_OPTION_LIST
 # Load Sentence Transformer Model
 @st.cache_resource
 def load_model():
 CRS5_MERGED = getCRS5()
 SDG_NAMES = getSDG()
+COUNTRY_OPTION_LIST = getCountry()
 model = load_model()
 sentences, embeddings, faiss_index = load_embeddings_and_index()
     with col2:
+        # COUNTRY SELECTION
+        country_option = st.multiselect(
+                'Country / Countries',
+                COUNTRY_OPTION_LIST,
+                placeholder="Select"
+                )
+        # ORGA SELECTION
+        orga_abbreviation = projects_df["orga_abbreviation"].unique()
+        orga_full_names = projects_df["orga_full_name"].unique()
+        orga_list = [f"{orga_full_names[i]} ({orga_abbreviation[i].upper()})"for i in range(len(orga_abbreviation))]
+        orga_option = st.multiselect(
+                'Development Bank / Organization',
+                orga_list,
+                placeholder="Select"
+                )
     # CRS CODE LIST
     else:
         sdg_str = ""
+    # COUNTRY CODES LIST
+    country_code_list = [option[-3:-1] for option in country_option]
+    # ORGANIZATION CODES LIST
+    orga_code_list = [option.split("(")[1][:-1].lower() for option in orga_option]
     # FILTER DF WITH SELECTED FILTER OPTIONS
+    filtered_df = filter_projects(projects_df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list)
     # FIND MATCHES
     p1_df, p2_df = calc_matches(filtered_df, projects_df, sim_matrix)