Spaces:

GIZ
/

Development-Project-Synergy-Finder

Sleeping

App Files Files Community

Jan Mühlnikel committed on May 26, 2024

Commit

0ef6d21

1 Parent(s): 7d8805d

experiment

Browse files

Files changed (2) hide show

functions/{single_similar.py → single_project_matching.py} +19 -31
similarity_page.py +1 -1

functions/{single_similar.py → single_project_matching.py} RENAMED Viewed

@@ -1,54 +1,42 @@
-import pandas as pd
 import numpy as np
 from scipy.sparse import csr_matrix
 """
-def find_similar(p_index, similarity_matrix, filtered_df, top_x):
-    # filter out just projects from filtered df
-    filtered_indices = filtered_df.index.tolist()
-    index_position_mapping = {position: index for position, index in enumerate(filtered_indices)}
-    filtered_column_sim_matrix = similarity_matrix[:, filtered_indices]
-    # filter out the row of the selected poject
-    project_row = filtered_column_sim_matrix[p_index]
-    sorted_indices = np.argsort(project_row)
-    top_10_indices_descending = sorted_indices[-10:][::-1]
-    #top_10_original_indices = [index_position_mapping[position] for position in top_10_indices_descending]
-    top_10_values_descending = project_row[top_10_indices_descending]
-    result_df = filtered_df.iloc[top_10_indices_descending]
-    result_df["similarity"] = top_10_values_descending
-    return result_df
 """
 def find_similar(p_index, similarity_matrix, filtered_df, top_x):
-    # Ensure the similarity_matrix is in a suitable sparse format like CSR
     if not isinstance(similarity_matrix, csr_matrix):
         similarity_matrix = csr_matrix(similarity_matrix)
-    # Filter out just projects from filtered_df
-    filtered_indices = filtered_df.index.tolist()
-    # Create a mapping from new position to original indices
     index_position_mapping = {position: index for position, index in enumerate(filtered_indices)}
-    # Extract the submatrix corresponding to the filtered indices
-    filtered_column_sim_matrix = similarity_matrix[:, filtered_indices]
-    # Extract the row for the selected project efficiently
-    # Convert the sparse row slice to a dense array for argsort function
     project_row = filtered_column_sim_matrix.getrow(p_index).toarray().ravel()
-    # Find top_x indices with the highest similarity scores
     sorted_indices = np.argsort(project_row)[-top_x:][::-1]
     top_indices = [index_position_mapping[i] for i in sorted_indices]
     top_values = project_row[sorted_indices]
-    # Prepare the result DataFrame
     result_df = filtered_df.loc[top_indices]
     result_df['similarity'] = top_values

 import numpy as np
 from scipy.sparse import csr_matrix
 """
+Function to find similar projects for the single project matching.
+Single Project Matching empowers you to choose an individual project using
+either the project IATI ID or title, and then unveils the top x projects within a filter (filtered_df) that
+bear the closest resemblance to your selected one (p_index).
 """
 def find_similar(p_index, similarity_matrix, filtered_df, top_x):
+    """
+    p_index: index of selected project
+    similarity_matrix: matrix with similarities of all projects
+    filtered_df: df with filter applied
+    top_x: number of most similar projects that should be displayed
+    """
+    # convert npz sparse matrix into csr matrix
     if not isinstance(similarity_matrix, csr_matrix):
         similarity_matrix = csr_matrix(similarity_matrix)
+    # filter out just projects from filtered_df
+    filtered_indices =  filtered_df.index.tolist()
+    filtered_column_sim_matrix = similarity_matrix[:, filtered_indices]
+    # create a mapping from new position to original indices
     index_position_mapping = {position: index for position, index in enumerate(filtered_indices)}
+    # select just the row of the similarity matrix for the selected project index
     project_row = filtered_column_sim_matrix.getrow(p_index).toarray().ravel()
+    # find top_x indices with the highest similarity scores in the row
     sorted_indices = np.argsort(project_row)[-top_x:][::-1]
     top_indices = [index_position_mapping[i] for i in sorted_indices]
     top_values = project_row[sorted_indices]
+    # create result df with all top_x similar projects
     result_df = filtered_df.loc[top_indices]
     result_df['similarity'] = top_values

similarity_page.py CHANGED Viewed

@@ -16,7 +16,7 @@ from functions.filter_projects import filter_projects
 from functions.filter_single import filter_single
 from functions.multi_project_matching import calc_multi_matches
 from functions.same_country_filter import same_country_filter
-from functions.single_similar import find_similar
 #import psutil
 import os
 import gc

 from functions.filter_single import filter_single
 from functions.multi_project_matching import calc_multi_matches
 from functions.same_country_filter import same_country_filter
+from functions.single_project_matching import find_similar
 #import psutil
 import os
 import gc