Testys committed on
Commit
b73a811
·
1 Parent(s): a223079

Update search_utils.py

Browse files
Files changed (1) hide show
  1. search_utils.py +26 -1
search_utils.py CHANGED
@@ -141,4 +141,29 @@ class SemanticSearch:
141
  np.array(all_distances[:min_length]),
142
  np.array(all_global_indices[:min_length]),
143
  top_k
144
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  np.array(all_distances[:min_length]),
142
  np.array(all_global_indices[:min_length]),
143
  top_k
144
+ )
145
+
146
+ def _process_results(self, distances, global_indices, top_k):
147
+ """Process raw search results into formatted DataFrame"""
148
+ if len(global_indices) == 0 or len(distances) == 0:
149
+ return pd.DataFrame(columns=["title", "summary", "source", "similarity"])
150
+
151
+ try:
152
+ # Get metadata for valid indices
153
+ results = self.metadata_mgr.get_metadata(global_indices)
154
+
155
+ # Calculate similarity scores (convert L2 distance to cosine similarity approximation)
156
+ results['similarity'] = 1 - (distances / 2)
157
+
158
+ # Deduplicate results based on title and source
159
+ results = results.drop_duplicates(subset=["title", "source"])
160
+
161
+ # Sort by similarity and select top results
162
+ results = results.sort_values("similarity", ascending=False).head(top_k)
163
+
164
+ # Reset index for clean display
165
+ return results.reset_index(drop=True)
166
+
167
+ except Exception as e:
168
+ st.error(f"Error processing results: {str(e)}")
169
+ return pd.DataFrame(columns=["title", "summary", "source", "similarity"])