Zwounds committed on
Commit
3121c5d
·
verified ·
1 Parent(s): 68283c7

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -10
app.py CHANGED
@@ -176,16 +176,37 @@ def load_data_and_setup_chroma():
176
  parsed = json.loads(item) if isinstance(item, str) else item
177
  parsed_metadatas.append(parsed if isinstance(parsed, dict) else {})
178
  except:
179
- parsed_metadatas.append({})
180
- metadatas_list = parsed_metadatas
181
-
182
- collection.add(
183
- ids=batch_df['id'].tolist(),
184
- embeddings=batch_df['embedding'].tolist(),
185
- documents=batch_df['document'].tolist(),
186
- metadatas=metadatas_list
187
- )
188
- except Exception as e:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  logging.error(f"Error adding batch {i+1}/{num_batches} to in-memory Chroma: {e}")
190
  error_count += 1
191
  progress_bar.progress((i + 1) / num_batches, text=f"Loading embeddings... Batch {i+1}/{num_batches}")
 
176
  parsed = json.loads(item) if isinstance(item, str) else item
177
  parsed_metadatas.append(parsed if isinstance(parsed, dict) else {})
178
  except:
179
+ parsed_metadatas.append({})
180
+ metadatas_list = parsed_metadatas
181
+
182
+ # --- Clean None values from metadata ---
183
+ cleaned_metadatas = []
184
+ for meta_dict in metadatas_list:
185
+ cleaned_dict = {}
186
+ if isinstance(meta_dict, dict):
187
+ for key, value in meta_dict.items():
188
+ # Replace None with empty string, keep other valid types
189
+ if value is None:
190
+ cleaned_dict[key] = ""
191
+ elif isinstance(value, (str, int, float, bool)):
192
+ cleaned_dict[key] = value
193
+ else:
194
+ # Attempt to convert other types to string, or skip
195
+ try:
196
+ cleaned_dict[key] = str(value)
197
+ logging.warning(f"Converted unexpected metadata type ({type(value)}) to string for key '{key}'.")
198
+ except:
199
+ logging.warning(f"Skipping metadata key '{key}' with unconvertible type {type(value)}.")
200
+ cleaned_metadatas.append(cleaned_dict)
201
+ # -----------------------------------------
202
+
203
+ collection.add(
204
+ ids=batch_df['id'].tolist(),
205
+ embeddings=batch_df['embedding'].tolist(),
206
+ documents=batch_df['document'].tolist(),
207
+ metadatas=cleaned_metadatas # Use cleaned list
208
+ )
209
+ except Exception as e:
210
  logging.error(f"Error adding batch {i+1}/{num_batches} to in-memory Chroma: {e}")
211
  error_count += 1
212
  progress_bar.progress((i + 1) / num_batches, text=f"Loading embeddings... Batch {i+1}/{num_batches}")