Spaces:
Running
Running
Upload app.py
Browse files
app.py
CHANGED
@@ -177,36 +177,36 @@ def load_data_and_setup_chroma():
|
|
177 |
parsed_metadatas.append(parsed if isinstance(parsed, dict) else {})
|
178 |
except:
|
179 |
parsed_metadatas.append({})
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
logging.error(f"Error adding batch {i+1}/{num_batches} to in-memory Chroma: {e}")
|
211 |
error_count += 1
|
212 |
progress_bar.progress((i + 1) / num_batches, text=f"Loading embeddings... Batch {i+1}/{num_batches}")
|
|
|
177 |
parsed_metadatas.append(parsed if isinstance(parsed, dict) else {})
|
178 |
except:
|
179 |
parsed_metadatas.append({})
|
180 |
+
metadatas_list = parsed_metadatas # This line has the wrong indentation
|
181 |
+
|
182 |
+
# --- Clean None values from metadata ---
|
183 |
+
cleaned_metadatas = []
|
184 |
+
for meta_dict in metadatas_list:
|
185 |
+
cleaned_dict = {}
|
186 |
+
if isinstance(meta_dict, dict):
|
187 |
+
for key, value in meta_dict.items():
|
188 |
+
# Replace None with empty string, keep other valid types
|
189 |
+
if value is None:
|
190 |
+
cleaned_dict[key] = ""
|
191 |
+
elif isinstance(value, (str, int, float, bool)):
|
192 |
+
cleaned_dict[key] = value
|
193 |
+
else:
|
194 |
+
# Attempt to convert other types to string, or skip
|
195 |
+
try:
|
196 |
+
cleaned_dict[key] = str(value)
|
197 |
+
logging.warning(f"Converted unexpected metadata type ({type(value)}) to string for key '{key}'.")
|
198 |
+
except:
|
199 |
+
logging.warning(f"Skipping metadata key '{key}' with unconvertible type {type(value)}.")
|
200 |
+
cleaned_metadatas.append(cleaned_dict)
|
201 |
+
# -----------------------------------------
|
202 |
+
|
203 |
+
collection.add(
|
204 |
+
ids=batch_df['id'].tolist(),
|
205 |
+
embeddings=batch_df['embedding'].tolist(),
|
206 |
+
documents=batch_df['document'].tolist(),
|
207 |
+
metadatas=cleaned_metadatas # Use cleaned list
|
208 |
+
)
|
209 |
+
except Exception as e:
|
210 |
logging.error(f"Error adding batch {i+1}/{num_batches} to in-memory Chroma: {e}")
|
211 |
error_count += 1
|
212 |
progress_bar.progress((i + 1) / num_batches, text=f"Loading embeddings... Batch {i+1}/{num_batches}")
|