Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -210,14 +210,16 @@ if fetch_data:
|
|
210 |
st.success("All records are complete!")
|
211 |
|
212 |
st.subheader("Suggested Metadata Enhancements")
|
213 |
-
|
|
|
|
|
214 |
if len(filled_descriptions) > 1:
|
215 |
try:
|
216 |
tfidf = TfidfVectorizer(stop_words='english')
|
217 |
tfidf_matrix = tfidf.fit_transform(filled_descriptions)
|
218 |
suggestions = []
|
219 |
-
for idx, row in
|
220 |
-
if pd.notna(row['description']):
|
221 |
desc_vec = tfidf.transform([str(row['description'])])
|
222 |
sims = cosine_similarity(desc_vec, tfidf_matrix).flatten()
|
223 |
top_idx = sims.argmax()
|
|
|
210 |
st.success("All records are complete!")
|
211 |
|
212 |
st.subheader("Suggested Metadata Enhancements")
|
213 |
+
incomplete_with_desc = incomplete_records[incomplete_records['description'].notnull()]
|
214 |
+
reference_df = metadata_df[metadata_df['subject'].notnull() & metadata_df['description'].notnull()]
|
215 |
+
tfidf_matrix = tfidf.fit_transform(reference_df['description'])
|
216 |
if len(filled_descriptions) > 1:
|
217 |
try:
|
218 |
tfidf = TfidfVectorizer(stop_words='english')
|
219 |
tfidf_matrix = tfidf.fit_transform(filled_descriptions)
|
220 |
suggestions = []
|
221 |
+
for idx, row in incomplete_with_desc.iterrows():
|
222 |
+
if pd.isna(row['subject']) and pd.notna(row['description']):
|
223 |
desc_vec = tfidf.transform([str(row['description'])])
|
224 |
sims = cosine_similarity(desc_vec, tfidf_matrix).flatten()
|
225 |
top_idx = sims.argmax()
|