CCockrum committed on
Commit
909496d
·
verified ·
1 Parent(s): e911334

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -210,14 +210,16 @@ if fetch_data:
210
  st.success("All records are complete!")
211
 
212
  st.subheader("Suggested Metadata Enhancements")
213
- filled_descriptions = metadata_df[metadata_df['description'].notnull()]['description'].astype(str)
 
 
214
  if len(filled_descriptions) > 1:
215
  try:
216
  tfidf = TfidfVectorizer(stop_words='english')
217
  tfidf_matrix = tfidf.fit_transform(filled_descriptions)
218
  suggestions = []
219
- for idx, row in incomplete_records.iterrows():
220
- if pd.notna(row['description']):
221
  desc_vec = tfidf.transform([str(row['description'])])
222
  sims = cosine_similarity(desc_vec, tfidf_matrix).flatten()
223
  top_idx = sims.argmax()
 
210
  st.success("All records are complete!")
211
 
212
  st.subheader("Suggested Metadata Enhancements")
213
+ incomplete_with_desc = incomplete_records[incomplete_records['description'].notnull()]
214
+ reference_df = metadata_df[metadata_df['subject'].notnull() & metadata_df['description'].notnull()]
215
+ tfidf_matrix = tfidf.fit_transform(reference_df['description'])
216
  if len(filled_descriptions) > 1:
217
  try:
218
  tfidf = TfidfVectorizer(stop_words='english')
219
  tfidf_matrix = tfidf.fit_transform(filled_descriptions)
220
  suggestions = []
221
+ for idx, row in incomplete_with_desc.iterrows():
222
+ if pd.isna(row['subject']) and pd.notna(row['description']):
223
  desc_vec = tfidf.transform([str(row['description'])])
224
  sims = cosine_similarity(desc_vec, tfidf_matrix).flatten()
225
  top_idx = sims.argmax()