CCockrum committed on
Commit
a1c68b3
·
verified ·
1 Parent(s): c82546e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -35
app.py CHANGED
@@ -197,44 +197,45 @@ if fetch_data:
197
  incomplete_mask = metadata_df.map(is_incomplete).any(axis=1)
198
  incomplete_records = metadata_df[incomplete_mask]
199
 
200
- st.subheader("Records with Incomplete Metadata")
201
- if not incomplete_records.empty:
202
- st.dataframe(incomplete_records.astype(str))
203
- else:
204
- st.success("All metadata fields are complete in this collection!")
205
-
206
- st.subheader("Identifiers of Items Needing Metadata Updates")
207
- if not incomplete_records.empty:
208
- st.write(incomplete_records[['id', 'title']])
209
- else:
210
- st.success("All records are complete!")
211
 
212
- st.subheader("Suggested Metadata Enhancements")
213
- incomplete_with_desc = incomplete_records[incomplete_records['description'].notnull()]
214
- reference_df = metadata_df[metadata_df['subject'].notnull() & metadata_df['description'].notnull()]
215
- tfidf = TfidfVectorizer(stop_words='english')
216
- try:
217
- suggestions = []
218
- tfidf_matrix = tfidf.fit_transform(reference_df['description'])
 
219
 
220
- for idx, row in incomplete_with_desc.iterrows():
221
- if pd.isna(row['subject']) and pd.notna(row['description']):
222
- desc_vec = tfidf.transform([str(row['description'])])
223
- sims = cosine_similarity(desc_vec, tfidf_matrix).flatten()
224
- top_idx = sims.argmax()
225
- suggested_subject = metadata_df.iloc[top_idx]['subject']
226
- if pd.notna(suggested_subject) and suggested_subject:
227
- suggestions.append((row['title'], suggested_subject))
228
 
229
- if suggestions:
230
- suggestions_df = pd.DataFrame(suggestions, columns=["Title", "Suggested Subject"])
231
- st.table(suggestions_df)
232
- else:
233
- st.markdown("""
234
- <div class='custom-table'>
235
- <b>ℹ️ No metadata enhancement suggestions available.</b>
236
- </div>
237
- """, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
238
 
239
  except Exception as e:
240
  st.error(f"Error generating metadata suggestions: {e}")
 
197
  incomplete_mask = metadata_df.map(is_incomplete).any(axis=1)
198
  incomplete_records = metadata_df[incomplete_mask]
199
 
200
+ st.subheader(" Suggested Metadata Enhancements")
 
 
 
 
 
 
 
 
 
 
201
 
202
+ incomplete_with_desc = incomplete_records[incomplete_records['description'].notnull()]
203
+ reference_df = metadata_df[metadata_df['subject'].notnull() & metadata_df['description'].notnull()]
204
+ tfidf = TfidfVectorizer(stop_words='english')
205
+
206
+ if len(incomplete_with_desc) > 1 and len(reference_df) > 1:
207
+ try:
208
+ suggestions = []
209
+ tfidf_matrix = tfidf.fit_transform(reference_df['description'])
210
 
211
+ for idx, row in incomplete_with_desc.iterrows():
212
+ if pd.isna(row['subject']) and pd.notna(row['description']):
213
+ desc_vec = tfidf.transform([str(row['description'])])
214
+ sims = cosine_similarity(desc_vec, tfidf_matrix).flatten()
215
+ top_idx = sims.argmax()
216
+ suggested_subject = reference_df.iloc[top_idx]['subject']
217
+ if pd.notna(suggested_subject) and suggested_subject:
218
+ suggestions.append((row['title'], suggested_subject))
219
 
220
+ if suggestions:
221
+ suggestions_df = pd.DataFrame(suggestions, columns=["Title", "Suggested Subject"])
222
+ st.markdown("<div class='custom-table'>" + suggestions_df.to_markdown(index=False) + "</div>", unsafe_allow_html=True)
223
+ else:
224
+ st.markdown("""
225
+ <div class='custom-table'>
226
+ <b>ℹ️ No metadata enhancement suggestions available.</b>
227
+ </div>
228
+ """, unsafe_allow_html=True)
229
+
230
+ except Exception as e:
231
+ st.error(f"Error generating metadata suggestions: {e}")
232
+ else:
233
+ st.markdown("""
234
+ <div class='custom-table'>
235
+ <b>ℹ️ Not enough descriptive data to generate metadata suggestions.</b>
236
+ </div>
237
+ """, unsafe_allow_html=True)
238
+
239
 
240
  except Exception as e:
241
  st.error(f"Error generating metadata suggestions: {e}")