awacke1 committed on
Commit
3a2fc8d
·
1 Parent(s): 30afcdf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -9
app.py CHANGED
@@ -1,13 +1,9 @@
1
  import streamlit as st
2
- from sklearn.decomposition import NMF
3
- from sklearn.feature_extraction.text import CountVectorizer
4
- from sklearn.pipeline import Pipeline
5
  from bertopic import BERTopic
6
  import streamlit.components.v1 as components
7
  from sentence_transformers import SentenceTransformer
8
  from umap import UMAP
9
  from hdbscan import HDBSCAN
10
- from sklearn.feature_extraction.text import CountVectorizer
11
 
12
  # Initialize BERTopic model
13
  model = BERTopic()
@@ -56,12 +52,14 @@ if button and (uploaded_file is not None or input_text != ""):
56
 
57
  # Display top N most representative topics and their documents
58
  num_topics = st.sidebar.slider("Select number of topics to display", 1, 20, 5, 1)
59
- topic_words, topic_docs = model.get_topics(num_topics=num_topics, with_documents=True)
60
- for i, topic in enumerate(topic_words):
61
- st.write(f"## Topic {i}")
62
- st.write("Keywords:", ", ".join(topic))
 
 
63
  st.write("Documents:")
64
- for doc in topic_docs[i][:5]:
65
  st.write("-", texts[doc])
66
 
67
  # Display topic clusters
 
1
  import streamlit as st
 
 
 
2
  from bertopic import BERTopic
3
  import streamlit.components.v1 as components
4
  from sentence_transformers import SentenceTransformer
5
  from umap import UMAP
6
  from hdbscan import HDBSCAN
 
7
 
8
  # Initialize BERTopic model
9
  model = BERTopic()
 
52
 
53
  # Display top N most representative topics and their documents
54
  num_topics = st.sidebar.slider("Select number of topics to display", 1, 20, 5, 1)
55
+ topic_words, topic_docs = model.get_topics(with_documents=True)
56
+ for i, topic in enumerate(topic_words.items()):
57
+ if i >= num_topics:
58
+ break
59
+ st.write(f"## Topic {topic[0]}")
60
+ st.write("Keywords:", ", ".join(topic[1]))
61
  st.write("Documents:")
62
+ for doc in topic_docs[topic[0]][:5]:
63
  st.write("-", texts[doc])
64
 
65
  # Display topic clusters