from sklearn.decomposition import NMF
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline

# Topic model: bag-of-words token counts fed into a 10-component NMF.
bow_vectorizer = CountVectorizer()
nmf = NMF(n_components=10)
topic_pipeline = Pipeline(
    [
        ("bow", bow_vectorizer),
        ("nmf", nmf),
    ]
)

# NOTE(review): `st` and `nlp` are used below but are not defined in this
# part of the file -- presumably `streamlit` and a loaded spaCy pipeline
# set up elsewhere; confirm.
st.subheader("Topic Modeling with Topic-Wizard")

uploaded_file = st.file_uploader("choose a text file", type=["txt"])
if uploaded_file is not None:
    # Writing to the "text" session key pre-fills the text area below,
    # which is bound to the same key.
    st.session_state["text"] = uploaded_file.getvalue().decode("utf-8")

st.write("OR")
input_text = st.text_area(
    label="Enter text separated by newlines",
    value="",
    key="text",
    height=150,
)

button = st.button("Get Segments")
# Whitespace-only input is treated like empty input: it contains no
# documents to fit on.
if button and input_text.strip():
    # One document per non-blank line. splitlines() also handles the CRLF
    # line endings that uploaded files may carry.
    texts = [line.strip() for line in input_text.splitlines() if line.strip()]

    # Sentence segmentation of every document. The result is not consumed
    # by the topic model in this section; kept in case later code uses it.
    sents = []
    for text in texts:
        sents.extend(nlp(text).sents)

    # Fit on the list of documents. Passing the raw string makes
    # CountVectorizer raise ValueError, and the corpus handed to the
    # visualisation must be the one the model was fitted on.
    topic_pipeline.fit(texts)

    # Imported lazily so the dependency is only loaded when a
    # visualisation is actually requested.
    import topicwizard

    topicwizard.visualize(pipeline=topic_pipeline, corpus=texts)