"""Streamlit app that routes pasted text to CSV export, NLP analysis, or a text file."""

import datetime
import io

import nltk
import pandas as pd
import streamlit as st
from nltk.tokenize import sent_tokenize
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer

# Download the NLTK 'punkt' resource used by sent_tokenize.
nltk.download('punkt')


def save_text_as_file(text, file_type):
    """Write *text* to a timestamped file in the current working directory.

    Args:
        text: The content to write.
        file_type: File extension (without the dot) used for the file name.
    """
    current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    file_name = f"text_file_{current_time}.{file_type}"
    # Explicit encoding so non-ASCII text is written the same way on every
    # platform instead of depending on the locale default.
    with open(file_name, "w", encoding="utf-8") as file:
        file.write(text)
    st.success(f"Text saved as {file_name}")


def save_csv_as_excel(text):
    """Parse *text* as delimited data, save it as an .xlsx file and display it.

    The first row is treated as a header when there is more than one row and
    every cell in the first row is a string. In that case the row becomes the
    column labels and is written as the Excel header; otherwise the data is
    written without a header row.

    Args:
        text: Comma-separated (or, when no comma is present, tab-separated)
            text pasted by the user.
    """
    # main() routes tab-separated text here as well; use the tab delimiter
    # when the text contains tabs and no commas.
    separator = "\t" if "\t" in text and "," not in text else ","

    try:
        df = pd.read_csv(io.StringIO(text), sep=separator, header=None)
        has_header = len(df) > 1 and all(
            isinstance(cell, str) for cell in df.iloc[0]
        )
        if has_header:
            # Re-read so the first row becomes column labels and the
            # remaining rows get their natural dtypes.
            df = pd.read_csv(io.StringIO(text), sep=separator, header=0)
    except (pd.errors.EmptyDataError, pd.errors.ParserError):
        # ParserError covers ragged rows, e.g. ordinary prose with commas.
        st.error("The pasted text does not contain valid CSV data.")
        return

    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    prefix = "csv_with_header" if has_header else "csv_without_header"
    file_name = f"{prefix}_{timestamp}.xlsx"

    # to_excel expects a bool (or list of labels) for `header`.
    df.to_excel(file_name, index=False, header=has_header)
    st.success(f"CSV data saved as {file_name}")
    st.dataframe(df)


def split_sentences(text):
    """Return *text* with one sentence per line, as split by sent_tokenize."""
    sentences = sent_tokenize(text)
    return "\n".join(sentences)


def perform_nlp(text):
    """Show LDA topics and the ten most frequent words of *text* in Streamlit.

    Each sentence is treated as one document for a 3-topic LDA model; for
    every topic the five highest-weighted vocabulary words are displayed.

    Args:
        text: The raw text to analyse.
    """
    sentences = sent_tokenize(text)

    # Topic Modeling
    st.subheader("Topic Modeling")
    vectorizer = CountVectorizer(stop_words='english')
    try:
        X = vectorizer.fit_transform(sentences)
    except ValueError:
        # Raised when the vocabulary is empty (e.g. only stop words).
        st.warning("Not enough distinct words for topic modeling.")
    else:
        lda = LatentDirichletAllocation(n_components=3, random_state=42)
        lda.fit(X)
        feature_names = vectorizer.get_feature_names_out()
        # components_ has one row per topic and one column per vocabulary
        # word, so argsort on a row yields word indices.
        for topic_idx, word_weights in enumerate(lda.components_):
            top_word_ids = word_weights.argsort()[:-6:-1]
            st.write(f"Topic {topic_idx + 1}:")
            st.write(", ".join(feature_names[j] for j in top_word_ids))

    # Word Frequency
    word_freq = pd.Series(" ".join(sentences).split()).value_counts().head(10)
    st.subheader("Word Frequency")
    st.bar_chart(word_freq)


def main():
    """Render the UI and dispatch the pasted text by its apparent format.

    Text containing a comma or tab is handled as CSV; otherwise text with
    sentence punctuation is split into sentences and analysed; anything else
    is saved as a plain .txt file.
    """
    st.title("AI UI for Text Processing")

    text_input = st.text_area("Paste your text here")

    if st.button("Process Text"):
        if not text_input.strip():
            st.warning("Please paste some text.")
        elif "," in text_input or "\t" in text_input:
            save_csv_as_excel(text_input)
        elif "." in text_input or "!" in text_input or "?" in text_input:
            sentences = split_sentences(text_input)
            st.subheader("Sentences")
            st.write(sentences)
            perform_nlp(text_input)
        else:
            save_text_as_file(text_input, "txt")


if __name__ == "__main__":
    main()