|
import streamlit as st |
|
import pandas as pd |
|
import datetime |
|
import io |
|
import nltk |
|
from nltk.tokenize import sent_tokenize |
|
from sklearn.feature_extraction.text import CountVectorizer |
|
from sklearn.decomposition import LatentDirichletAllocation |
|
|
|
# Make sure the Punkt sentence-tokenizer data used by sent_tokenize is present.
# Streamlit re-executes this script on every interaction, so only download
# what is actually missing. Recent NLTK releases (3.9+) load the "punkt_tab"
# resource instead of the pickled "punkt" models, so both are checked.
for _resource in ("punkt", "punkt_tab"):
    try:
        nltk.data.find(f"tokenizers/{_resource}")
    except LookupError:
        nltk.download(_resource)
|
|
|
def save_text_as_file(text, file_type):
    """Write ``text`` to a timestamped file and report the file name in the UI.

    The file is opened with a relative path, so it is created in the current
    working directory, and is named ``text_file_<YYYYmmdd_HHMMSS>.<file_type>``.

    Args:
        text: The string content to write.
        file_type: File extension to use, without the leading dot
            (for example ``"txt"``).
    """
    current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    file_name = f"text_file_{current_time}.{file_type}"
    # Pin the encoding: with the platform default (e.g. cp1252 on Windows)
    # pasted text containing characters outside that code page raises
    # UnicodeEncodeError.
    with open(file_name, "w", encoding="utf-8") as file:
        file.write(text)
    st.success(f"Text saved as {file_name}")
|
|
|
def save_csv_as_excel(text):
    """Parse pasted delimited text and save it as a timestamped ``.xlsx`` file.

    The text is parsed with ``header=None`` so every pasted line, including a
    possible header line, becomes a data row and is written to the sheet
    unchanged. When the first row has ``object`` dtype it is treated as a
    header line; this only affects the file name prefix
    (``csv_with_header_`` vs ``csv_without_header_``).

    On success the file name is reported and the parsed table is displayed.
    If the text cannot be parsed, an error message is shown instead.

    Args:
        text: Comma- or tab-separated text pasted by the user.
    """
    # main() also routes tab-separated text here; with the default comma
    # separator such input would collapse into a single column.
    delimiter = "\t" if "\t" in text and "," not in text else ","

    try:
        df = pd.read_csv(io.StringIO(text), header=None, sep=delimiter)
    except (pd.errors.EmptyDataError, pd.errors.ParserError):
        # ParserError covers ragged rows, e.g. ordinary prose that merely
        # contains commas on some lines.
        st.error("The pasted text does not contain valid CSV data.")
        return

    # Heuristic: a first row of object dtype is taken to be a header line.
    if df.iloc[0].dtype == object:
        prefix = "csv_with_header"
    else:
        prefix = "csv_without_header"
    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    file_name = f"{prefix}_{timestamp}.xlsx"

    # header=False: the frame's column labels are just 0..n-1 (parsed with
    # header=None), and any real header line is already row 0 of the data.
    df.to_excel(file_name, index=False, header=False)
    st.success(f"CSV data saved as {file_name}")
    st.dataframe(df)
|
|
|
def split_sentences(text):
    """Return ``text`` with one sentence per line.

    Sentence boundaries come from ``sent_tokenize``; the resulting sentences
    are joined with newline characters.
    """
    return "\n".join(sent_tokenize(text))
|
|
|
def perform_nlp(text):
    """Show LDA topic keywords and a word-frequency chart for ``text``.

    The text is split into sentences and each sentence is used as one
    document for a bag-of-words model with English stop words removed. A
    3-topic LDA model is fitted and the five highest-weighted words of each
    topic are written out. If no usable vocabulary can be built, a warning is
    shown in place of the topics. Afterwards a bar chart of the ten most
    frequent whitespace-separated tokens is rendered.

    Args:
        text: Raw text to analyse.
    """
    sentences = sent_tokenize(text)

    st.subheader("Topic Modeling")
    vectorizer = CountVectorizer(stop_words='english')
    try:
        X = vectorizer.fit_transform(sentences)
    except ValueError:
        # CountVectorizer raises ValueError for an empty vocabulary, e.g.
        # when the text holds only punctuation or stop words.
        st.warning("Not enough meaningful words for topic modeling.")
    else:
        lda = LatentDirichletAllocation(n_components=3, random_state=42)
        lda.fit(X)

        # components_ has one row of word weights per topic. The
        # document-topic matrix from transform() has one row per sentence,
        # and its column indices are topic ids, not vocabulary ids.
        feature_names = vectorizer.get_feature_names_out()
        for topic_number, word_weights in enumerate(lda.components_, start=1):
            st.write(f"Topic {topic_number}:")
            top_word_ids = word_weights.argsort()[:-6:-1]  # five largest, descending
            topic_words = ", ".join(feature_names[j] for j in top_word_ids)
            st.write(topic_words)

    word_freq = pd.Series(" ".join(sentences).split()).value_counts().head(10)
    st.subheader("Word Frequency")
    st.bar_chart(word_freq)
|
|
|
def main():
    """Render the page and dispatch the pasted text to a handler.

    After the "Process Text" button is pressed the input is routed as
    follows: blank input shows a warning; text containing a comma or a tab is
    passed to ``save_csv_as_excel``; text containing ``.``, ``!`` or ``?`` is
    split into sentences, displayed, and passed to ``perform_nlp``; anything
    else is saved as a ``.txt`` file.
    """
    st.title("AI UI for Text Processing")

    text_input = st.text_area("Paste your text here")
    if not st.button("Process Text"):
        return

    if not text_input.strip():
        st.warning("Please paste some text.")
        return

    # Delimiters are checked first, so prose that contains a comma is
    # handled as tabular data rather than as sentences.
    if any(delimiter in text_input for delimiter in (",", "\t")):
        save_csv_as_excel(text_input)
        return

    if any(mark in text_input for mark in (".", "!", "?")):
        sentences = split_sentences(text_input)
        st.subheader("Sentences")
        st.write(sentences)
        perform_nlp(text_input)
        return

    save_text_as_file(text_input, "txt")
|
|
|
# Script entry point: build the UI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()