Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ import datetime
|
|
4 |
import io
|
5 |
import nltk
|
6 |
import base64
|
|
|
7 |
from nltk.tokenize import sent_tokenize
|
8 |
from sklearn.feature_extraction.text import CountVectorizer
|
9 |
from sklearn.decomposition import LatentDirichletAllocation
|
@@ -37,36 +38,45 @@ def save_list_as_excel(text):
|
|
37 |
def get_download_link(file_path):
|
38 |
with open(file_path, 'rb') as f:
|
39 |
data = f.read()
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
|
44 |
def perform_nlp(text):
|
45 |
sentences = sent_tokenize(text)
|
46 |
-
|
47 |
# Topic Modeling
|
48 |
vectorizer = CountVectorizer(stop_words='english')
|
49 |
X = vectorizer.fit_transform(sentences)
|
50 |
lda = LatentDirichletAllocation(n_components=3, random_state=42)
|
51 |
lda.fit(X)
|
52 |
topics = lda.transform(X)
|
53 |
-
|
54 |
# Display topics
|
55 |
st.subheader("Topic Modeling")
|
56 |
for i, topic in enumerate(topics):
|
57 |
st.write(f"Topic {i+1}:")
|
58 |
topic_words = ", ".join([vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-6:-1]])
|
59 |
st.write(topic_words)
|
60 |
-
|
61 |
# Word Frequency
|
62 |
word_freq = pd.Series(" ".join(sentences).split()).value_counts()[:10]
|
63 |
st.subheader("Word Frequency")
|
64 |
st.bar_chart(word_freq)
|
65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
def main():
|
67 |
st.title("AI UI for Text Processing")
|
68 |
-
|
69 |
text_input = st.text_area("Paste your text here")
|
|
|
70 |
if st.button("Process Text"):
|
71 |
if text_input.strip() == "":
|
72 |
st.warning("Please paste some text.")
|
@@ -74,20 +84,30 @@ def main():
|
|
74 |
file_name = None
|
75 |
if text_input.strip().startswith(("1.", "1 -", "1 _")) and "\n" in text_input:
|
76 |
file_name = save_list_as_excel(text_input)
|
|
|
|
|
77 |
elif "." in text_input or "!" in text_input or "?" in text_input:
|
78 |
file_name = save_text_as_file(text_input, "txt")
|
|
|
|
|
79 |
perform_nlp(text_input)
|
80 |
else:
|
81 |
file_name = save_text_as_file(text_input, "txt")
|
82 |
-
|
|
|
|
|
83 |
if file_name:
|
84 |
try:
|
85 |
df = pd.read_excel(file_name)
|
86 |
st.subheader("Saved Data")
|
87 |
st.dataframe(df)
|
88 |
st.markdown(get_download_link(file_name), unsafe_allow_html=True)
|
|
|
|
|
89 |
except:
|
90 |
pass
|
91 |
|
|
|
|
|
92 |
if __name__ == "__main__":
|
93 |
main()
|
|
|
4 |
import io
|
5 |
import nltk
|
6 |
import base64
|
7 |
+
import os
|
8 |
from nltk.tokenize import sent_tokenize
|
9 |
from sklearn.feature_extraction.text import CountVectorizer
|
10 |
from sklearn.decomposition import LatentDirichletAllocation
|
|
|
38 |
def get_download_link(file_path):
    """Build an HTML anchor that downloads *file_path* from the browser.

    The file's bytes are embedded in the link as a base64 data URI, so no
    server endpoint is needed; the string is meant for
    ``st.markdown(..., unsafe_allow_html=True)``.
    """
    with open(file_path, 'rb') as handle:
        payload = handle.read()
    encoded = base64.b64encode(payload).decode()
    return (
        f'<a href="data:application/octet-stream;base64,{encoded}" '
        f'download="{file_path}">Download {file_path}</a>'
    )
44 |
|
45 |
def perform_nlp(text):
    """Run lightweight NLP on *text* and render the results with Streamlit.

    Splits the text into sentences, fits a 3-topic LDA model on a
    bag-of-words representation, shows the top 5 words per topic, and
    plots the 10 most frequent raw tokens.
    """
    sentences = sent_tokenize(text)

    # Topic Modeling: bag-of-words -> LDA with a fixed seed for repeatability.
    vectorizer = CountVectorizer(stop_words='english')
    X = vectorizer.fit_transform(sentences)
    lda = LatentDirichletAllocation(n_components=3, random_state=42)
    lda.fit(X)

    # Display topics.
    # BUG FIX: the original iterated lda.transform(X) — one row per *sentence*
    # with only n_components entries — and reused the loop variable `i` inside
    # the comprehension, so it always printed the first few vocabulary words.
    # Per-topic word weights live in lda.components_ (one row per topic,
    # one column per vocabulary word).
    st.subheader("Topic Modeling")
    feature_names = vectorizer.get_feature_names_out()
    for topic_idx, topic in enumerate(lda.components_):
        st.write(f"Topic {topic_idx+1}:")
        # argsort()[:-6:-1] -> indices of the 5 highest-weight words.
        topic_words = ", ".join(feature_names[j] for j in topic.argsort()[:-6:-1])
        st.write(topic_words)

    # Word Frequency: top 10 whitespace-delimited tokens (no stop-word removal
    # here, unlike the vectorizer above).
    word_freq = pd.Series(" ".join(sentences).split()).value_counts()[:10]
    st.subheader("Word Frequency")
    st.bar_chart(word_freq)
|
63 |
|
64 |
+
def show_files_in_directory():
    """List .md/.xlsx/.csv files in the working directory as a download table.

    Renders an HTML table (file name as a download link, size in bytes,
    last-modified timestamp) via Streamlit.
    """
    st.subheader("Files in Current Directory")
    files = []
    for file in os.listdir("."):
        if file.endswith((".md", ".xlsx", ".csv")):
            file_size = os.path.getsize(file)
            file_modified_time = datetime.datetime.fromtimestamp(
                os.path.getmtime(file)
            ).strftime("%Y-%m-%d %H:%M:%S")
            files.append({"File Name": file, "Size (bytes)": file_size,
                          "Last Modified": file_modified_time})
    # BUG FIX: with no matching files, pd.DataFrame([]) has no "File Name"
    # column and the assignment below raised KeyError; bail out gracefully.
    if not files:
        st.write("No matching files found.")
        return
    files_df = pd.DataFrame(files)
    files_df["File Name"] = files_df["File Name"].apply(
        lambda x: f'<a href="{x}" download>{x}</a>'
    )
    st.write(files_df.to_html(escape=False, index=False), unsafe_allow_html=True)
|
75 |
+
|
76 |
def main():
|
77 |
st.title("AI UI for Text Processing")
|
|
|
78 |
text_input = st.text_area("Paste your text here")
|
79 |
+
|
80 |
if st.button("Process Text"):
|
81 |
if text_input.strip() == "":
|
82 |
st.warning("Please paste some text.")
|
|
|
84 |
file_name = None
|
85 |
if text_input.strip().startswith(("1.", "1 -", "1 _")) and "\n" in text_input:
|
86 |
file_name = save_list_as_excel(text_input)
|
87 |
+
save_text_as_file(text_input, "csv")
|
88 |
+
save_text_as_file(text_input, "md")
|
89 |
elif "." in text_input or "!" in text_input or "?" in text_input:
|
90 |
file_name = save_text_as_file(text_input, "txt")
|
91 |
+
save_text_as_file(text_input, "csv")
|
92 |
+
save_text_as_file(text_input, "md")
|
93 |
perform_nlp(text_input)
|
94 |
else:
|
95 |
file_name = save_text_as_file(text_input, "txt")
|
96 |
+
save_text_as_file(text_input, "csv")
|
97 |
+
save_text_as_file(text_input, "md")
|
98 |
+
|
99 |
if file_name:
|
100 |
try:
|
101 |
df = pd.read_excel(file_name)
|
102 |
st.subheader("Saved Data")
|
103 |
st.dataframe(df)
|
104 |
st.markdown(get_download_link(file_name), unsafe_allow_html=True)
|
105 |
+
st.markdown(get_download_link(file_name.replace(".xlsx", ".csv")), unsafe_allow_html=True)
|
106 |
+
st.markdown(get_download_link(file_name.replace(".xlsx", ".md")), unsafe_allow_html=True)
|
107 |
except:
|
108 |
pass
|
109 |
|
110 |
+
show_files_in_directory()
|
111 |
+
|
112 |
# Script entry point: launch the Streamlit UI only when executed directly.
if __name__ == "__main__":
    main()
|