Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ import datetime
|
|
4 |
import io
|
5 |
import nltk
|
6 |
import base64
|
|
|
7 |
from nltk.tokenize import sent_tokenize
|
8 |
from sklearn.feature_extraction.text import CountVectorizer
|
9 |
from sklearn.decomposition import LatentDirichletAllocation
|
@@ -37,36 +38,45 @@ def save_list_as_excel(text):
|
|
37 |
def get_download_link(file_path):
|
38 |
with open(file_path, 'rb') as f:
|
39 |
data = f.read()
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
|
44 |
def perform_nlp(text):
|
45 |
sentences = sent_tokenize(text)
|
46 |
-
|
47 |
# Topic Modeling
|
48 |
vectorizer = CountVectorizer(stop_words='english')
|
49 |
X = vectorizer.fit_transform(sentences)
|
50 |
lda = LatentDirichletAllocation(n_components=3, random_state=42)
|
51 |
lda.fit(X)
|
52 |
topics = lda.transform(X)
|
53 |
-
|
54 |
# Display topics
|
55 |
st.subheader("Topic Modeling")
|
56 |
for i, topic in enumerate(topics):
|
57 |
st.write(f"Topic {i+1}:")
|
58 |
topic_words = ", ".join([vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-6:-1]])
|
59 |
st.write(topic_words)
|
60 |
-
|
61 |
# Word Frequency
|
62 |
word_freq = pd.Series(" ".join(sentences).split()).value_counts()[:10]
|
63 |
st.subheader("Word Frequency")
|
64 |
st.bar_chart(word_freq)
|
65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
def main():
|
67 |
st.title("AI UI for Text Processing")
|
68 |
-
|
69 |
text_input = st.text_area("Paste your text here")
|
|
|
70 |
if st.button("Process Text"):
|
71 |
if text_input.strip() == "":
|
72 |
st.warning("Please paste some text.")
|
@@ -74,20 +84,30 @@ def main():
|
|
74 |
file_name = None
|
75 |
if text_input.strip().startswith(("1.", "1 -", "1 _")) and "\n" in text_input:
|
76 |
file_name = save_list_as_excel(text_input)
|
|
|
|
|
77 |
elif "." in text_input or "!" in text_input or "?" in text_input:
|
78 |
file_name = save_text_as_file(text_input, "txt")
|
|
|
|
|
79 |
perform_nlp(text_input)
|
80 |
else:
|
81 |
file_name = save_text_as_file(text_input, "txt")
|
82 |
-
|
|
|
|
|
83 |
if file_name:
|
84 |
try:
|
85 |
df = pd.read_excel(file_name)
|
86 |
st.subheader("Saved Data")
|
87 |
st.dataframe(df)
|
88 |
st.markdown(get_download_link(file_name), unsafe_allow_html=True)
|
|
|
|
|
89 |
except:
|
90 |
pass
|
91 |
|
|
|
|
|
92 |
if __name__ == "__main__":
|
93 |
main()
|
|
|
4 |
import io
|
5 |
import nltk
|
6 |
import base64
|
7 |
+
import os
|
8 |
from nltk.tokenize import sent_tokenize
|
9 |
from sklearn.feature_extraction.text import CountVectorizer
|
10 |
from sklearn.decomposition import LatentDirichletAllocation
|
|
|
38 |
def get_download_link(file_path):
    """Build an HTML anchor that downloads *file_path* from the browser.

    The file's bytes are embedded in the link as a base64 data URI, so no
    server endpoint is needed; the string is meant for
    ``st.markdown(..., unsafe_allow_html=True)``.
    """
    with open(file_path, 'rb') as handle:
        payload = handle.read()
    encoded = base64.b64encode(payload).decode()
    return (
        f'<a href="data:application/octet-stream;base64,{encoded}" '
        f'download="{file_path}">Download {file_path}</a>'
    )
44 |
|
45 |
def perform_nlp(text):
    """Run lightweight NLP on *text* and render the results with Streamlit.

    Splits the text into sentences, fits a 3-topic LDA model on a
    bag-of-words representation, shows the top 5 words per topic, and
    plots the 10 most frequent raw tokens.
    """
    sentences = sent_tokenize(text)

    # Topic Modeling: bag-of-words -> LDA with a fixed seed for repeatability.
    vectorizer = CountVectorizer(stop_words='english')
    X = vectorizer.fit_transform(sentences)
    lda = LatentDirichletAllocation(n_components=3, random_state=42)
    lda.fit(X)

    # Display topics.
    # BUG FIX: the original iterated lda.transform(X) — one row per *sentence*
    # with only n_components entries — and reused the loop variable `i` inside
    # the comprehension, so it always printed the first few vocabulary words.
    # Per-topic word weights live in lda.components_ (one row per topic,
    # one column per vocabulary word).
    st.subheader("Topic Modeling")
    feature_names = vectorizer.get_feature_names_out()
    for topic_idx, topic in enumerate(lda.components_):
        st.write(f"Topic {topic_idx+1}:")
        # argsort()[:-6:-1] -> indices of the 5 highest-weight words.
        topic_words = ", ".join(feature_names[j] for j in topic.argsort()[:-6:-1])
        st.write(topic_words)

    # Word Frequency: top 10 whitespace-delimited tokens (no stop-word removal
    # here, unlike the vectorizer above).
    word_freq = pd.Series(" ".join(sentences).split()).value_counts()[:10]
    st.subheader("Word Frequency")
    st.bar_chart(word_freq)
|
63 |
|
64 |
+
def show_files_in_directory():
    """List .md/.xlsx/.csv files in the working directory as a download table.

    Renders an HTML table (file name as a download link, size in bytes,
    last-modified timestamp) via Streamlit.
    """
    st.subheader("Files in Current Directory")
    files = []
    for file in os.listdir("."):
        if file.endswith((".md", ".xlsx", ".csv")):
            file_size = os.path.getsize(file)
            file_modified_time = datetime.datetime.fromtimestamp(
                os.path.getmtime(file)
            ).strftime("%Y-%m-%d %H:%M:%S")
            files.append({"File Name": file, "Size (bytes)": file_size,
                          "Last Modified": file_modified_time})
    # BUG FIX: with no matching files, pd.DataFrame([]) has no "File Name"
    # column and the assignment below raised KeyError; bail out gracefully.
    if not files:
        st.write("No matching files found.")
        return
    files_df = pd.DataFrame(files)
    files_df["File Name"] = files_df["File Name"].apply(
        lambda x: f'<a href="{x}" download>{x}</a>'
    )
    st.write(files_df.to_html(escape=False, index=False), unsafe_allow_html=True)
|
75 |
+
|
76 |
def main():
|
77 |
st.title("AI UI for Text Processing")
|
|
|
78 |
text_input = st.text_area("Paste your text here")
|
79 |
+
|
80 |
if st.button("Process Text"):
|
81 |
if text_input.strip() == "":
|
82 |
st.warning("Please paste some text.")
|
|
|
84 |
file_name = None
|
85 |
if text_input.strip().startswith(("1.", "1 -", "1 _")) and "\n" in text_input:
|
86 |
file_name = save_list_as_excel(text_input)
|
87 |
+
save_text_as_file(text_input, "csv")
|
88 |
+
save_text_as_file(text_input, "md")
|
89 |
elif "." in text_input or "!" in text_input or "?" in text_input:
|
90 |
file_name = save_text_as_file(text_input, "txt")
|
91 |
+
save_text_as_file(text_input, "csv")
|
92 |
+
save_text_as_file(text_input, "md")
|
93 |
perform_nlp(text_input)
|
94 |
else:
|
95 |
file_name = save_text_as_file(text_input, "txt")
|
96 |
+
save_text_as_file(text_input, "csv")
|
97 |
+
save_text_as_file(text_input, "md")
|
98 |
+
|
99 |
if file_name:
|
100 |
try:
|
101 |
df = pd.read_excel(file_name)
|
102 |
st.subheader("Saved Data")
|
103 |
st.dataframe(df)
|
104 |
st.markdown(get_download_link(file_name), unsafe_allow_html=True)
|
105 |
+
st.markdown(get_download_link(file_name.replace(".xlsx", ".csv")), unsafe_allow_html=True)
|
106 |
+
st.markdown(get_download_link(file_name.replace(".xlsx", ".md")), unsafe_allow_html=True)
|
107 |
except:
|
108 |
pass
|
109 |
|
110 |
+
show_files_in_directory()
|
111 |
+
|
112 |
# Script entry point: launch the Streamlit UI only when executed directly.
if __name__ == "__main__":
    main()
|