awacke1 committed on
Commit
820215f
·
verified ·
1 Parent(s): 671c237

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -8
app.py CHANGED
@@ -4,6 +4,7 @@ import datetime
4
  import io
5
  import nltk
6
  import base64
 
7
  from nltk.tokenize import sent_tokenize
8
  from sklearn.feature_extraction.text import CountVectorizer
9
  from sklearn.decomposition import LatentDirichletAllocation
@@ -37,36 +38,45 @@ def save_list_as_excel(text):
37
  def get_download_link(file_path):
38
  with open(file_path, 'rb') as f:
39
  data = f.read()
40
- b64 = base64.b64encode(data).decode()
41
- href = f'<a href="data:application/octet-stream;base64,{b64}" download="{file_path}">Download {file_path}</a>'
42
- return href
43
 
44
  def perform_nlp(text):
45
  sentences = sent_tokenize(text)
46
-
47
  # Topic Modeling
48
  vectorizer = CountVectorizer(stop_words='english')
49
  X = vectorizer.fit_transform(sentences)
50
  lda = LatentDirichletAllocation(n_components=3, random_state=42)
51
  lda.fit(X)
52
  topics = lda.transform(X)
53
-
54
  # Display topics
55
  st.subheader("Topic Modeling")
56
  for i, topic in enumerate(topics):
57
  st.write(f"Topic {i+1}:")
58
  topic_words = ", ".join([vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-6:-1]])
59
  st.write(topic_words)
60
-
61
  # Word Frequency
62
  word_freq = pd.Series(" ".join(sentences).split()).value_counts()[:10]
63
  st.subheader("Word Frequency")
64
  st.bar_chart(word_freq)
65
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  def main():
67
  st.title("AI UI for Text Processing")
68
-
69
  text_input = st.text_area("Paste your text here")
 
70
  if st.button("Process Text"):
71
  if text_input.strip() == "":
72
  st.warning("Please paste some text.")
@@ -74,20 +84,30 @@ def main():
74
  file_name = None
75
  if text_input.strip().startswith(("1.", "1 -", "1 _")) and "\n" in text_input:
76
  file_name = save_list_as_excel(text_input)
 
 
77
  elif "." in text_input or "!" in text_input or "?" in text_input:
78
  file_name = save_text_as_file(text_input, "txt")
 
 
79
  perform_nlp(text_input)
80
  else:
81
  file_name = save_text_as_file(text_input, "txt")
82
-
 
 
83
  if file_name:
84
  try:
85
  df = pd.read_excel(file_name)
86
  st.subheader("Saved Data")
87
  st.dataframe(df)
88
  st.markdown(get_download_link(file_name), unsafe_allow_html=True)
 
 
89
  except:
90
  pass
91
 
 
 
92
  if __name__ == "__main__":
93
  main()
 
4
  import io
5
  import nltk
6
  import base64
7
+ import os
8
  from nltk.tokenize import sent_tokenize
9
  from sklearn.feature_extraction.text import CountVectorizer
10
  from sklearn.decomposition import LatentDirichletAllocation
 
38
  def get_download_link(file_path):
39
  with open(file_path, 'rb') as f:
40
  data = f.read()
41
+ b64 = base64.b64encode(data).decode()
42
+ href = f'<a href="data:application/octet-stream;base64,{b64}" download="{file_path}">Download {file_path}</a>'
43
+ return href
44
 
45
  def perform_nlp(text):
46
  sentences = sent_tokenize(text)
 
47
  # Topic Modeling
48
  vectorizer = CountVectorizer(stop_words='english')
49
  X = vectorizer.fit_transform(sentences)
50
  lda = LatentDirichletAllocation(n_components=3, random_state=42)
51
  lda.fit(X)
52
  topics = lda.transform(X)
 
53
  # Display topics
54
  st.subheader("Topic Modeling")
55
  for i, topic in enumerate(topics):
56
  st.write(f"Topic {i+1}:")
57
  topic_words = ", ".join([vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-6:-1]])
58
  st.write(topic_words)
 
59
  # Word Frequency
60
  word_freq = pd.Series(" ".join(sentences).split()).value_counts()[:10]
61
  st.subheader("Word Frequency")
62
  st.bar_chart(word_freq)
63
 
64
def show_files_in_directory():
    """List the .md, .xlsx and .csv files of the working directory as a table.

    Each row shows the file name (as a download anchor), its size in bytes and
    its last-modified time. When no matching file exists an informational
    message is shown instead of a table.

    Returns:
        None. All output is written through Streamlit.
    """
    # Local imports: neither module is among the file's module-level imports.
    import html
    from urllib.parse import quote

    st.subheader("Files in Current Directory")
    files = []
    # os.listdir returns entries in arbitrary order; sort for a stable table.
    for file in sorted(os.listdir(".")):
        if file.endswith((".md", ".xlsx", ".csv")):
            file_size = os.path.getsize(file)
            file_modified_time = datetime.datetime.fromtimestamp(
                os.path.getmtime(file)
            ).strftime("%Y-%m-%d %H:%M:%S")
            # The table is rendered with escape=False, so the name must be made
            # safe here: percent-encode it for the URL and HTML-escape the text.
            link = f'<a href="{quote(file)}" download>{html.escape(file)}</a>'
            files.append({
                "File Name": link,
                "Size (bytes)": file_size,
                "Last Modified": file_modified_time,
            })

    if not files:
        # An empty DataFrame has no "File Name" column; bail out before
        # building the table.
        st.info("No .md, .xlsx or .csv files found.")
        return

    files_df = pd.DataFrame(files)
    st.write(files_df.to_html(escape=False, index=False), unsafe_allow_html=True)
75
+
76
  def main():
77
  st.title("AI UI for Text Processing")
 
78
  text_input = st.text_area("Paste your text here")
79
+
80
  if st.button("Process Text"):
81
  if text_input.strip() == "":
82
  st.warning("Please paste some text.")
 
84
  file_name = None
85
  if text_input.strip().startswith(("1.", "1 -", "1 _")) and "\n" in text_input:
86
  file_name = save_list_as_excel(text_input)
87
+ save_text_as_file(text_input, "csv")
88
+ save_text_as_file(text_input, "md")
89
  elif "." in text_input or "!" in text_input or "?" in text_input:
90
  file_name = save_text_as_file(text_input, "txt")
91
+ save_text_as_file(text_input, "csv")
92
+ save_text_as_file(text_input, "md")
93
  perform_nlp(text_input)
94
  else:
95
  file_name = save_text_as_file(text_input, "txt")
96
+ save_text_as_file(text_input, "csv")
97
+ save_text_as_file(text_input, "md")
98
+
99
  if file_name:
100
  try:
101
  df = pd.read_excel(file_name)
102
  st.subheader("Saved Data")
103
  st.dataframe(df)
104
  st.markdown(get_download_link(file_name), unsafe_allow_html=True)
105
+ st.markdown(get_download_link(file_name.replace(".xlsx", ".csv")), unsafe_allow_html=True)
106
+ st.markdown(get_download_link(file_name.replace(".xlsx", ".md")), unsafe_allow_html=True)
107
  except:
108
  pass
109
 
110
+ show_files_in_directory()
111
+
112
  if __name__ == "__main__":
113
  main()