awacke1 committed on
Commit
0a2414d
·
verified ·
1 Parent(s): 5defcd2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -36
app.py CHANGED
@@ -2,6 +2,12 @@ import streamlit as st
2
  import pandas as pd
3
  import datetime
4
  import io
 
 
 
 
 
 
5
 
6
  def save_text_as_file(text, file_type):
7
  current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -21,41 +27,38 @@ def save_csv_as_excel(text):
21
  file_name = f"csv_without_header_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
22
  df.to_excel(file_name, index=False, header=header)
23
  st.success(f"CSV data saved as {file_name}")
 
24
  except pd.errors.EmptyDataError:
25
  st.error("The pasted text does not contain valid CSV data.")
26
 
27
- def create_demo_excel_file():
28
- data = {
29
- "Name": ["John", "Alice", "Bob"],
30
- "Age": [25, 30, 35],
31
- "City": ["New York", "London", "Paris"]
32
- }
33
- df = pd.DataFrame(data)
34
- file_name = "demo_excel_file.xlsx"
35
- df.to_excel(file_name, index=False)
36
- st.success(f"Demo Excel file created: {file_name}")
37
 
38
- def main():
39
- st.title("AI UI for Text Processing")
 
 
 
 
 
 
 
40
 
41
- st.markdown("""
42
- ## Pandas Cheat Sheet
 
 
 
 
43
 
44
- | Lesson | Code Example |
45
- |--------|-------------|
46
- | 1. Import xlsx | `df = pd.read_excel('data.xlsx')` |
47
- | 2. Export xlsx | `df.to_excel('output.xlsx', index=False)` |
48
- | 3. Create DataFrame | `df = pd.DataFrame(data)` |
49
- | 4. Apply function to DataFrame | `df['Column'] = df['Column'].apply(lambda x: x.capitalize())` |
50
- | 5. Reduce columns | `df = df[['Column1', 'Column2']]` |
51
- | 6. Aggregate data | `df_agg = df.groupby('Column').mean()` |
52
- | 7. Create Plotly graph | `fig = px.bar(df, x='Column1', y='Column2')` |
53
- | 8. Map values | `df['Column'] = df['Column'].map({'Old': 'New'})` |
54
- | 9. Capitalize column | `df['Column'] = df['Column'].str.capitalize()` |
55
- | 10. Filter DataFrame | `df_filtered = df[df['Column'] > 10]` |
56
- | 11. Sort DataFrame | `df_sorted = df.sort_values('Column')` |
57
- | 12. Merge DataFrames | `df_merged = pd.merge(df1, df2, on='Column')` |
58
- """)
59
 
60
  text_input = st.text_area("Paste your text here")
61
  if st.button("Process Text"):
@@ -64,15 +67,13 @@ def main():
64
  else:
65
  if "," in text_input or "\t" in text_input:
66
  save_csv_as_excel(text_input)
67
- elif text_input.startswith("#") or text_input.startswith("- "):
68
- save_text_as_file(text_input, "md")
69
- elif "def" in text_input or "import" in text_input:
70
- save_text_as_file(text_input, "py")
 
71
  else:
72
  save_text_as_file(text_input, "txt")
73
-
74
- if st.button("Create Demo Excel File"):
75
- create_demo_excel_file()
76
 
77
  if __name__ == "__main__":
78
  main()
 
2
  import pandas as pd
3
  import datetime
4
  import io
5
+ import nltk
6
+ from nltk.tokenize import sent_tokenize
7
+ from sklearn.feature_extraction.text import CountVectorizer
8
+ from sklearn.decomposition import LatentDirichletAllocation
9
+
10
+ nltk.download('punkt')
11
 
12
  def save_text_as_file(text, file_type):
13
  current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
 
27
  file_name = f"csv_without_header_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
28
  df.to_excel(file_name, index=False, header=header)
29
  st.success(f"CSV data saved as {file_name}")
30
+ st.dataframe(df)
31
  except pd.errors.EmptyDataError:
32
  st.error("The pasted text does not contain valid CSV data.")
33
 
34
+ def split_sentences(text):
35
+ sentences = sent_tokenize(text)
36
+ return "\n".join(sentences)
 
 
 
 
 
 
 
37
 
38
+ def perform_nlp(text):
39
+ sentences = sent_tokenize(text)
40
+
41
+ # Topic Modeling
42
+ vectorizer = CountVectorizer(stop_words='english')
43
+ X = vectorizer.fit_transform(sentences)
44
+ lda = LatentDirichletAllocation(n_components=3, random_state=42)
45
+ lda.fit(X)
46
+ topics = lda.transform(X)
47
 
48
+ # Display topics
49
+ st.subheader("Topic Modeling")
50
+ for i, topic in enumerate(topics):
51
+ st.write(f"Topic {i+1}:")
52
+ topic_words = ", ".join([vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-6:-1]])
53
+ st.write(topic_words)
54
 
55
+ # Word Frequency
56
+ word_freq = pd.Series(" ".join(sentences).split()).value_counts()[:10]
57
+ st.subheader("Word Frequency")
58
+ st.bar_chart(word_freq)
59
+
60
+ def main():
61
+ st.title("AI UI for Text Processing")
 
 
 
 
 
 
 
 
62
 
63
  text_input = st.text_area("Paste your text here")
64
  if st.button("Process Text"):
 
67
  else:
68
  if "," in text_input or "\t" in text_input:
69
  save_csv_as_excel(text_input)
70
+ elif "." in text_input or "!" in text_input or "?" in text_input:
71
+ sentences = split_sentences(text_input)
72
+ st.subheader("Sentences")
73
+ st.write(sentences)
74
+ perform_nlp(text_input)
75
  else:
76
  save_text_as_file(text_input, "txt")
 
 
 
77
 
78
  if __name__ == "__main__":
79
  main()