Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,12 @@ import streamlit as st
|
|
2 |
import pandas as pd
|
3 |
import datetime
|
4 |
import io
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
def save_text_as_file(text, file_type):
|
7 |
current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
@@ -21,41 +27,38 @@ def save_csv_as_excel(text):
|
|
21 |
file_name = f"csv_without_header_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
|
22 |
df.to_excel(file_name, index=False, header=header)
|
23 |
st.success(f"CSV data saved as {file_name}")
|
|
|
24 |
except pd.errors.EmptyDataError:
|
25 |
st.error("The pasted text does not contain valid CSV data.")
|
26 |
|
27 |
-
def
|
28 |
-
|
29 |
-
|
30 |
-
"Age": [25, 30, 35],
|
31 |
-
"City": ["New York", "London", "Paris"]
|
32 |
-
}
|
33 |
-
df = pd.DataFrame(data)
|
34 |
-
file_name = "demo_excel_file.xlsx"
|
35 |
-
df.to_excel(file_name, index=False)
|
36 |
-
st.success(f"Demo Excel file created: {file_name}")
|
37 |
|
38 |
-
def
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
-
|
42 |
-
|
|
|
|
|
|
|
|
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
| 6. Aggregate data | `df_agg = df.groupby('Column').mean()` |
|
52 |
-
| 7. Create Plotly graph | `fig = px.bar(df, x='Column1', y='Column2')` |
|
53 |
-
| 8. Map values | `df['Column'] = df['Column'].map({'Old': 'New'})` |
|
54 |
-
| 9. Capitalize column | `df['Column'] = df['Column'].str.capitalize()` |
|
55 |
-
| 10. Filter DataFrame | `df_filtered = df[df['Column'] > 10]` |
|
56 |
-
| 11. Sort DataFrame | `df_sorted = df.sort_values('Column')` |
|
57 |
-
| 12. Merge DataFrames | `df_merged = pd.merge(df1, df2, on='Column')` |
|
58 |
-
""")
|
59 |
|
60 |
text_input = st.text_area("Paste your text here")
|
61 |
if st.button("Process Text"):
|
@@ -64,15 +67,13 @@ def main():
|
|
64 |
else:
|
65 |
if "," in text_input or "\t" in text_input:
|
66 |
save_csv_as_excel(text_input)
|
67 |
-
elif
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
71 |
else:
|
72 |
save_text_as_file(text_input, "txt")
|
73 |
-
|
74 |
-
if st.button("Create Demo Excel File"):
|
75 |
-
create_demo_excel_file()
|
76 |
|
77 |
if __name__ == "__main__":
|
78 |
main()
|
|
|
2 |
import pandas as pd
|
3 |
import datetime
|
4 |
import io
|
5 |
+
import nltk
|
6 |
+
from nltk.tokenize import sent_tokenize
|
7 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
8 |
+
from sklearn.decomposition import LatentDirichletAllocation
|
9 |
+
|
10 |
+
nltk.download('punkt')
|
11 |
|
12 |
def save_text_as_file(text, file_type):
|
13 |
current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
|
27 |
file_name = f"csv_without_header_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
|
28 |
df.to_excel(file_name, index=False, header=header)
|
29 |
st.success(f"CSV data saved as {file_name}")
|
30 |
+
st.dataframe(df)
|
31 |
except pd.errors.EmptyDataError:
|
32 |
st.error("The pasted text does not contain valid CSV data.")
|
33 |
|
34 |
+
def split_sentences(text):
    """Return *text* re-flowed so that each tokenized sentence is on its own line.

    The text is split with ``sent_tokenize`` and the resulting pieces are
    joined back together with a single newline between them.
    """
    return "\n".join(sent_tokenize(text))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
+
def perform_nlp(text):
    """Render a topic-modeling summary and a word-frequency chart for *text*.

    The text is split into sentences, each sentence is treated as one
    document for a 3-topic LDA model, and the five highest-weighted terms
    of every topic are written to the Streamlit page. A bar chart of the
    ten most frequent whitespace-separated tokens follows.

    Args:
        text: Raw text to analyze.

    Returns:
        None. All output is written to the Streamlit page.
    """
    sentences = sent_tokenize(text)

    # Topic Modeling
    st.subheader("Topic Modeling")
    vectorizer = CountVectorizer(stop_words='english')
    try:
        X = vectorizer.fit_transform(sentences)
    except ValueError:
        # CountVectorizer raises ValueError when no term survives
        # tokenization / stop-word removal (e.g. the input is only
        # punctuation). Report it instead of crashing the app.
        st.warning("Not enough meaningful words to extract topics.")
    else:
        lda = LatentDirichletAllocation(n_components=3, random_state=42)
        lda.fit(X)

        # Look the vocabulary up once rather than once per displayed word.
        feature_names = vectorizer.get_feature_names_out()

        # Display topics. ``components_`` has one row per topic holding the
        # weight of every vocabulary term; ``transform(X)`` would instead
        # give one row per *sentence*, which is not what is labeled here.
        for topic_idx, term_weights in enumerate(lda.components_):
            st.write(f"Topic {topic_idx + 1}:")
            # Indices of the five largest weights, highest first.
            top_term_ids = term_weights.argsort()[:-6:-1]
            topic_words = ", ".join(feature_names[j] for j in top_term_ids)
            st.write(topic_words)

    # Word Frequency
    word_freq = pd.Series(" ".join(sentences).split()).value_counts().head(10)
    st.subheader("Word Frequency")
    st.bar_chart(word_freq)
|
59 |
+
|
60 |
+
def main():
|
61 |
+
st.title("AI UI for Text Processing")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
text_input = st.text_area("Paste your text here")
|
64 |
if st.button("Process Text"):
|
|
|
67 |
else:
|
68 |
if "," in text_input or "\t" in text_input:
|
69 |
save_csv_as_excel(text_input)
|
70 |
+
elif "." in text_input or "!" in text_input or "?" in text_input:
|
71 |
+
sentences = split_sentences(text_input)
|
72 |
+
st.subheader("Sentences")
|
73 |
+
st.write(sentences)
|
74 |
+
perform_nlp(text_input)
|
75 |
else:
|
76 |
save_text_as_file(text_input, "txt")
|
|
|
|
|
|
|
77 |
|
78 |
if __name__ == "__main__":
|
79 |
main()
|