awacke1 committed on
Commit
a034352
·
1 Parent(s): 31125f2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import nltk
3
+ from nltk.corpus import stopwords
4
+ from nltk.tokenize import word_tokenize
5
+ from nltk.probability import FreqDist
6
+ import matplotlib.pyplot as plt
7
+
8
+
9
def process_text(text, top_n=10):
    """Return the most frequent meaningful words in *text*.

    The text is tokenized with NLTK, English stop words and tokens that
    contain no letter or digit (pure punctuation) are discarded, and the
    remaining tokens are counted case-insensitively.

    Args:
        text: Raw text to analyse.
        top_n: Maximum number of ``(word, count)`` pairs to return.

    Returns:
        A list of ``(word, count)`` tuples as produced by
        ``FreqDist.most_common``; empty when no countable token remains.
    """
    # A fresh environment ships without NLTK data, so fetch the tokenizer
    # models on the first LookupError instead of crashing.
    try:
        tokens = word_tokenize(text)
    except LookupError:
        nltk.download("punkt", quiet=True)
        # NOTE(review): newer NLTK releases look for "punkt_tab" instead;
        # requesting both keeps either version working - confirm against
        # the pinned NLTK version.
        nltk.download("punkt_tab", quiet=True)
        tokens = word_tokenize(text)

    # Same lazy-download approach for the stop-word corpus.
    try:
        stop_words = set(stopwords.words("english"))
    except LookupError:
        nltk.download("stopwords", quiet=True)
        stop_words = set(stopwords.words("english"))

    # Lower-case before counting so "Data" and "data" are one word, and
    # skip punctuation-only tokens such as "," or "." that the tokenizer
    # emits as separate items.
    lowered = (token.lower() for token in tokens)
    filtered_tokens = [
        token
        for token in lowered
        if token not in stop_words and any(ch.isalnum() for ch in token)
    ]

    # Calculate word frequency and keep only the most common entries.
    fdist = FreqDist(filtered_tokens)
    return fdist.most_common(top_n)
24
+
25
+
26
def main():
    """Render the Streamlit page.

    Lets the user upload a ``.txt`` file, computes its most common words
    with ``process_text`` and shows them as a bar chart and a list.
    Shows an error for files that are not valid UTF-8 and a warning when
    no words remain to be counted.
    """
    st.title("NLTK Graph Visualization")

    # Upload file
    uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
    if uploaded_file is None:
        return

    # Read file contents; report undecodable uploads instead of crashing.
    try:
        text = uploaded_file.read().decode("utf-8")
    except UnicodeDecodeError:
        st.error("The uploaded file is not valid UTF-8 text.")
        return

    # Process the text
    top_words = process_text(text)
    if not top_words:
        # zip(*[]) yields nothing to unpack, so stop before plotting.
        st.warning("No words left to count in the uploaded file.")
        return

    # Plot word frequency graph on an explicit figure: the implicit global
    # pyplot figure is deprecated in st.pyplot and is not thread-safe.
    words, frequencies = zip(*top_words)
    fig, ax = plt.subplots()
    ax.bar(words, frequencies)
    ax.tick_params(axis="x", labelrotation=45)
    ax.set_xlabel("Words")
    ax.set_ylabel("Frequency")
    ax.set_title("Top 10 Most Common Words")
    st.pyplot(fig)
    # Streamlit reruns the script on every interaction; close the figure
    # so matplotlib does not keep accumulating open figures.
    plt.close(fig)

    # Display the top words
    st.subheader("Top 10 Most Common Words")
    for word, frequency in top_words:
        st.write(f"- {word}: {frequency}")
52
+
53
+
54
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()