Spaces:
Runtime error
Runtime error
File size: 1,426 Bytes
a034352 b1f52ee a034352 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import streamlit as st
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
import matplotlib.pyplot as plt
# Fetch every NLTK resource this script reads: the Punkt tokenizer models used
# by word_tokenize and the stopwords corpus used by stopwords.words("english").
# Without the "stopwords" download a fresh environment fails with LookupError.
# "punkt_tab" is what newer NLTK releases look up for word_tokenize; on older
# releases the id is presumably unknown and the download simply reports
# failure without raising -- TODO confirm against the pinned NLTK version.
# quiet=True keeps the log readable, since this runs on every script execution.
for _resource in ("punkt", "punkt_tab", "stopwords"):
    nltk.download(_resource, quiet=True)
def process_text(text, top_n=10):
    """Return the most frequent non-stopword tokens in *text*.

    The text is tokenized with ``word_tokenize``; tokens whose lowercase form
    is an English stopword are dropped, as are tokens that contain no
    alphanumeric character (bare punctuation such as "," or "."). Stopword
    matching is case-insensitive, but counting keeps each token's original
    casing, so "Word" and "word" are counted separately.

    Args:
        text: The text to analyse.
        top_n: How many of the most common tokens to return. Defaults to 10.

    Returns:
        The result of ``FreqDist.most_common(top_n)``: ``(token, count)``
        pairs ordered from most to least frequent. Empty when no token
        survives filtering.
    """
    tokens = word_tokenize(text)

    # Build the set once so each membership test below is a hash lookup.
    stop_words = set(stopwords.words("english"))

    # word_tokenize emits punctuation as separate tokens; without the
    # isalnum() check those would dominate the "most common words".
    filtered_tokens = [
        word
        for word in tokens
        if word.lower() not in stop_words
        and any(char.isalnum() for char in word)
    ]

    fdist = FreqDist(filtered_tokens)
    return fdist.most_common(top_n)
def main():
    """Render the Streamlit page.

    Shows a title and a ".txt" file uploader. Once a file is uploaded, its
    bytes are decoded as UTF-8, passed to ``process_text``, and the resulting
    (word, frequency) pairs are shown as a bar chart followed by a bulleted
    list. If the file cannot be decoded or yields no words, a message is
    shown instead of the chart.
    """
    st.title("NLTK Graph Visualization")

    uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
    if uploaded_file is None:
        return

    try:
        text = uploaded_file.read().decode("utf-8")
    except UnicodeDecodeError:
        # A ".txt" extension does not guarantee UTF-8 content.
        st.error("Could not decode the uploaded file as UTF-8 text.")
        return

    top_words = process_text(text)
    if not top_words:
        # zip(*[]) yields nothing, so the unpacking below would raise
        # ValueError for an empty or stopword-only file.
        st.warning("No words found in the uploaded file.")
        return

    words, frequencies = zip(*top_words)

    # Draw on an explicit figure rather than pyplot's implicit global one:
    # calling st.pyplot() with no figure is deprecated in Streamlit.
    fig, ax = plt.subplots()
    ax.bar(words, frequencies)
    ax.tick_params(axis="x", labelrotation=45)
    ax.set_xlabel("Words")
    ax.set_ylabel("Frequency")
    ax.set_title("Top 10 Most Common Words")
    st.pyplot(fig)
    # Release the figure; otherwise pyplot keeps it registered after each run.
    plt.close(fig)

    st.subheader("Top 10 Most Common Words")
    for word, frequency in top_words:
        st.write(f"- {word}: {frequency}")
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()