# Spaces: Runtime error
import matplotlib.pyplot as plt
import nltk
import streamlit as st
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from nltk.tokenize import word_tokenize
# Fetch every NLTK resource the app needs before any text is processed:
#   - "punkt" / "punkt_tab": sentence tokenizer models used by word_tokenize
#     (recent NLTK releases look for "punkt_tab" instead of "punkt").
#   - "stopwords": corpus read by stopwords.words("english"); without it the
#     call raises LookupError on a fresh environment.
nltk.download("punkt")
nltk.download("punkt_tab")
nltk.download("stopwords")
def process_text(text):
    """Return the ten most frequent meaningful words in *text*.

    The text is split with NLTK's ``word_tokenize``. Tokens are lower-cased
    so that differently capitalised spellings of a word are counted together,
    then English stop words and tokens that contain no letter or digit
    (pure punctuation such as "," or ".") are discarded before counting.

    Requires the NLTK tokenizer models and the "stopwords" corpus to be
    installed.

    Args:
        text: The raw text to analyse.

    Returns:
        A list of at most ten ``(word, count)`` tuples ordered from most to
        least frequent. The list is empty when no countable token remains.
    """
    stop_words = set(stopwords.words("english"))

    # Normalise case first so the stop-word test and the counting agree.
    lowered = (token.lower() for token in word_tokenize(text))

    # Keep only real words: not a stop word, and not punctuation-only.
    meaningful = [
        word
        for word in lowered
        if word not in stop_words and any(ch.isalnum() for ch in word)
    ]

    return FreqDist(meaningful).most_common(10)
def main():
    """Render the Streamlit page: upload a text file and chart its top words.

    Shows a file uploader; once a ``.txt`` file is provided, its contents are
    passed to ``process_text`` and the resulting word counts are displayed
    both as a bar chart and as a bullet list. Nothing is returned.
    """
    st.title("NLTK Graph Visualization")

    uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
    if uploaded_file is None:
        return

    # Undecodable bytes are replaced rather than raising, so a file that is
    # not valid UTF-8 still produces a chart instead of crashing the app.
    text = uploaded_file.read().decode("utf-8", errors="replace")
    top_words = process_text(text)

    # zip(*[]) yields nothing to unpack, so bail out early on empty results.
    if not top_words:
        st.warning("No words left to count in the uploaded file.")
        return

    words, frequencies = zip(*top_words)

    # Draw on an explicit figure: calling st.pyplot() without one relies on
    # the global pyplot state, which is deprecated and not thread-safe.
    fig, ax = plt.subplots()
    ax.bar(words, frequencies)
    ax.tick_params(axis="x", labelrotation=45)
    ax.set_xlabel("Words")
    ax.set_ylabel("Frequency")
    ax.set_title("Top 10 Most Common Words")
    st.pyplot(fig)
    # Release the figure so reruns of the script do not accumulate figures.
    plt.close(fig)

    st.subheader("Top 10 Most Common Words")
    for word, frequency in top_words:
        st.write(f"- {word}: {frequency}")
# Build the page only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()