File size: 1,426 Bytes
a034352
 
 
 
 
 
 
b1f52ee
a034352
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import streamlit as st
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
import matplotlib.pyplot as plt

# Fetch every NLTK resource this script relies on:
#   - 'punkt' / 'punkt_tab': tokenizer models used by word_tokenize (newer
#     NLTK releases load 'punkt_tab' instead of the pickled 'punkt' models).
#   - 'stopwords': the word lists behind stopwords.words("english"); without
#     it process_text() fails with a LookupError.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

def process_text(text, top_n=10):
    """Return the most frequent meaningful words in *text*.

    The text is tokenized with NLTK's ``word_tokenize``. Tokens are
    lower-cased before counting so that "The" and "the" are tallied as the
    same word. English stopwords and tokens that contain no letter or digit
    (bare punctuation such as "," or ".") are discarded.

    Args:
        text: The raw text to analyse.
        top_n: Maximum number of words to return. Defaults to 10.

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least
        frequent, with at most ``top_n`` entries. The list is empty when no
        token survives filtering.
    """
    # Build the stopword set once so each membership test is O(1).
    stop_words = set(stopwords.words("english"))

    # Normalise case up front: the same lower-cased form is used both for
    # the stopword check and for counting.
    lowered = (token.lower() for token in word_tokenize(text))
    filtered_tokens = [
        word
        for word in lowered
        if word not in stop_words and any(ch.isalnum() for ch in word)
    ]

    return FreqDist(filtered_tokens).most_common(top_n)


def main():
    """Render the Streamlit page: upload a text file and chart its top words.

    Shows a file uploader for ``.txt`` files. Once a file is provided, its
    contents are decoded as UTF-8, passed to ``process_text``, and the
    resulting word frequencies are shown as a bar chart followed by a
    bulleted list. Undecodable or word-less uploads produce a message
    instead of an exception.
    """
    st.title("NLTK Graph Visualization")

    # Upload file
    uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
    if uploaded_file is None:
        return

    # Read file contents; a ".txt" extension does not guarantee UTF-8.
    try:
        text = uploaded_file.read().decode("utf-8")
    except UnicodeDecodeError:
        st.error("The uploaded file could not be decoded as UTF-8 text.")
        return

    # Process the text
    top_words = process_text(text)
    if not top_words:
        # zip(*[]) yields nothing to unpack, so stop before plotting.
        st.warning("No words were found in the uploaded file.")
        return

    # Plot word frequency graph on an explicit figure rather than the
    # global pyplot state, which st.pyplot() without arguments relies on.
    words, frequencies = zip(*top_words)
    fig, ax = plt.subplots()
    ax.bar(words, frequencies)
    ax.tick_params(axis="x", labelrotation=45)
    ax.set_xlabel("Words")
    ax.set_ylabel("Frequency")
    ax.set_title("Top 10 Most Common Words")
    st.pyplot(fig)
    # Release the figure so reruns of the script do not accumulate figures.
    plt.close(fig)

    # Display the top words
    st.subheader("Top 10 Most Common Words")
    for word, frequency in top_words:
        st.write(f"- {word}: {frequency}")


# Entry point: build the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()