Spaces:

dhanikitkat
/

demo-topic-detection

Sleeping

App Files Files Community

dhanikitkat committed on Feb 1, 2024

Commit

9c624ca

verified ·

1 Parent(s): fb039e5

Upload 9 files

Browse files

Files changed (9) hide show

app.py +64 -0
corpus_tfidf_pos.mm +0 -0
corpus_tfidf_pos.mm.index +0 -0
dictionary.dict +0 -0
lda.model +3 -0
lda.model.expElogbeta.npy +3 -0
lda.model.id2word +0 -0
lda.model.state +0 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import streamlit as st
+import re
+import pandas as pd
+from transformers import pipeline
+from gensim.models import LdaModel
+from gensim.corpora import Dictionary
+# Function to preprocess text
+def text_preprocess(teks):
+    """Clean raw user text before it is handed to the models.
+
+    Steps, in order: lowercase; drop URLs; drop @mentions and #hashtags;
+    replace literal backslash-n sequences; replace every character that is
+    not an ASCII letter, whitespace or apostrophe with a space; trim the ends.
+
+    Args:
+        teks: The raw input string.
+
+    Returns:
+        The cleaned string with no leading or trailing whitespace. Runs of
+        spaces inside the text are left as they are.
+    """
+    teks = teks.lower()
+    # URLs must go first: removing "@user" or "#frag" inside a link would cut
+    # the link in two and leave its tail (e.g. "/post") behind as fake words.
+    teks = re.sub(r"http\S+", " ", teks)
+    # The dot is escaped so that only real "www." prefixes match; unescaped it
+    # also matched "www " and swallowed the following word ("awwww lucu").
+    teks = re.sub(r"www\.\S+", " ", teks)
+    teks = re.sub(r"@[A-Za-z0-9_]+", " ", teks)
+    teks = re.sub(r"#[A-Za-z0-9_]+", " ", teks)
+    # Matches the two-character sequence backslash + "n" (an escaped newline
+    # that survived as text), not a real newline character.
+    teks = re.sub(r"\\n", " ", teks)
+    # Raw string: "\s" in a normal string literal is an invalid escape.
+    teks = re.sub(r"[^A-Za-z\s']", " ", teks)
+    # Trim last so the replacements above cannot leave padding at the ends.
+    return teks.strip()
+# Function to perform inference and get the topic with the highest probability
+def get_highest_probability_topic(lda_model, dictionary, new_document, topic_names):
+    """Return the label and probability of the best-scoring topic for a text.
+
+    The text is split on whitespace, turned into a bag-of-words with
+    ``dictionary.doc2bow`` and scored with
+    ``lda_model.get_document_topics(..., minimum_probability=0)``.
+
+    Args:
+        lda_model: Object exposing ``get_document_topics``.
+        dictionary: Object exposing ``doc2bow``.
+        new_document: The text to score, as a single string.
+        topic_names: Mapping from topic id to a human-readable label.
+
+    Returns:
+        A ``(label, probability)`` tuple. Ids missing from ``topic_names``
+        are labelled ``"Topic <id>"``.
+    """
+    tokens = new_document.split()
+    scored_topics = lda_model.get_document_topics(
+        dictionary.doc2bow(tokens), minimum_probability=0
+    )
+    # Each entry is an (id, probability) pair; pick the pair with the top score.
+    best_id, best_probability = max(scored_topics, key=lambda pair: pair[1])
+    if best_id in topic_names:
+        label = topic_names[best_id]
+    else:
+        label = f"Topic {best_id}"
+    return label, best_probability
+# Load sentiment analysis model
+pretrained_name = "w11wo/indonesian-roberta-base-sentiment-classifier"
+# Streamlit re-executes this script on every widget interaction. Building the
+# pipeline through a cached factory keeps one instance alive across reruns
+# instead of constructing the model again on each button click.
+# NOTE(review): st.cache_resource needs Streamlit >= 1.18 - confirm the
+# version the Space runs (requirements.txt leaves it unpinned).
+@st.cache_resource
+def load_sentiment_pipeline():
+    """Create the sentiment-analysis pipeline for ``pretrained_name``."""
+    return pipeline("sentiment-analysis", model=pretrained_name, tokenizer=pretrained_name)
+nlp = load_sentiment_pipeline()
+# Streamlit app
+def _show_sentiment(processed_text):
+    """Run the sentiment pipeline on cleaned text and render label and score."""
+    result = nlp(processed_text)
+    st.write("Sentiment:", result[0]['label'])
+    st.write("Probability:", result[0]['score'])
+def _show_topic(processed_text):
+    """Load the LDA artifacts, infer the dominant topic and render it."""
+    lda_model = LdaModel.load("lda.model")
+    dictionary = Dictionary.load("dictionary.dict")
+    topic_names = {0: 'User Experience',
+               1: 'App Features',
+               2: 'Questions and Engagement',
+               3: 'Opinion on Banking App',
+               4: 'Mixed Feedback and Technical Issues',
+                }
+    # With no known token the model has nothing to score, so any "best" topic
+    # it reported would not reflect the text at all.
+    if not dictionary.doc2bow(processed_text.split()):
+        st.warning("None of the words in the text are known to the topic model.")
+        return
+    inferred_topic, inferred_probability = get_highest_probability_topic(lda_model, dictionary, processed_text, topic_names)
+    st.write("Inferred Topic:", inferred_topic)
+    st.write("Inference Probability:", inferred_probability)
+def main():
+    """Render the page: one text box and two actions (sentiment, topic).
+
+    Both actions work on the same preprocessed text and are skipped with a
+    warning when the input is blank after cleaning.
+    """
+    st.title("Sentiment Analysis and Topic Inference App")
+    st.write("Enter your text below:")
+    input_text = st.text_area("Input Text")
+    # Clean once and share the result. Topic inference previously received
+    # the raw text, so capitalised or punctuated words were split into tokens
+    # such as "Bagus!".
+    # NOTE(review): assumes the dictionary was built from text cleaned the
+    # same way - confirm against the training pipeline.
+    processed_text = text_preprocess(input_text).strip()
+    if st.button("Analyze Sentiment"):
+        if processed_text:
+            _show_sentiment(processed_text)
+        else:
+            st.warning("Please enter some text to analyze.")
+    if st.button("Infer Topic"):
+        if processed_text:
+            _show_topic(processed_text)
+        else:
+            st.warning("Please enter some text to analyze.")
+if __name__ == "__main__":
+    main()

corpus_tfidf_pos.mm ADDED Viewed

The diff for this file is too large to render. See raw diff

corpus_tfidf_pos.mm.index ADDED Viewed

Binary file (32.3 kB). View file

dictionary.dict ADDED Viewed

Binary file (33 kB). View file

lda.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:858f495dd7f6f45c574bf2b300a75249cc74a6154a4c5e3efed24c741a68f9d9
+size 8478

lda.model.expElogbeta.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e046c650d7dc819e07427f7ae23d682683eed9ec63de27ce26ee81b4362c04
+size 22928

lda.model.id2word ADDED Viewed

Binary file (32.7 kB). View file

lda.model.state ADDED Viewed

Binary file (28 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit
+tensorflow
+pandas
+gensim
+transformers
+nltk