Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -17,6 +17,66 @@ def calculate_star_rating(positive_percent):
|
|
17 |
else:
|
18 |
return 1
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
def main():
|
21 |
st.set_page_config(page_title="Movie Review Analysis System", page_icon="π¬")
|
22 |
|
@@ -32,31 +92,6 @@ def main():
|
|
32 |
</style>
|
33 |
""", unsafe_allow_html=True)
|
34 |
|
35 |
-
# Model loading
|
36 |
-
with st.spinner("Loading all models, this may take a few minutes..."):
|
37 |
-
try:
|
38 |
-
# Sentiment analysis model
|
39 |
-
classifier = pipeline(
|
40 |
-
"text-classification",
|
41 |
-
model="KeonBlackwell/movie_sentiment_model",
|
42 |
-
tokenizer="distilbert-base-uncased"
|
43 |
-
)
|
44 |
-
|
45 |
-
# Keyphrase extraction model
|
46 |
-
keyphrase_extractor = pipeline(
|
47 |
-
"token-classification",
|
48 |
-
model="ml6team/keyphrase-extraction-distilbert-inspec",
|
49 |
-
aggregation_strategy="simple"
|
50 |
-
)
|
51 |
-
|
52 |
-
# Summarization model
|
53 |
-
summarizer = pipeline("summarization",
|
54 |
-
model="facebook/bart-large-cnn")
|
55 |
-
|
56 |
-
except Exception as e:
|
57 |
-
st.error(f"Model loading failed: {str(e)}")
|
58 |
-
return
|
59 |
-
|
60 |
# Page layout
|
61 |
st.title("π¬ Movie Review Batch Analysis System")
|
62 |
st.markdown("""
|
@@ -91,24 +126,11 @@ def main():
|
|
91 |
progress_bar = st.progress(0)
|
92 |
status_text = st.empty()
|
93 |
|
94 |
-
|
95 |
-
total = len(comments)
|
96 |
-
|
97 |
-
# Batch prediction
|
98 |
try:
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
progress_bar.progress(progress)
|
103 |
-
status_text.text(f"Analyzing sentiment for {i+1}/{total} reviews...")
|
104 |
-
|
105 |
-
prediction = classifier(comment)[0]
|
106 |
-
results.append({
|
107 |
-
'comment': comment,
|
108 |
-
'sentiment': 1 if prediction['label'] == 'LABEL_1' else 0,
|
109 |
-
'confidence': prediction['score']
|
110 |
-
})
|
111 |
-
|
112 |
# Convert to DataFrame
|
113 |
result_df = pd.DataFrame(results)
|
114 |
|
@@ -144,10 +166,8 @@ def main():
|
|
144 |
combined_text = " ".join(comments)
|
145 |
|
146 |
# Keyphrase extraction
|
147 |
-
with st.spinner("
|
148 |
-
|
149 |
-
# Sort by confidence and take the top 5
|
150 |
-
top_keyphrases = sorted(keyphrases, key=lambda x: x['score'], reverse=True)[:5]
|
151 |
|
152 |
# Show keyphrases
|
153 |
st.markdown("**π Extracted Keyphrases:**")
|
@@ -168,20 +188,12 @@ def main():
|
|
168 |
""", unsafe_allow_html=True)
|
169 |
|
170 |
# Generate summary
|
171 |
-
with st.spinner("
|
172 |
-
|
173 |
-
max_length = 1024 # Maximum input length for the model
|
174 |
-
if len(combined_text) > max_length:
|
175 |
-
combined_text = combined_text[:max_length]
|
176 |
-
|
177 |
-
summary = summarizer(combined_text,
|
178 |
-
max_length=130,
|
179 |
-
min_length=30,
|
180 |
-
do_sample=False)
|
181 |
|
182 |
# Show summary
|
183 |
st.markdown("**π Review Summary:**")
|
184 |
-
st.info(summary
|
185 |
|
186 |
# Generate downloadable file
|
187 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
|
|
|
17 |
else:
|
18 |
return 1
|
19 |
|
20 |
+
@st.cache_resource
def _load_sentiment_classifier():
    """Build the sentiment text-classification pipeline.

    ``st.cache_resource`` is applied here, on the argument-less loader, so
    the pipeline object itself is the cached value rather than the analysis
    results of one particular input.
    """
    return pipeline(
        "text-classification",
        model="KeonBlackwell/movie_sentiment_model",
        tokenizer="distilbert-base-uncased",
    )


def analyze_sentiment(comments, progress_bar=None, status_text=None):
    """Perform sentiment analysis on a list of comments.

    Args:
        comments: Sized iterable of review texts; each one is passed to the
            classifier individually.
        progress_bar: Optional object exposing ``progress(fraction)``; it is
            called with ``(i + 1) / total`` before each review is classified.
        status_text: Optional object exposing ``text(message)``; it receives
            a human-readable progress message.

    Returns:
        A list with one dict per comment, holding ``'comment'`` (the input
        text), ``'sentiment'`` (1 when the predicted label is ``'LABEL_1'``,
        otherwise 0) and ``'confidence'`` (the prediction score).
    """
    # This function is deliberately NOT cached: it has UI side effects
    # (progress updates) that must run on every call, and its arguments
    # include widget handles that make poor cache keys.
    classifier = _load_sentiment_classifier()

    total = len(comments)
    # Progress is only reported when both widgets were supplied.
    report_progress = progress_bar is not None and status_text is not None

    results = []
    for i, comment in enumerate(comments):
        if report_progress:
            progress_bar.progress((i + 1) / total)
            status_text.text(f"Analyzing sentiment for {i+1}/{total} reviews...")

        prediction = classifier(comment)[0]
        results.append({
            'comment': comment,
            'sentiment': 1 if prediction['label'] == 'LABEL_1' else 0,
            'confidence': prediction['score'],
        })

    return results
|
47 |
+
|
48 |
+
@st.cache_resource
def _load_keyphrase_extractor():
    """Build the keyphrase token-classification pipeline.

    ``st.cache_resource`` is applied to this argument-less loader so the
    pipeline object is what gets cached, instead of being rebuilt for every
    new input text.
    """
    return pipeline(
        "token-classification",
        model="ml6team/keyphrase-extraction-distilbert-inspec",
        aggregation_strategy="simple",
    )


def extract_keyphrases(text, top_n=5):
    """Extract the top keyphrases from text.

    Args:
        text: The text handed to the keyphrase extraction pipeline.
        top_n: Maximum number of keyphrases to return (default 5).

    Returns:
        Up to ``top_n`` entries from the pipeline output, ordered by their
        ``'score'`` value from highest to lowest.
    """
    keyphrase_extractor = _load_keyphrase_extractor()

    keyphrases = keyphrase_extractor(text)
    # Sort by confidence and keep only the top N
    ranked = sorted(keyphrases, key=lambda x: x['score'], reverse=True)
    return ranked[:top_n]
|
62 |
+
|
63 |
+
@st.cache_resource
def _load_summarizer():
    """Build the summarization pipeline.

    ``st.cache_resource`` is applied to this argument-less loader so the
    pipeline object is what gets cached, instead of being rebuilt for every
    distinct text passed to ``generate_summary``.
    """
    return pipeline("summarization", model="facebook/bart-large-cnn")


def generate_summary(text, max_length=130, min_length=30):
    """Generate a summary from text.

    Args:
        text: The text to summarize. Only its first 1024 characters are
            passed to the model.
        max_length: Forwarded to the summarizer as ``max_length``.
        min_length: Forwarded to the summarizer as ``min_length``.

    Returns:
        The ``'summary_text'`` string of the first summarizer result.
    """
    summarizer = _load_summarizer()

    # Limit input size to stay within model limitations.
    # NOTE(review): this cap is applied in characters; the model's own limit
    # is presumably expressed in tokens - confirm the cap is adequate.
    max_input_length = 1024
    if len(text) > max_input_length:
        text = text[:max_input_length]

    summary = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
    )
    return summary[0]['summary_text']
|
79 |
+
|
80 |
def main():
|
81 |
st.set_page_config(page_title="Movie Review Analysis System", page_icon="π¬")
|
82 |
|
|
|
92 |
</style>
|
93 |
""", unsafe_allow_html=True)
|
94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
# Page layout
|
96 |
st.title("π¬ Movie Review Batch Analysis System")
|
97 |
st.markdown("""
|
|
|
126 |
progress_bar = st.progress(0)
|
127 |
status_text = st.empty()
|
128 |
|
129 |
+
# Sentiment analysis
|
|
|
|
|
|
|
130 |
try:
|
131 |
+
with st.spinner("Loading sentiment analysis model..."):
|
132 |
+
results = analyze_sentiment(comments, progress_bar, status_text)
|
133 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
# Convert to DataFrame
|
135 |
result_df = pd.DataFrame(results)
|
136 |
|
|
|
166 |
combined_text = " ".join(comments)
|
167 |
|
168 |
# Keyphrase extraction
|
169 |
+
with st.spinner("Loading keyphrase extraction model..."):
|
170 |
+
top_keyphrases = extract_keyphrases(combined_text)
|
|
|
|
|
171 |
|
172 |
# Show keyphrases
|
173 |
st.markdown("**π Extracted Keyphrases:**")
|
|
|
188 |
""", unsafe_allow_html=True)
|
189 |
|
190 |
# Generate summary
|
191 |
+
with st.spinner("Loading summarization model..."):
|
192 |
+
summary = generate_summary(combined_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
|
194 |
# Show summary
|
195 |
st.markdown("**π Review Summary:**")
|
196 |
+
st.info(summary)
|
197 |
|
198 |
# Generate downloadable file
|
199 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
|