ysuneu committed on
Commit
a92d9d6
·
verified ·
1 Parent(s): fda07e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -57
app.py CHANGED
@@ -17,6 +17,66 @@ def calculate_star_rating(positive_percent):
17
  else:
18
  return 1
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def main():
21
  st.set_page_config(page_title="Movie Review Analysis System", page_icon="🎬")
22
 
@@ -32,31 +92,6 @@ def main():
32
  </style>
33
  """, unsafe_allow_html=True)
34
 
35
- # Model loading
36
- with st.spinner("Loading all models, this may take a few minutes..."):
37
- try:
38
- # Sentiment analysis model
39
- classifier = pipeline(
40
- "text-classification",
41
- model="KeonBlackwell/movie_sentiment_model",
42
- tokenizer="distilbert-base-uncased"
43
- )
44
-
45
- # Keyphrase extraction model
46
- keyphrase_extractor = pipeline(
47
- "token-classification",
48
- model="ml6team/keyphrase-extraction-distilbert-inspec",
49
- aggregation_strategy="simple"
50
- )
51
-
52
- # Summarization model
53
- summarizer = pipeline("summarization",
54
- model="facebook/bart-large-cnn")
55
-
56
- except Exception as e:
57
- st.error(f"Model loading failed: {str(e)}")
58
- return
59
-
60
  # Page layout
61
  st.title("🎬 Movie Review Batch Analysis System")
62
  st.markdown("""
@@ -91,24 +126,11 @@ def main():
91
  progress_bar = st.progress(0)
92
  status_text = st.empty()
93
 
94
- results = []
95
- total = len(comments)
96
-
97
- # Batch prediction
98
  try:
99
- # Sentiment analysis
100
- for i, comment in enumerate(comments):
101
- progress = (i+1)/total
102
- progress_bar.progress(progress)
103
- status_text.text(f"Analyzing sentiment for {i+1}/{total} reviews...")
104
-
105
- prediction = classifier(comment)[0]
106
- results.append({
107
- 'comment': comment,
108
- 'sentiment': 1 if prediction['label'] == 'LABEL_1' else 0,
109
- 'confidence': prediction['score']
110
- })
111
-
112
  # Convert to DataFrame
113
  result_df = pd.DataFrame(results)
114
 
@@ -144,10 +166,8 @@ def main():
144
  combined_text = " ".join(comments)
145
 
146
  # Keyphrase extraction
147
- with st.spinner("Extracting keyphrases..."):
148
- keyphrases = keyphrase_extractor(combined_text)
149
- # Sort by confidence and take the top 5
150
- top_keyphrases = sorted(keyphrases, key=lambda x: x['score'], reverse=True)[:5]
151
 
152
  # Show keyphrases
153
  st.markdown("**🔍 Extracted Keyphrases:**")
@@ -168,20 +188,12 @@ def main():
168
  """, unsafe_allow_html=True)
169
 
170
  # Generate summary
171
- with st.spinner("Generating review summary..."):
172
- # Limit text length to avoid model limitations
173
- max_length = 1024 # Maximum input length for the model
174
- if len(combined_text) > max_length:
175
- combined_text = combined_text[:max_length]
176
-
177
- summary = summarizer(combined_text,
178
- max_length=130,
179
- min_length=30,
180
- do_sample=False)
181
 
182
  # Show summary
183
  st.markdown("**📝 Review Summary:**")
184
- st.info(summary[0]['summary_text'])
185
 
186
  # Generate downloadable file
187
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
 
17
  else:
18
  return 1
19
 
20
+ @st.cache_resource
21
+ def analyze_sentiment(comments, progress_bar=None, status_text=None):
22
+ """Perform sentiment analysis on a list of comments"""
23
+ # Load model inside the function
24
+ classifier = pipeline(
25
+ "text-classification",
26
+ model="KeonBlackwell/movie_sentiment_model",
27
+ tokenizer="distilbert-base-uncased"
28
+ )
29
+
30
+ results = []
31
+ total = len(comments)
32
+
33
+ for i, comment in enumerate(comments):
34
+ if progress_bar and status_text:
35
+ progress = (i+1)/total
36
+ progress_bar.progress(progress)
37
+ status_text.text(f"Analyzing sentiment for {i+1}/{total} reviews...")
38
+
39
+ prediction = classifier(comment)[0]
40
+ results.append({
41
+ 'comment': comment,
42
+ 'sentiment': 1 if prediction['label'] == 'LABEL_1' else 0,
43
+ 'confidence': prediction['score']
44
+ })
45
+
46
+ return results
47
+
48
+ @st.cache_resource
49
+ def extract_keyphrases(text, top_n=5):
50
+ """Extract top keyphrases from text"""
51
+ # Load model inside the function
52
+ keyphrase_extractor = pipeline(
53
+ "token-classification",
54
+ model="ml6team/keyphrase-extraction-distilbert-inspec",
55
+ aggregation_strategy="simple"
56
+ )
57
+
58
+ keyphrases = keyphrase_extractor(text)
59
+ # Sort by confidence and take the top N
60
+ top_keyphrases = sorted(keyphrases, key=lambda x: x['score'], reverse=True)[:top_n]
61
+ return top_keyphrases
62
+
63
+ @st.cache_resource
64
+ def generate_summary(text, max_length=130, min_length=30):
65
+ """Generate summary from text"""
66
+ # Load model inside the function
67
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
68
+
69
+ # Limit text length to avoid model limitations
70
+ max_input_length = 1024 # Maximum input length for the model
71
+ if len(text) > max_input_length:
72
+ text = text[:max_input_length]
73
+
74
+ summary = summarizer(text,
75
+ max_length=max_length,
76
+ min_length=min_length,
77
+ do_sample=False)
78
+ return summary[0]['summary_text']
79
+
80
  def main():
81
  st.set_page_config(page_title="Movie Review Analysis System", page_icon="🎬")
82
 
 
92
  </style>
93
  """, unsafe_allow_html=True)
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  # Page layout
96
  st.title("🎬 Movie Review Batch Analysis System")
97
  st.markdown("""
 
126
  progress_bar = st.progress(0)
127
  status_text = st.empty()
128
 
129
+ # Sentiment analysis
 
 
 
130
  try:
131
+ with st.spinner("Loading sentiment analysis model..."):
132
+ results = analyze_sentiment(comments, progress_bar, status_text)
133
+
 
 
 
 
 
 
 
 
 
 
134
  # Convert to DataFrame
135
  result_df = pd.DataFrame(results)
136
 
 
166
  combined_text = " ".join(comments)
167
 
168
  # Keyphrase extraction
169
+ with st.spinner("Loading keyphrase extraction model..."):
170
+ top_keyphrases = extract_keyphrases(combined_text)
 
 
171
 
172
  # Show keyphrases
173
  st.markdown("**🔍 Extracted Keyphrases:**")
 
188
  """, unsafe_allow_html=True)
189
 
190
  # Generate summary
191
+ with st.spinner("Loading summarization model..."):
192
+ summary = generate_summary(combined_text)
 
 
 
 
 
 
 
 
193
 
194
  # Show summary
195
  st.markdown("**📝 Review Summary:**")
196
+ st.info(summary)
197
 
198
  # Generate downloadable file
199
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp: