ysuneu committed on
Commit
6878db4
·
verified ·
1 Parent(s): a92d9d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -70
app.py CHANGED
@@ -17,66 +17,6 @@ def calculate_star_rating(positive_percent):
17
  else:
18
  return 1
19
 
20
- @st.cache_resource
21
- def analyze_sentiment(comments, progress_bar=None, status_text=None):
22
- """Perform sentiment analysis on a list of comments"""
23
- # Load model inside the function
24
- classifier = pipeline(
25
- "text-classification",
26
- model="KeonBlackwell/movie_sentiment_model",
27
- tokenizer="distilbert-base-uncased"
28
- )
29
-
30
- results = []
31
- total = len(comments)
32
-
33
- for i, comment in enumerate(comments):
34
- if progress_bar and status_text:
35
- progress = (i+1)/total
36
- progress_bar.progress(progress)
37
- status_text.text(f"Analyzing sentiment for {i+1}/{total} reviews...")
38
-
39
- prediction = classifier(comment)[0]
40
- results.append({
41
- 'comment': comment,
42
- 'sentiment': 1 if prediction['label'] == 'LABEL_1' else 0,
43
- 'confidence': prediction['score']
44
- })
45
-
46
- return results
47
-
48
- @st.cache_resource
49
- def extract_keyphrases(text, top_n=5):
50
- """Extract top keyphrases from text"""
51
- # Load model inside the function
52
- keyphrase_extractor = pipeline(
53
- "token-classification",
54
- model="ml6team/keyphrase-extraction-distilbert-inspec",
55
- aggregation_strategy="simple"
56
- )
57
-
58
- keyphrases = keyphrase_extractor(text)
59
- # Sort by confidence and take the top N
60
- top_keyphrases = sorted(keyphrases, key=lambda x: x['score'], reverse=True)[:top_n]
61
- return top_keyphrases
62
-
63
- @st.cache_resource
64
- def generate_summary(text, max_length=130, min_length=30):
65
- """Generate summary from text"""
66
- # Load model inside the function
67
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
68
-
69
- # Limit text length to avoid model limitations
70
- max_input_length = 1024 # Maximum input length for the model
71
- if len(text) > max_input_length:
72
- text = text[:max_input_length]
73
-
74
- summary = summarizer(text,
75
- max_length=max_length,
76
- min_length=min_length,
77
- do_sample=False)
78
- return summary[0]['summary_text']
79
-
80
  def main():
81
  st.set_page_config(page_title="Movie Review Analysis System", page_icon="🎬")
82
 
@@ -92,6 +32,31 @@ def main():
92
  </style>
93
  """, unsafe_allow_html=True)
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  # Page layout
96
  st.title("🎬 Movie Review Batch Analysis System")
97
  st.markdown("""
@@ -126,11 +91,24 @@ def main():
126
  progress_bar = st.progress(0)
127
  status_text = st.empty()
128
 
129
- # Sentiment analysis
 
 
 
130
  try:
131
- with st.spinner("Loading sentiment analysis model..."):
132
- results = analyze_sentiment(comments, progress_bar, status_text)
133
-
 
 
 
 
 
 
 
 
 
 
134
  # Convert to DataFrame
135
  result_df = pd.DataFrame(results)
136
 
@@ -163,11 +141,13 @@ def main():
163
  st.subheader("📌 Keyphrase Extraction and Summary of Reviews")
164
 
165
  # Combine all comments into a single text
166
- combined_text = " ".join(comments)
167
 
168
  # Keyphrase extraction
169
- with st.spinner("Loading keyphrase extraction model..."):
170
- top_keyphrases = extract_keyphrases(combined_text)
 
 
171
 
172
  # Show keyphrases
173
  st.markdown("**🔍 Extracted Keyphrases:**")
@@ -188,12 +168,20 @@ def main():
188
  """, unsafe_allow_html=True)
189
 
190
  # Generate summary
191
- with st.spinner("Loading summarization model..."):
192
- summary = generate_summary(combined_text)
 
 
 
 
 
 
 
 
193
 
194
  # Show summary
195
  st.markdown("**📝 Review Summary:**")
196
- st.info(summary)
197
 
198
  # Generate downloadable file
199
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
 
17
  else:
18
  return 1
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def main():
21
  st.set_page_config(page_title="Movie Review Analysis System", page_icon="🎬")
22
 
 
32
  </style>
33
  """, unsafe_allow_html=True)
34
 
35
+ # Model loading
36
+ with st.spinner("Loading all models, this may take a few minutes..."):
37
+ try:
38
+ # Sentiment analysis model
39
+ classifier = pipeline(
40
+ "text-classification",
41
+ model="KeonBlackwell/movie_sentiment_model",
42
+ tokenizer="distilbert-base-uncased"
43
+ )
44
+
45
+ # Keyphrase extraction model
46
+ keyphrase_extractor = pipeline(
47
+ "token-classification",
48
+ model="ml6team/keyphrase-extraction-distilbert-inspec",
49
+ aggregation_strategy="simple"
50
+ )
51
+
52
+ # Summarization model
53
+ summarizer = pipeline("summarization",
54
+ model="facebook/bart-large-cnn")
55
+
56
+ except Exception as e:
57
+ st.error(f"Model loading failed: {str(e)}")
58
+ return
59
+
60
  # Page layout
61
  st.title("🎬 Movie Review Batch Analysis System")
62
  st.markdown("""
 
91
  progress_bar = st.progress(0)
92
  status_text = st.empty()
93
 
94
+ results = []
95
+ total = len(comments)
96
+
97
+ # Batch prediction
98
  try:
99
+ # Sentiment analysis
100
+ for i, comment in enumerate(comments):
101
+ progress = (i+1)/total
102
+ progress_bar.progress(progress)
103
+ status_text.text(f"Analyzing sentiment for {i+1}/{total} reviews...")
104
+
105
+ prediction = classifier(comment)[0]
106
+ results.append({
107
+ 'comment': comment,
108
+ 'sentiment': 1 if prediction['label'] == 'LABEL_1' else 0,
109
+ 'confidence': prediction['score']
110
+ })
111
+
112
  # Convert to DataFrame
113
  result_df = pd.DataFrame(results)
114
 
 
141
  st.subheader("📌 Keyphrase Extraction and Summary of Reviews")
142
 
143
  # Combine all comments into a single text
144
+ combined_text = " ".join(comment)
145
 
146
  # Keyphrase extraction
147
+ with st.spinner("Extracting keyphrases..."):
148
+ keyphrases = keyphrase_extractor(combined_text)
149
+ # Sort by confidence and take the top 5
150
+ top_keyphrases = sorted(keyphrases, key=lambda x: x['score'], reverse=True)[:5]
151
 
152
  # Show keyphrases
153
  st.markdown("**🔍 Extracted Keyphrases:**")
 
168
  """, unsafe_allow_html=True)
169
 
170
  # Generate summary
171
+ with st.spinner("Generating review summary..."):
172
+ # Limit text length to avoid model limitations
173
+ max_length = 1024 # Maximum input length for the model
174
+ if len(combined_text) > max_length:
175
+ combined_text = combined_text[:max_length]
176
+
177
+ summary = summarizer(combined_text,
178
+ max_length=130,
179
+ min_length=30,
180
+ do_sample=False)
181
 
182
  # Show summary
183
  st.markdown("**📝 Review Summary:**")
184
+ st.info(summary[0]['summary_text'])
185
 
186
  # Generate downloadable file
187
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp: