ysuneu commited on
Commit
244d99e
·
verified ·
1 Parent(s): c5302a0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +386 -10
app.py CHANGED
@@ -1,20 +1,396 @@
1
  import streamlit as st
 
2
  from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  def main():
5
- sentiment_pipeline = pipeline(model="isom5240/2025SpringL2")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- st.title("Sentiment Analysis with HuggingFace Spaces")
8
- st.write("Enter a sentence to analyze its sentiment:")
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- user_input = st.text_input("")
11
- if user_input:
12
- result = sentiment_pipeline(user_input)
13
- sentiment = result[0]["label"]
14
- confidence = result[0]["score"]
15
 
16
- st.write(f"Sentiment: {sentiment}")
17
- st.write(f"Confidence: {confidence:.2f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  if __name__ == "__main__":
20
  main()
 
1
  import streamlit as st
2
+ import pandas as pd
3
  from transformers import pipeline
4
+ import tempfile
5
+ import os
6
+ from typing import List, Dict
7
+ import matplotlib.pyplot as plt
8
+
9
+ @st.cache_resource
10
+ def load_model():
11
+ """Load and cache the sentiment analysis model"""
12
+ try:
13
+ return pipeline(
14
+ "text-classification",
15
+ model="KeonBlackwell/movie_sentiment_model",
16
+ tokenizer="distilbert-base-uncased"
17
+ )
18
+ except Exception as e:
19
+ st.error(f"模型加载失败: {str(e)}")
20
+ return None
21
+
22
+ def analyze_comments(comments: List[str], classifier) -> List[Dict]:
23
+ """Analyze a list of comments and return sentiment results"""
24
+ results = []
25
+ for comment in comments:
26
+ prediction = classifier(comment)[0]
27
+ results.append({
28
+ 'comment': comment,
29
+ 'sentiment': 1 if prediction['label'] == 'LABEL_1' else 0,
30
+ 'confidence': prediction['score']
31
+ })
32
+ return results
33
+
34
+ def calculate_star_rating(positive_percent: float) -> int:
35
+ """Convert positive percentage to star rating (1-5)"""
36
+ if positive_percent >= 80:
37
+ return 5
38
+ elif positive_percent >= 60:
39
+ return 4
40
+ elif positive_percent >= 40:
41
+ return 3
42
+ elif positive_percent >= 20:
43
+ return 2
44
+ return 1
45
+
46
+ def show_sentiment_distribution(positive_percent: float):
47
+ """Display a pie chart of sentiment distribution"""
48
+ fig, ax = plt.subplots()
49
+ ax.pie([positive_percent, 100-positive_percent],
50
+ labels=['Positive', 'Negative'],
51
+ autopct='%1.1f%%',
52
+ colors=['#4CAF50', '#F44336'])
53
+ ax.axis('equal') # Equal aspect ratio ensures pie is drawn as a circle
54
+ st.pyplot(fig)
55
+
56
+ def main():
57
+ st.set_page_config(page_title="电影评论分析系统", page_icon="🎬")
58
+
59
+ # Custom CSS
60
+ st.markdown("""
61
+ <style>
62
+ .reportview-container {
63
+ background: #f0f2f6;
64
+ }
65
+ .stProgress > div > div > div > div {
66
+ background-color: #4CAF50;
67
+ }
68
+ </style>
69
+ """, unsafe_allow_html=True)
70
+
71
+ # Load model
72
+ classifier = load_model()
73
+ if classifier is None:
74
+ return
75
+
76
+ # Page layout
77
+ st.title("🎬 电影评论批量分析系统")
78
+ st.markdown("""
79
+ ### 使用说明:
80
+ 1. 上传包含电影评论的CSV文件(需包含'comment'列)
81
+ 2. 系统自动分析每条评论的情感倾向
82
+ 3. 生成整体评分和分析报告
83
+ """)
84
+
85
+ # Sample file download
86
+ with st.expander("下载示例文件"):
87
+ sample_data = pd.DataFrame({'comment': [
88
+ "This movie was fantastic! The acting was superb.",
89
+ "I didn't like the plot. It was too predictable.",
90
+ "The cinematography was beautiful but the story was weak."
91
+ ]})
92
+ st.download_button(
93
+ label="下载示例CSV",
94
+ data=sample_data.to_csv(index=False).encode('utf-8'),
95
+ file_name="sample_reviews.csv",
96
+ mime="text/csv"
97
+ )
98
+
99
+ # File upload
100
+ uploaded_file = st.file_uploader("上传CSV文件", type=["csv"])
101
+
102
+ if uploaded_file is not None:
103
+ try:
104
+ df = pd.read_csv(uploaded_file)
105
+ if 'comment' not in df.columns:
106
+ st.error("CSV文件必须包含'comment'列")
107
+ return
108
+
109
+ comments = df['comment'].dropna().tolist()
110
+
111
+ with st.expander("原始数据预览(前5行)"):
112
+ st.dataframe(df.head())
113
+
114
+ if st.button("开始分析", type="primary"):
115
+ if len(comments) > 1000:
116
+ st.warning(f"检测到大量评论 ({len(comments)} 条),分析可能需要较长时间...")
117
+
118
+ with st.spinner("分析中,请稍候..."):
119
+ results = analyze_comments(comments, classifier)
120
+ result_df = pd.DataFrame(results)
121
+
122
+ # Calculate statistics
123
+ positive_count = result_df['sentiment'].sum()
124
+ total_reviews = len(result_df)
125
+ positive_percent = (positive_count / total_reviews) * 100
126
+ star_rating = calculate_star_rating(positive_percent)
127
+
128
+ # Display results
129
+ st.success("分析完成!")
130
+
131
+ # Metrics
132
+ col1, col2, col3 = st.columns(3)
133
+ with col1:
134
+ st.metric("⭐ 综合评分", f"{star_rating} 星")
135
+ with col2:
136
+ st.metric("👍 正面评价", f"{positive_count}/{total_reviews}")
137
+ with col3:
138
+ st.metric("📈 正面比例", f"{positive_percent:.1f}%")
139
+
140
+ # Visualizations
141
+ show_sentiment_distribution(positive_percent)
142
+
143
+ # Sample results
144
+ with st.expander("查看详细分析结果(前10条)"):
145
+ st.dataframe(result_df.head(10))
146
+
147
+ # Download
148
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
149
+ result_df.to_csv(tmp.name, index=False)
150
+ with open(tmp.name, "rb") as f:
151
+ st.download_button(
152
+ label="下载完整结果",
153
+ data=f,
154
+ file_name="analysis_results.csv",
155
+ mime="text/csv"
156
+ )
157
+ os.unlink(tmp.name)
158
+
159
+ except Exception as e:
160
+ st.error(f"处理文件时出错: {str(e)}")
161
+
162
+ if __name__ == "__main__":
163
+ main()
164
+ Key improvements made:
165
+
166
+ Added model caching
167
+
168
+ Included a sample file download
169
+
170
+ Added a pie chart visualization
171
+
172
+ Better organized the code into functions
173
+
174
+ Added type hints
175
+
176
+ Improved error handling and user feedback
177
+
178
+ Added warning for large files
179
+
180
+ Would you like me to explain any specific part of these improvements in more detail?
181
+
182
+ 我不需要优化,在源代码上添加以下几步,合并CSV里的每行评论,合并成一个文本,利用huggingface上的模型keyphrase_extractor = pipeline(
183
+ "token-classification",
184
+ model="ml6team/keyphrase-extraction-distilbert-inspec",
185
+ aggregation_strategy="simple" # 用于合并子词/标记
186
+ 和summarizer = pipeline("summarization",
187
+ model="facebook/bart-large-cnn")
188
+ 进行提取5个关键词,和生成总结,我是在huggingface上设计app
189
+ 我将在你的原始代码基础上添加关键词提取和总结生成功能。以下是修改后的代码,保留了原有功能,同时增加了你要求的新特性:
190
+
191
+ python
192
+ import streamlit as st
193
+ import pandas as pd
194
+ from transformers import pipeline
195
+ import tempfile
196
+ import os
197
+
198
+ def calculate_star_rating(positive_percent):
199
+ """将正面评价百分比转换为星级评分"""
200
+ if positive_percent >= 80:
201
+ return 5
202
+ elif positive_percent >= 60:
203
+ return 4
204
+ elif positive_percent >= 40:
205
+ return 3
206
+ elif positive_percent >= 20:
207
+ return 2
208
+ else:
209
+ return 1
210
 
211
  def main():
212
+ st.set_page_config(page_title="电影评论分析系统", page_icon="🎬")
213
+
214
+ # 自定义样式
215
+ st.markdown("""
216
+ <style>
217
+ .reportview-container {
218
+ background: #f0f2f6;
219
+ }
220
+ .stProgress > div > div > div > div {
221
+ background-color: #4CAF50;
222
+ }
223
+ </style>
224
+ """, unsafe_allow_html=True)
225
+
226
+ # 模型加载
227
+ with st.spinner("正在加载所有模型,这可能需要几分钟..."):
228
+ try:
229
+ # 情感分析模型
230
+ classifier = pipeline(
231
+ "text-classification",
232
+ model="KeonBlackwell/movie_sentiment_model",
233
+ tokenizer="distilbert-base-uncased"
234
+ )
235
+
236
+ # 关键词提取模型
237
+ keyphrase_extractor = pipeline(
238
+ "token-classification",
239
+ model="ml6team/keyphrase-extraction-distilbert-inspec",
240
+ aggregation_strategy="simple"
241
+ )
242
+
243
+ # 摘要生成模型
244
+ summarizer = pipeline("summarization",
245
+ model="facebook/bart-large-cnn")
246
+
247
+ except Exception as e:
248
+ st.error(f"模型加载失败: {str(e)}")
249
+ return
250
+
251
+ # 页面布局
252
+ st.title("🎬 电影评论批量分析系统")
253
+ st.markdown("""
254
+ ### 使用说明:
255
+ 1. 上传包含电影评论的CSV文件(需包含'comment'列)
256
+ 2. 系统自动分析每条评论的情感倾向
257
+ 3. 生成整体评分、关键词提取和总结报告
258
+ """)
259
 
260
+ # 文件上传
261
+ uploaded_file = st.file_uploader("上传CSV文件", type=["csv"])
262
+
263
+ if uploaded_file is not None:
264
+ # 读取数据
265
+ try:
266
+ df = pd.read_csv(uploaded_file)
267
+ if 'comment' not in df.columns:
268
+ st.error("CSV文件必须包含'comment'列")
269
+ return
270
+
271
+ comments = df['comment'].tolist()
272
+ except Exception as e:
273
+ st.error(f"文件读取失败: {str(e)}")
274
+ return
275
 
276
+ # 显示预览
277
+ with st.expander("原始数据预览(前5行)"):
278
+ st.dataframe(df.head())
 
 
279
 
280
+ if st.button("开始分���"):
281
+ # 进度条设置
282
+ progress_bar = st.progress(0)
283
+ status_text = st.empty()
284
+
285
+ results = []
286
+ total = len(comments)
287
+
288
+ # 批量预测
289
+ try:
290
+ # 情感分析
291
+ for i, comment in enumerate(comments):
292
+ progress = (i+1)/total
293
+ progress_bar.progress(progress)
294
+ status_text.text(f"正在分析情感 {i+1}/{total} 条评论...")
295
+
296
+ prediction = classifier(comment)[0]
297
+ results.append({
298
+ 'comment': comment,
299
+ 'sentiment': 1 if prediction['label'] == 'LABEL_1' else 0,
300
+ 'confidence': prediction['score']
301
+ })
302
+
303
+ # 转换为DataFrame
304
+ result_df = pd.DataFrame(results)
305
+
306
+ # 计算统计指标
307
+ positive_count = result_df['sentiment'].sum()
308
+ total_reviews = len(result_df)
309
+ positive_percent = (positive_count / total_reviews) * 100
310
+ star_rating = calculate_star_rating(positive_percent)
311
+
312
+ # 显示结果
313
+ st.success("情感分析完成!")
314
+
315
+ # 评分展示
316
+ col1, col2, col3 = st.columns(3)
317
+ with col1:
318
+ st.metric("⭐ 综合评分", f"{star_rating} 星")
319
+ with col2:
320
+ st.metric("👍 正面评价", f"{positive_count}/{total_reviews}")
321
+ with col3:
322
+ st.metric("📈 正面比例", f"{positive_percent:.1f}%")
323
+
324
+ # 进度条可视化
325
+ st.progress(positive_percent/100)
326
+
327
+ # 显示示例结果
328
+ with st.expander("查看详细分析结果(前10条)"):
329
+ st.dataframe(result_df.head(10))
330
+
331
+ # 关键词提取和总结
332
+ st.subheader("📌 评论关键词提取与总结")
333
+
334
+ # 合并所有评论为一个文本
335
+ combined_text = " ".join(comments)
336
+
337
+ # 关键词提取
338
+ with st.spinner("正在提取关键词..."):
339
+ keyphrases = keyphrase_extractor(combined_text)
340
+ # 按置信度排序并取前5个
341
+ top_keyphrases = sorted(keyphrases, key=lambda x: x['score'], reverse=True)[:5]
342
+
343
+ # 显示关键词
344
+ st.markdown("**🔍 提取的关键词:**")
345
+ cols = st.columns(5)
346
+ for i, phrase in enumerate(top_keyphrases):
347
+ cols[i].markdown(f"""
348
+ <div style="
349
+ border: 1px solid #ddd;
350
+ border-radius: 5px;
351
+ padding: 10px;
352
+ text-align: center;
353
+ margin: 5px;
354
+ background-color: #f9f9f9;
355
+ ">
356
+ <b>{phrase['word']}</b><br>
357
+ <small>置信度: {phrase['score']:.2f}</small>
358
+ </div>
359
+ """, unsafe_allow_html=True)
360
+
361
+ # 生成总结
362
+ with st.spinner("正在生成评论总结..."):
363
+ # 限制文本长度以避免模型限制
364
+ max_length = 1024 # 模型的最大输入长度
365
+ if len(combined_text) > max_length:
366
+ combined_text = combined_text[:max_length]
367
+
368
+ summary = summarizer(combined_text,
369
+ max_length=130,
370
+ min_length=30,
371
+ do_sample=False)
372
+
373
+ # 显示总结
374
+ st.markdown("**📝 评论总结:**")
375
+ st.info(summary[0]['summary_text'])
376
+
377
+ # 生成可下载文件
378
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
379
+ result_df.to_csv(tmp.name, index=False)
380
+ with open(tmp.name, "rb") as f:
381
+ st.download_button(
382
+ label="下载完整结果",
383
+ data=f,
384
+ file_name="analysis_results.csv",
385
+ mime="text/csv"
386
+ )
387
+ os.unlink(tmp.name)
388
+
389
+ except Exception as e:
390
+ st.error(f"分析过程中出现错误: {str(e)}")
391
+ finally:
392
+ progress_bar.empty()
393
+ status_text.empty()
394
 
395
  if __name__ == "__main__":
396
  main()