GCLing committed
Commit 283d228 · verified · 1 parent: 5e42cc9

Update app.py

Files changed (1): app.py (+96, -111)
app.py CHANGED
@@ -1,133 +1,127 @@
  import gradio as gr
- print("Gradio version:", gr.__version__)
- import os, time, re
  import numpy as np
  import joblib
  import librosa
  from huggingface_hub import hf_hub_download
- from deepface import DeepFace
- from transformers import pipeline
- # If AutoTokenizer/AutoModel are not used directly, there is no need to import AutoTokenizer or AutoModelForSequenceClassification
-

- # --- 1. Load the SVM speech model ---
  print("Downloading SVM model from Hugging Face Hub...")
  model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
- print(f"SVM model downloaded to: {model_path}")
  svm_model = joblib.load(model_path)
  print("SVM model loaded.")

- # --- 2. Text emotion analysis: rules + zero-shot ---
- try:
-     zero_shot = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
- except Exception as e:
-     print("加载 zero-shot pipeline 失败:", e)
-     zero_shot = None

- candidate_labels = ["joy", "sadness", "anger", "fear", "surprise", "disgust"]
- label_map_en2cn = {
-     "joy": "高興", "sadness": "悲傷", "anger": "憤怒",
-     "fear": "恐懼", "surprise": "驚訝", "disgust": "厭惡"
- }

- # Keyword lists: keep traditional/simplified characters consistent, or include both forms
  emo_keywords = {
      "happy": ["開心","快樂","愉快","喜悦","喜悅","歡喜","興奮","高興"],
      "angry": ["生氣","憤怒","不爽","發火","火大","氣憤"],
      "sad": ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"],
      "surprise": ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"],
      "fear": ["怕","恐懼","緊張","懼","膽怯","畏"],
-     "disgust": ["噁心","厭惡","反感"]  # add further terms such as "厭惡" if needed
  }
- # Negation words
  negations = ["不","沒","沒有","別","勿","非"]
-
  def keyword_emotion(text: str):
-     """
-     Rule-based method: scan emo_keywords and handle a preceding negation word.
-     Returns None or {} when no rule matches; otherwise returns a non-empty dict, e.g. {'angry': 2, 'sad': 1}, or the normalized form {'angry': 0.67, 'sad': 0.33}.
-     """
-     if not text or text.strip() == "":
-         return None
-     text_proc = text.strip()  # no need to lowercase Chinese text
-     counts = {emo: 0 for emo in emo_keywords}
      for emo, kws in emo_keywords.items():
          for w in kws:
              idx = text_proc.find(w)
-             if idx != -1:
-                 # check whether the characters just before the keyword form a negation word
-                 neg = False
                  for neg_word in negations:
                      plen = len(neg_word)
-                     if idx - plen >= 0 and text_proc[idx-plen:idx] == neg_word:
-                         neg = True
-                         break
                  if not neg:
-                     counts[emo] += 1
-                 else:
-                     # if negated, the count could be reduced or the hit ignored; it is ignored here
-                     pass
      total = sum(counts.values())
-     if total > 0:
-         # normalize
-         return {emo: counts[emo] / total for emo in counts if counts[emo] > 0}
-     else:
-         return None

  def predict_text_mixed(text: str):
-     """
-     Text emotion analysis: rules first; if a rule matches, return the top emotion and its share, otherwise fall back to zero-shot and return a multi-class distribution.
-     Returns dict[str, float] for display in a Gradio Label.
-     """
-     print("predict_text_mixed called, text:", repr(text))
-     if not text or text.strip() == "":
-         print("輸入為空,返回空")
          return {}
-     # rules first
      res = keyword_emotion(text)
-     print("keyword_emotion result:", res)
      if res:
-         # return only the top item; the full distribution res could also be returned
-         top_emo = max(res, key=res.get)  # e.g. "angry"
          mapping = {
-             "happy": "高興",
-             "angry": "憤怒",
-             "sad": "悲傷",
-             "surprise": "驚訝",
-             "fear": "恐懼",
-             "disgust": "厭惡"
          }
-         cn = mapping.get(top_emo, top_emo)
-         prob = float(res[top_emo])
-         print(f"使用規則方法,返回: {{'{cn}': {prob}}}")
          return {cn: prob}
-     # no rule matched, fall back to zero-shot
-     if zero_shot is None:
-         print("zero_shot pipeline 未加载,返回中性")
-         return {"中性": 1.0}
-     try:
-         out = zero_shot(text, candidate_labels=candidate_labels,
-                         hypothesis_template="这句話表達了{}情緒")
-         print("zero-shot 返回:", out)
-         result = {}
-         for lab, sc in zip(out["labels"], out["scores"]):
-             cn = label_map_en2cn.get(lab.lower(), lab)
-             result[cn] = float(sc)
-         print("zero-shot 结果映射中文:", result)
-         return result
-     except Exception as e:
-         print("zero-shot error:", e)
-         return {"中性": 1.0}

- # --- 3. Speech emotion prediction ---
  def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
      mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
      return np.concatenate([np.mean(mfcc, axis=1), np.var(mfcc, axis=1)])

  def predict_voice(audio_path: str):
      if not audio_path:
-         print("predict_voice: 无 audio_path,跳过")
          return {}
      try:
          signal, sr = librosa.load(audio_path, sr=None)
@@ -139,10 +133,9 @@ def predict_voice(audio_path: str):
          print("predict_voice error:", e)
          return {}

- # --- 4. Face emotion prediction ---
  def predict_face(img: np.ndarray):
-     print("predict_face called, img is None?", img is None)
-     if img is None:
          return {}
      try:
          res = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
@@ -151,28 +144,27 @@ def predict_face(img: np.ndarray):
              emo = first.get("emotion", {}) if isinstance(first, dict) else {}
          else:
              emo = res.get("emotion", {}) if isinstance(res, dict) else {}
-         # cast to float so the result is JSON-serializable
-         emo_fixed = {k: float(v) for k, v in emo.items()}
-         print("predict_face result:", emo_fixed)
-         return emo_fixed
      except Exception as e:
          print("DeepFace.analyze error:", e)
          return {}

- # --- 5. Gradio interface ---
  def build_interface():
      with gr.Blocks() as demo:
          gr.Markdown("## 多模態情緒分析示例")
          with gr.Tabs():
              # Face tab
-             with gr.TabItem("臉部情緒"):
-                 gr.Markdown("### 臉部情緒 (即時 Webcam Streaming 分析)")
-                 with gr.Row():
-                     # switched to gr.Camera
-                     webcam = gr.Camera(streaming=True, type="numpy", label="攝像頭畫面")
-                     face_out = gr.Label(label="情緒分布")
-                 webcam.stream(fn=predict_face, inputs=webcam, outputs=face_out)
-

              # Speech tab
              with gr.TabItem("語音情緒"):
@@ -180,25 +172,18 @@ def build_interface():
                  with gr.Row():
                      audio = gr.Audio(source="microphone", streaming=False, type="filepath", label="錄音")
                      voice_out = gr.Label(label="語音情緒結果")
-                 audio.change(fn=predict_voice, inputs=audio, outputs=voice_out)

              # Text tab
              with gr.TabItem("文字情緒"):
-                 gr.Markdown("### 文字情緒 分析 (規則+zero-shot)")
                  with gr.Row():
                      text = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
                      text_out = gr.Label(label="文字情緒結果")
                  text.submit(fn=predict_text_mixed, inputs=text, outputs=text_out)
-                 # or use a button:
-                 # btn = gr.Button("分析")
-                 # btn.click(fn=predict_text_mixed, inputs=text, outputs=text_out)
-
-                 # or a button:
-                 # btn = gr.Button("分析")
-                 # btn.click(fn=predict_text_mixed, inputs=text, outputs=text_out)
      return demo

- # --- 4. Launch ---
  if __name__ == "__main__":
      demo = build_interface()
      demo.launch(share=True)

  import gradio as gr
+ import os
  import numpy as np
  import joblib
  import librosa
+ import requests
  from huggingface_hub import hf_hub_download

+ # --- Conditional DeepFace import ---
+ try:
+     from deepface import DeepFace
+     has_deepface = True
+ except ImportError:
+     print("本地未安装 deepface,将在本地跳过臉部情緒;Space 上会安装 deepface。")
+     has_deepface = False

+ # --- 1. Load the speech SVM ---
  print("Downloading SVM model from Hugging Face Hub...")
  model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
  svm_model = joblib.load(model_path)
  print("SVM model loaded.")

+ # --- 2. Text emotion analysis: switched to the Inference API ---
+ HF_API_TOKEN = os.getenv("HF_API_TOKEN")
+ if HF_API_TOKEN is None:
+     print("警告:未检测到 HF_API_TOKEN,Inference API 可能失败。")
+ # Use a publicly available Chinese sentiment-classification model
+ HF_TEXT_MODEL = "uer/roberta-base-finetuned-dianping-chinese"
+ HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_TEXT_MODEL}"
+ headers = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}

+ def predict_text_via_api(text: str):
+     if not text or text.strip() == "":
+         return {}
+     payload = {"inputs": text}
+     try:
+         resp = requests.post(HF_API_URL, headers=headers, json=payload, timeout=30)
+         if resp.status_code != 200:
+             print(f"Inference API 返回状态码 {resp.status_code}: {resp.text}")
+             # fall back to the simple rules or neutral
+             return {"中性": 1.0}
+         data = resp.json()
+         # Parse according to the model's response format: assume [{"label": "...", "score": ...}, ...]
+         if isinstance(data, list) and len(data) > 0 and isinstance(data[0], dict):
+             # show the top 3
+             result = {}
+             for item in data[:3]:
+                 lbl = item.get("label", "")
+                 score = item.get("score", 0.0)
+                 # If the label is English, map it to Chinese; if it is already Chinese, use it as-is.
+                 # For example, if the model returns "positive"/"negative"/"neutral", map them:
+                 if lbl.lower() in ["positive", "pos", "正面"]:
+                     cn = "正面"
+                 elif lbl.lower() in ["negative", "neg", "负面", "負面"]:
+                     cn = "負面"
+                 elif lbl.lower() in ["neutral", "中性"]:
+                     cn = "中性"
+                 else:
+                     cn = lbl
+                 result[cn] = float(score)
+             return result
+         else:
+             print("Inference API 返回格式异常:", data)
+             return {"中性": 1.0}
+     except Exception as e:
+         print("调用 Inference API 出错:", e)
+         return {"中性": 1.0}
+
+ # Keep the simple rules as a first pass: if a rule matches, return it; otherwise call the API
  emo_keywords = {
      "happy": ["開心","快樂","愉快","喜悦","喜悅","歡喜","興奮","高興"],
      "angry": ["生氣","憤怒","不爽","發火","火大","氣憤"],
      "sad": ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"],
      "surprise": ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"],
      "fear": ["怕","恐懼","緊張","懼","膽怯","畏"],
+     "disgust": ["噁心","厭惡","反感"]
  }
  negations = ["不","沒","沒有","別","勿","非"]
  def keyword_emotion(text: str):
+     text_proc = text.strip()
+     counts = {emo: 0 for emo in emo_keywords}
      for emo, kws in emo_keywords.items():
          for w in kws:
              idx = text_proc.find(w)
+             if idx != -1:
+                 neg = False
                  for neg_word in negations:
                      plen = len(neg_word)
+                     if idx - plen >= 0 and text_proc[idx-plen:idx] == neg_word:
+                         neg = True; break
                  if not neg:
+                     counts[emo] += 1
      total = sum(counts.values())
+     if total > 0:
+         # normalize and take the highest-scoring emotion
+         top = max(counts, key=lambda k: counts[k])
+         return {top: counts[top] / total}
+     return None

  def predict_text_mixed(text: str):
+     print("predict_text_mixed:", text)
+     if not text or text.strip() == "":
          return {}
      res = keyword_emotion(text)
      if res:
+         # map to Chinese labels
          mapping = {
+             "happy": "高興", "angry": "憤怒", "sad": "悲傷",
+             "surprise": "驚訝", "fear": "恐懼", "disgust": "厭惡"
          }
+         emo = list(res.keys())[0]; prob = float(res[emo])
+         cn = mapping.get(emo, emo)
          return {cn: prob}
+     # no rule matched, call the Inference API
+     return predict_text_via_api(text)

+ # --- 3. Speech emotion prediction ---
  def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
      mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
      return np.concatenate([np.mean(mfcc, axis=1), np.var(mfcc, axis=1)])

  def predict_voice(audio_path: str):
      if not audio_path:
          return {}
      try:
          signal, sr = librosa.load(audio_path, sr=None)

          print("predict_voice error:", e)
          return {}

+ # --- 4. Face emotion prediction ---
  def predict_face(img: np.ndarray):
+     if not has_deepface or img is None:
          return {}
      try:
          res = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")

              emo = first.get("emotion", {}) if isinstance(first, dict) else {}
          else:
              emo = res.get("emotion", {}) if isinstance(res, dict) else {}
+         return {k: float(v) for k, v in emo.items()}
      except Exception as e:
          print("DeepFace.analyze error:", e)
          return {}

+ # --- 5. Gradio interface: uses gr.components.Camera ---
  def build_interface():
      with gr.Blocks() as demo:
          gr.Markdown("## 多模態情緒分析示例")
          with gr.Tabs():
              # Face tab
+             if has_deepface:
+                 with gr.TabItem("臉部情緒"):
+                     gr.Markdown("### 臉部情緒 (即時 Webcam Streaming 分析)")
+                     with gr.Row():
+                         webcam = gr.components.Camera(streaming=True, type="numpy", label="攝像頭畫面")
+                         face_out = gr.Label(label="情緒分布")
+                     webcam.stream(fn=predict_face, inputs=webcam, outputs=face_out)
+             else:
+                 with gr.TabItem("臉部情緒 (本地跳过)"):
+                     gr.Markdown("本地未安装 deepface,此功能本地跳过;Space 上可正常运行。")

              # Speech tab
              with gr.TabItem("語音情緒"):

                  with gr.Row():
                      audio = gr.Audio(source="microphone", streaming=False, type="filepath", label="錄音")
                      voice_out = gr.Label(label="語音情緒結果")
+                 audio.change(fn=predict_voice, inputs=audio, outputs=voice_out)

              # Text tab
              with gr.TabItem("文字情緒"):
+                 gr.Markdown("### 文字情緒 分析 (规则+Inference API)")
                  with gr.Row():
                      text = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
                      text_out = gr.Label(label="文字情緒結果")
                  text.submit(fn=predict_text_mixed, inputs=text, outputs=text_out)
      return demo

  if __name__ == "__main__":
      demo = build_interface()
+     # share=True generates a temporary public link for local testing
      demo.launch(share=True)
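
A minimal sketch, not part of the commit above, of exercising the same Inference API endpoint that predict_text_via_api posts to, so the response shape can be checked before wiring it into the Gradio app. It assumes HF_API_TOKEN is set in the environment and that the text-classification endpoint returns label/score pairs, sometimes wrapped in an extra list; the test sentence is arbitrary.

import os
import requests

API_URL = "https://api-inference.huggingface.co/models/uer/roberta-base-finetuned-dianping-chinese"
headers = {"Authorization": f"Bearer {os.environ['HF_API_TOKEN']}"}

# Post one test sentence and inspect the raw JSON the endpoint returns.
resp = requests.post(API_URL, headers=headers, json={"inputs": "今天心情很好"}, timeout=30)
resp.raise_for_status()
data = resp.json()

# Text-classification responses may come wrapped in an extra list,
# e.g. [[{"label": ..., "score": ...}]]; unwrap one level if so.
if isinstance(data, list) and data and isinstance(data[0], list):
    data = data[0]

for item in data:
    print(item.get("label"), round(float(item.get("score", 0.0)), 3))

If the labels come back in a form other than positive/negative/neutral, the mapping block in predict_text_via_api falls through to the raw label, which the Gradio Label component then displays as-is.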