GCLing committed
Commit c0e97f6 · verified · 1 Parent(s): c21c266

Update app.py

Files changed (1):
  1. app.py +43 -67
app.py CHANGED
@@ -1,42 +1,18 @@
- # app.py
-
  import gradio as gr
- import cv2, numpy as np
- import base64, io, os
  import librosa, joblib
  from deepface import DeepFace
- def analyze_frame(frame):
-     # frame is an RGB numpy image
-     result = DeepFace.analyze(frame, actions=['emotion'])
-     return result['dominant_emotion']
-
- iface = gr.Interface(
-     fn=analyze_frame,
-     inputs=gr.inputs.Image(source="webcam", tool=None),
-     outputs="text",
-     title="多模態即時情緒分析"
- )

- if __name__ == "__main__":
-     iface.launch()
-
- # —— 1. Preload DeepFace and the voice model ——
- # DeepFace automatically caches its weights under DEEPFACE_HOME/weights
- os.environ["DEEPFACE_HOME"] = "/tmp/.deepface"
- def load_models():
-     # a) Warm up DeepFace
-     DeepFace.analyze(
-         img_path = np.zeros((224,224,3), dtype=np.uint8),
-         actions = ['emotion'],
-         enforce_detection=False
-     )
-     # b) Load the locally trained voice model
      return joblib.load("src/voice_model.joblib")

- audio_model = load_models()

- # —— 2. Text emotion function ——
- def analyze_text_fn(text):
      if any(w in text for w in ["開心","快樂","愉快","喜悅","歡喜","興奮","歡","高興"]): return "😊 happy"
      if any(w in text for w in ["生氣","憤怒","不爽","發火","火大","氣憤"]): return "😠 angry"
      if any(w in text for w in ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"]): return "😢 sad"
@@ -44,45 +20,45 @@ def analyze_text_fn(text):
      if any(w in text for w in ["怕","恐懼","緊張","懼","膽怯","畏"]): return "😨 fear"
      return "😐 neutral"

- # —— 3. Voice emotion function ——
- def analyze_audio_fn(wav_bytes):
-     y, sr = librosa.load(io.BytesIO(wav_bytes), sr=None)
      mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
      mf = np.mean(mfccs.T, axis=0)
-     return audio_model.predict([mf])[0]

- # —— 4. Face emotion function ——
- def analyze_face_fn(img: np.ndarray):
-     res = DeepFace.analyze(
-         img, actions=['emotion'], enforce_detection=False
-     )
-     # Handle both list and dict return values
-     emo = res[0]['dominant_emotion'] if isinstance(res, list) else res['dominant_emotion']
-     return emo

- # —— 5. Build the interface with Gradio Blocks ——
  with gr.Blocks() as demo:
-     gr.Markdown("## 📱 多模態即時情緒分析")
      tabs = gr.Tabs()

-     with tabs.add_tab("🔴 Face(Browser→Webcam)"):
-         camera = gr.Image(source="webcam", tool="editor", label="對準你的臉")
-         out_face = gr.Textbox(label="偵測到的情緒")
-         camera.change(analyze_face_fn, camera, out_face)
-
-     with tabs.add_tab("🎤 上傳 WAV 檔"):
-         wav = gr.File(label="上傳 .wav")
-         out_audio = gr.Textbox(label="語音檢測情緒")
-         wav.upload(analyze_audio_fn, wav, out_audio)
-
-     with tabs.add_tab("⌨️ 輸入文字"):
-         txt = gr.Textbox(label="在此輸入文字")
-         btn = gr.Button("開始分析")
-         out_text = gr.Textbox(label="文字檢測情緒")
-         btn.click(analyze_text_fn, txt, out_text)
-
-     gr.Markdown("---")
-     gr.Markdown("ℹ️ 內建 DeepFace、librosa & sklearn 進行多模態情緒分析")
-
- if __name__ == "__main__":
-     demo.launch()
New version of app.py (added lines marked +, unchanged lines shown as context):

  import gradio as gr
+ import numpy as np
+ import cv2
  import librosa, joblib
  from deepface import DeepFace

+ # ——— Load the voice model ———
+ @gr.cache_resource()
+ def load_audio_model():
      return joblib.load("src/voice_model.joblib")

+ audio_model = load_audio_model()
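Note: `gr.cache_resource` is not a Gradio API (the similarly named `st.cache_resource` belongs to Streamlit), so this decorator would raise an AttributeError as soon as the module is imported. A minimal sketch of one way to keep a single cached model instance without it, assuming the same `src/voice_model.joblib` artifact:

    import functools
    import joblib

    @functools.lru_cache(maxsize=1)      # load the model once and reuse it across calls
    def load_audio_model():
        return joblib.load("src/voice_model.joblib")

    audio_model = load_audio_model()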

+ # Text analysis
+ def analyze_text(text):
      if any(w in text for w in ["開心","快樂","愉快","喜悅","歡喜","興奮","歡","高興"]): return "😊 happy"
      if any(w in text for w in ["生氣","憤怒","不爽","發火","火大","氣憤"]): return "😠 angry"
      if any(w in text for w in ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"]): return "😢 sad"
  … (one unchanged line between the two hunks is not shown in the diff)
      if any(w in text for w in ["怕","恐懼","緊張","懼","膽怯","畏"]): return "😨 fear"
      return "😐 neutral"

+ # Voice analysis
+ def analyze_audio(wav_file):
+     y, sr = librosa.load(wav_file.name, sr=None)
      mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
      mf = np.mean(mfccs.T, axis=0)
+     emo = audio_model.predict([mf])[0]
+     return f"🎧 {emo}"

+ # Real-time face emotion
+ def analyze_face(frame):
+     # frame: numpy array from webcam
+     # BGR->RGB
+     img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+     resp = DeepFace.analyze(img, actions=['emotion'], enforce_detection=False)
+     emo = resp["dominant_emotion"]
+     # Draw the emotion on the face bounding box
+     box = resp["region"]
+     x,y,w,h = box["x"], box["y"], box["w"], box["h"]
+     cv2.rectangle(frame, (x,y),(x+w,y+h), (0,255,0), 2)
+     cv2.putText(frame, emo, (x, y-10),
+                 cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)
+     return frame
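Note: Gradio delivers webcam frames as RGB arrays, so the comment reads backwards (the cvtColor call actually produces BGR, the layout DeepFace conventionally expects for numpy input), and recent DeepFace releases return a list of result dicts from `analyze`, which the removed `analyze_face_fn` handled but `resp["dominant_emotion"]` does not. A hedged sketch covering both cases:

    def analyze_face(frame):
        # Gradio supplies RGB; swap to BGR, which DeepFace expects for numpy arrays
        bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        resp = DeepFace.analyze(bgr, actions=['emotion'], enforce_detection=False)
        res = resp[0] if isinstance(resp, list) else resp   # list in newer DeepFace
        emo = res["dominant_emotion"]
        box = res["region"]
        x, y, w, h = box["x"], box["y"], box["w"], box["h"]
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(frame, emo, (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        return frame   # annotated RGB frame shown in the output gr.Image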

+ # ——— Build the interface ———
  with gr.Blocks() as demo:
+     gr.Markdown("# 多模態即時情緒分析 🤖")
      tabs = gr.Tabs()
+     with tabs:
+         with gr.TabItem("📷 Live Face"):
+             camera = gr.Image(source="webcam", streaming=True, tool=None)
+             out_img = gr.Image()
+             camera.change(analyze_face, camera, out_img)
+         with gr.TabItem("🎤 上傳語音檔"):
+             audio = gr.Audio(type="filepath")
+             out_a = gr.Text()
+             audio.submit(analyze_audio, audio, out_a)
+         with gr.TabItem("⌨️ 文本輸入"):
+             txt = gr.Textbox(lines=3, placeholder="在此輸入文字…")
+             out_t = gr.Text()
+             txt.submit(analyze_text, txt, out_t)

+ demo.launch()
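Note: the `source=` and `tool=` keywords on gr.Image match Gradio 3.x (4.x renamed `source` to a `sources` list and dropped `tool`), and `gr.Audio` does not appear to expose a `.submit` event, so that tab would never fire; `.change` (or `.upload`) is the usual hook. A minimal sketch of the Blocks wiring under those assumptions:

    with gr.Blocks() as demo:
        gr.Markdown("# 多模態即時情緒分析 🤖")
        with gr.Tabs():
            with gr.TabItem("📷 Live Face"):
                camera = gr.Image(source="webcam", streaming=True)
                out_img = gr.Image()
                camera.change(analyze_face, camera, out_img)
            with gr.TabItem("🎤 上傳語音檔"):
                audio = gr.Audio(type="filepath")
                out_a = gr.Text()
                audio.change(analyze_audio, audio, out_a)   # Audio has no .submit event
            with gr.TabItem("⌨️ 文本輸入"):
                txt = gr.Textbox(lines=3, placeholder="在此輸入文字…")
                out_t = gr.Text()
                txt.submit(analyze_text, txt, out_t)

    demo.launch()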