Spaces: Configuration error
Update app.py
app.py
CHANGED
@@ -1,42 +1,18 @@
-# app.py
-
 import gradio as gr
-import os
-import numpy as np
+import numpy as np
+import cv2
 import librosa, joblib
 from deepface import DeepFace
-def analyze_frame(frame):
-    # frame is an RGB numpy image
-    result = DeepFace.analyze(frame, actions=['emotion'])
-    return result['dominant_emotion']
-
-iface = gr.Interface(
-    fn=analyze_frame,
-    inputs=gr.inputs.Image(source="webcam", tool=None),
-    outputs="text",
-    title="多模態即時情緒分析"
-)
 
-
-
-
-# —— 1. Preload DeepFace and the speech model ——
-# DeepFace caches its weights under DEEPFACE_HOME/weights
-os.environ["DEEPFACE_HOME"] = "/tmp/.deepface"
-def load_models():
-    # a) warm up DeepFace
-    DeepFace.analyze(
-        img_path = np.zeros((224,224,3), dtype=np.uint8),
-        actions = ['emotion'],
-        enforce_detection=False
-    )
-    # b) load the locally trained speech model
+# ——— Load the speech model ———
+# (a module-level load runs once per process, so no caching decorator is needed;
+# Gradio has no cache_resource decorator, unlike Streamlit)
+def load_audio_model():
     return joblib.load("src/voice_model.joblib")
 
-audio_model = load_models()
+audio_model = load_audio_model()
 
-# —— 2. Text analysis ——
-def analyze_text_fn(text):
+# Text analysis
+def analyze_text(text):
     if any(w in text for w in ["開心","快樂","愉快","喜悅","歡喜","興奮","歡","高興"]): return "😊 happy"
     if any(w in text for w in ["生氣","憤怒","不爽","發火","火大","氣憤"]): return "😠 angry"
     if any(w in text for w in ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"]): return "😢 sad"
@@ -44,45 +20,45 @@ def analyze_text_fn(text):
     if any(w in text for w in ["怕","恐懼","緊張","懼","膽怯","畏"]): return "😨 fear"
     return "😐 neutral"
 
-# —— 3. Speech analysis ——
-def analyze_audio_fn(wav_file):
-    y, sr = librosa.load(wav_file.name)
+# Speech analysis
+def analyze_audio(wav_path):
+    # gr.Audio(type="filepath") hands the callback a path string
+    y, sr = librosa.load(wav_path, sr=None)
     mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
     mf = np.mean(mfccs.T, axis=0)
-    return audio_model.predict([mf])[0]
+    emo = audio_model.predict([mf])[0]
+    return f"🎧 {emo}"
 
-# —— 4. Face analysis ——
-def analyze_face_fn(frame):
-    result = DeepFace.analyze(
-        frame, actions=['emotion'], enforce_detection=False
-    )
-
-    emo = result['dominant_emotion']
-    return emo
+# Real-time face emotion
+def analyze_face(frame):
+    # frame: RGB numpy array from the webcam; DeepFace's OpenCV backend
+    # expects BGR, so swap channels before analyzing
+    img = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+    resp = DeepFace.analyze(img, actions=['emotion'], enforce_detection=False)
+    if isinstance(resp, list):  # recent deepface releases return a list of dicts
+        resp = resp[0]
+    emo = resp["dominant_emotion"]
+    # draw the face box and emotion label onto the frame
+    box = resp["region"]
+    x, y, w, h = box["x"], box["y"], box["w"], box["h"]
+    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
+    cv2.putText(frame, emo, (x, y - 10),
+                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+    return frame
 
-# —— 5. Build the interface ——
+# ——— Build the interface ———
 with gr.Blocks() as demo:
-    gr.Markdown("# 多模態即時情緒分析")
+    gr.Markdown("# 多模態即時情緒分析 🤖")
     tabs = gr.Tabs()
-
-    camera = gr.Image(source="webcam", tool="editor", label="對準你的臉")
-    out_face = gr.Textbox(label="偵測到的情緒")
-    camera.change(analyze_face_fn, camera, out_face)
-
-    with tabs.add_tab("🎤 上傳 WAV 檔"):
-        wav = gr.File(label="上傳 .wav")
-        out_audio = gr.Textbox(label="語音檢測情緒")
-        wav.upload(analyze_audio_fn, wav, out_audio)
-
-    with tabs.add_tab("⌨️ 輸入文字"):
-        txt = gr.Textbox(label="在此輸入文字")
-        btn = gr.Button("開始分析")
-        out_text = gr.Textbox(label="文字檢測情緒")
-        btn.click(analyze_text_fn, txt, out_text)
-
-    gr.Markdown("---")
-    gr.Markdown("ℹ️ 內建 DeepFace、librosa & sklearn 進行多模態情緒分析")
-
+    with tabs:
+        with gr.TabItem("📷 Live Face"):
+            camera = gr.Image(source="webcam", streaming=True, tool=None)
+            out_img = gr.Image()
+            camera.change(analyze_face, camera, out_img)
+        with gr.TabItem("🎤 上傳語音檔"):
+            audio = gr.Audio(type="filepath")
+            out_a = gr.Text()
+            # gr.Audio has no .submit event; react to change (record or upload)
+            audio.change(analyze_audio, audio, out_a)
+        with gr.TabItem("⌨️ 文本輸入"):
+            txt = gr.Textbox(lines=3, placeholder="在此輸入文字…")
+            out_t = gr.Text()
+            txt.submit(analyze_text, txt, out_t)
+
 if __name__ == "__main__":
     demo.launch()
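The app unpickles src/voice_model.joblib, but nothing in this commit shows how that model is produced. Below is a minimal, hypothetical training sketch that would be compatible with analyze_audio — the data layout data/<label>/*.wav, the label names, and the RandomForestClassifier are assumptions, not part of this repo; only the 13-dimensional mean-MFCC feature is fixed by the app code:

# train_voice_model.py — hypothetical sketch, not from this repo
import glob, os
import joblib
import librosa
import numpy as np
from sklearn.ensemble import RandomForestClassifier

def mfcc_mean(path):
    # the same feature analyze_audio computes: mean of 13 MFCCs over time
    y, sr = librosa.load(path, sr=None)
    return np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).T, axis=0)

X, labels = [], []
for wav in glob.glob("data/*/*.wav"):  # assumed layout: data/<label>/*.wav
    X.append(mfcc_mean(wav))
    labels.append(os.path.basename(os.path.dirname(wav)))

clf = RandomForestClassifier(n_estimators=200, random_state=0)
clf.fit(np.array(X), labels)
joblib.dump(clf, "src/voice_model.joblib")  # the file load_audio_model() reads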
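Because demo.launch() sits under the __main__ guard, the module can be imported without starting a server, which allows a quick sanity check of the three callbacks. A hypothetical smoke test, assuming src/voice_model.joblib and a local sample.wav exist:

# smoke_test.py — hypothetical; run from the repo root
import numpy as np
from app import analyze_text, analyze_audio, analyze_face

print(analyze_text("今天很開心"))   # contains 開心 -> "😊 happy"
print(analyze_audio("sample.wav"))  # -> "🎧 <label from the voice model>"
frame = np.zeros((480, 640, 3), dtype=np.uint8)
print(analyze_face(frame).shape)    # annotated frame comes back, same shape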
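The "Configuration error" status at the top of the page means the Space is not building or starting cleanly, and two things in this commit are worth checking. First, it drops the old workaround that pointed DeepFace's weight cache at a writable path; on Spaces the default home directory may not be writable when DeepFace downloads its weights, so restoring that line (before the DeepFace import) can help:

import os
os.environ["DEEPFACE_HOME"] = "/tmp/.deepface"  # must run before `from deepface import DeepFace`

Second, every import above must be declared for the Space to build. A plausible requirements.txt — package names follow the imports, the Gradio pin matters because gr.Image(source=..., tool=...) is 3.x API that Gradio 4 removed, and exact versions are illustrative:

gradio==3.50.2            # 3.x API: gr.Image(source=..., tool=...)
numpy
opencv-python-headless    # cv2 without GUI dependencies, suits server images
librosa
joblib
scikit-learn              # needed to unpickle src/voice_model.joblib
deepface                  # pulls in tensorflow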