GCLing committed on
Commit 8bda002 · verified · 1 Parent(s): 1d9e853

Update app.py

Files changed (1)
  1. app.py +44 -50
app.py CHANGED
@@ -1,64 +1,58 @@
  import gradio as gr
  import numpy as np
- import cv2
- import librosa, joblib
  from deepface import DeepFace
-
- # ——— Load the speech model ———
- @gr.cache_resource()
- def load_audio_model():
-     return joblib.load("src/voice_model.joblib")
-
- audio_model = load_audio_model()
-
- # Text analysis
- def analyze_text(text):
-     if any(w in text for w in ["開心","快樂","愉快","喜悅","歡喜","興奮","歡","高興"]): return "😊 happy"
-     if any(w in text for w in ["生氣","憤怒","不爽","發火","火大","氣憤"]): return "😠 angry"
-     if any(w in text for w in ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"]): return "😢 sad"
-     if any(w in text for w in ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"]): return "😲 surprise"
-     if any(w in text for w in ["怕","恐懼","緊張","懼","膽怯","畏"]): return "😨 fear"
-     return "😐 neutral"
-
- # Speech analysis
  def analyze_audio(wav_file):
-     y, sr = librosa.load(wav_file.name, sr=None)
      mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
      mf = np.mean(mfccs.T, axis=0)
-     emo = audio_model.predict([mf])[0]
-     return f"🎧 {emo}"
-
- # Real-time facial emotion
- def analyze_face(frame):
-     # frame: numpy array from the webcam
-     # convert BGR -> RGB
-     img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-     resp = DeepFace.analyze(img, actions=['emotion'], enforce_detection=False)
-     emo = resp["dominant_emotion"]
-     # draw the emotion on the face bounding box
-     box = resp["region"]
-     x, y, w, h = box["x"], box["y"], box["w"], box["h"]
-     cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
-     cv2.putText(frame, emo, (x, y-10),
-                 cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-     return frame
-
- # ——— Build the interface ———
  with gr.Blocks() as demo:
-     gr.Markdown("# 多模態即時情緒分析 🤖")
-     tabs = gr.Tabs()
-     with tabs:
          with gr.TabItem("📷 Live Face"):
-             camera = gr.Image(source="webcam", streaming=True, tool=None)
-             out_img = gr.Image()
-             camera.change(analyze_face, camera, out_img)
-         with gr.TabItem("🎤 上傳語音檔"):
-             audio = gr.Audio(type="filepath")
-             out_a = gr.Text()
-             audio.submit(analyze_audio, audio, out_a)
-         with gr.TabItem("⌨️ 文本輸入"):
-             txt = gr.Textbox(lines=3, placeholder="在此輸入文字…")
-             out_t = gr.Text()
-             txt.submit(analyze_text, txt, out_t)
-
- demo.launch()

  import gradio as gr
  import numpy as np
+ import joblib, io, base64
+ import librosa
  from deepface import DeepFace
+
+ # —— 1. Load the models ——
+ # If only the wav/text tabs are needed, the face part can instead be loaded lazily inside the fn
+ audio_model = joblib.load("src/voice_model.joblib")
+
+ def analyze_face(frame: np.ndarray):
+     # DeepFace returns a dict that contains 'dominant_emotion'
+     res = DeepFace.analyze(frame, actions=["emotion"], enforce_detection=False)
+     emo = res["dominant_emotion"]
+     return frame, emo
+
  def analyze_audio(wav_file):
+     wav_bytes = wav_file.read()
+     y, sr = librosa.load(io.BytesIO(wav_bytes), sr=None)
      mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
      mf = np.mean(mfccs.T, axis=0)
+     return audio_model.predict([mf])[0]
+
+ def analyze_text(txt):
+     mapping = {
+         "😊happy": ["開心","快樂","愉快","喜悅","歡喜","興奮","歡","高興"],
+         "😠angry": ["生氣","憤怒","不爽","發火","火大","氣憤"],
+         "😢sad": ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"],
+         "😲surprise": ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"],
+         "😨fear": ["怕","恐懼","緊張","懼","膽怯","畏"],
+     }
+     for emo, kws in mapping.items():
+         if any(w in txt for w in kws):
+             return emo
+     return "neutral"
+
  with gr.Blocks() as demo:
+     gr.Markdown("## 多模態即時情緒分析")
+     with gr.Tabs():
          with gr.TabItem("📷 Live Face"):
+             camera = gr.Image(source="webcam", tool="editor", label="對準鏡頭")
+             out_img = gr.Image(label="畫面")
+             out_label = gr.Label(label="情緒")
+             camera.change(fn=analyze_face, inputs=camera, outputs=[out_img, out_label])
+
+         with gr.TabItem("🎤 上傳語音"):
+             wav = gr.File(label="選擇 .wav 檔", file_types=[".wav"])
+             wav_btn = gr.Button("分析")
+             wav_out = gr.Text(label="偵測到的情緒")
+             wav_btn.click(fn=analyze_audio, inputs=wav, outputs=wav_out)
+
+         with gr.TabItem("⌨️ 輸入文字"):
+             txt = gr.Textbox(label="在此輸入文字", lines=3)
+             txt_btn = gr.Button("分析")
+             txt_out = gr.Text(label="偵測到的情緒")
+             txt_btn.click(fn=analyze_text, inputs=txt, outputs=txt_out)
+
+ demo.launch()
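
The comment above the model loading mentions loading the heavier parts lazily inside the handler instead of at import time. A minimal sketch of that idea, illustrative only and not part of the commit, caching the joblib model on first use so app start-up does not pay the load cost:

# Illustrative sketch, not part of the commit: lazy, one-time loading of the voice model.
import joblib

_audio_model = None

def get_audio_model():
    # Load src/voice_model.joblib the first time it is needed, then reuse the cached instance.
    global _audio_model
    if _audio_model is None:
        _audio_model = joblib.load("src/voice_model.joblib")
    return _audio_model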
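analyze_face indexes the DeepFace result as a dict; depending on the installed deepface version, DeepFace.analyze may instead return a list of per-face dicts. A version-tolerant sketch, an assumption about the library's behavior rather than something this commit relies on:

# Illustrative sketch, not part of the commit: tolerate both dict and list results from DeepFace.
import numpy as np
from deepface import DeepFace

def dominant_emotion(frame: np.ndarray) -> str:
    res = DeepFace.analyze(frame, actions=["emotion"], enforce_detection=False)
    if isinstance(res, list):  # newer deepface releases return a list of per-face dicts
        res = res[0]
    return res["dominant_emotion"]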
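analyze_audio reads raw bytes from the gr.File value; in Gradio 3.x that value is typically a temp-file wrapper whose .name attribute holds the upload path, and reading it directly can come back empty. A hedged path-based alternative, assuming the same 13-coefficient mean-MFCC features the voice model was trained on:

# Illustrative sketch, not part of the commit: load the uploaded wav by path instead of raw bytes.
import joblib
import librosa
import numpy as np

audio_model = joblib.load("src/voice_model.joblib")

def analyze_audio_from_path(wav_file):
    path = wav_file if isinstance(wav_file, str) else wav_file.name  # temp path exposed by gr.File
    y, sr = librosa.load(path, sr=None)                   # decode at the native sample rate
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)   # 13 MFCCs per frame
    mf = np.mean(mfccs.T, axis=0)                         # average over frames -> shape (13,)
    return audio_model.predict([mf])[0]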