GCLing committed
Commit 0b5ee74 · verified · 1 Parent(s): 77ec4ba

Update app.py

Files changed (1):
  1. app.py +56 -71
app.py CHANGED
@@ -1,89 +1,74 @@
  # app.py

  import gradio as gr
  import numpy as np
- import joblib, io
- import librosa
  from deepface import DeepFace

- # —— 1. Preload the models ——
- # Warm up DeepFace + load the voice model
- audio_model = joblib.load("src/voice_model.joblib")
- # You could also wrap this in a try/except
-
- def analyze_face(frame: np.ndarray):
-     """
-     Input: an RGB numpy array captured from the camera
-     Output: the dominant_emotion (string) from DeepFace's analysis
-     """
-     # DeepFace.analyze accepts an RGB np.array
-     res = DeepFace.analyze(
-         img_path=frame,
-         actions=['emotion'],
          enforce_detection=False
      )
-     # Handle both dict and list return types
-     if isinstance(res, list):
-         emo = res[0].get('dominant_emotion', 'unknown')
-     else:
-         emo = res.get('dominant_emotion', 'unknown')
-     return emo

- def analyze_audio(wav_file):
-     """
-     Input: an uploaded wav file (binary)
-     Output: the speech-emotion classification label
-     """
-     data = wav_file.read()
-     y, sr = librosa.load(io.BytesIO(data), sr=None)
      mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
      mf = np.mean(mfccs.T, axis=0)
      return audio_model.predict([mf])[0]

- def analyze_text(txt):
-     """
-     Simple keyword-based Chinese emotion analysis
-     """
-     if any(w in text for w in ["開心","快樂","愉快","喜悅","歡喜","興奮","歡","高興"]): return "happy"
-     if any(w in text for w in ["生氣","憤怒","不爽","發火","火大","氣憤"]): return "angry"
-     if any(w in text for w in ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"]): return "sad"
-     if any(w in text for w in ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"]): return "surprise"
-     if any(w in text for w in ["怕","恐懼","緊張","懼","膽怯","畏"]): return "fear"
-     return "neutral"

- with gr.Blocks(title="多模態即時情緒分析") as demo:
      gr.Markdown("## 📱 多模態即時情緒分析")
-
      tabs = gr.Tabs()
-     with tabs:
-         with gr.TabItem("🔴 Face(即時)"):
-             gr.Markdown("⚠️ Spaces 無法直接打開攝像頭,請本機 `python app.py` 測試;手機/電腦瀏覽器可用以下方式:")
-             camera = gr.Image(
-                 source="webcam",
-                 type="numpy",
-                 label="請對準鏡頭"
-             )
-             face_out = gr.Textbox(label="偵測結果")
-             camera.change(fn=analyze_face, inputs=camera, outputs=face_out)

-         with gr.TabItem("🎤 上傳語音"):
-             wav = gr.File(
-                 label="請選擇 .wav 音檔",
-                 file_types=["wav"]
-             )
-             audio_out = gr.Textbox(label="偵測結果")
-             wav.upload(fn=analyze_audio, inputs=wav, outputs=audio_out)

-         with gr.TabItem("⌨️ 文本輸入"):
-             txt = gr.Textbox(
-                 label="在此輸入文字",
-                 placeholder="輸入想要分析的句子…"
-             )
-             text_btn = gr.Button("開始分析")
-             text_out = gr.Textbox(label="偵測結果")
-             text_btn.click(fn=analyze_text, inputs=txt, outputs=text_out)

- demo.launch(
-     server_name="0.0.0.0",
-     server_port=7860,
-     share=False
- )

  # app.py
+
  import gradio as gr
  import numpy as np
+ import base64, io, os
+ import librosa, joblib
  from deepface import DeepFace

+ # —— 1. Preload DeepFace and the voice model ——
+ # DeepFace automatically caches its weights under DEEPFACE_HOME/weights
+ os.environ["DEEPFACE_HOME"] = "/tmp/.deepface"
+ def load_models():
+     # a) Warm up DeepFace
+     DeepFace.analyze(
+         img_path = np.zeros((224,224,3), dtype=np.uint8),
+         actions = ['emotion'],
          enforce_detection=False
      )
+     # b) Load the locally trained voice model
+     return joblib.load("src/voice_model.joblib")

+ audio_model = load_models()
+
+ # —— 2. Text-emotion function ——
+ def analyze_text_fn(text):
+     if any(w in text for w in ["開心","快樂","愉快","喜悅","歡喜","興奮","歡","高興"]): return "😊 happy"
+     if any(w in text for w in ["生氣","憤怒","不爽","發火","火大","氣憤"]): return "😠 angry"
+     if any(w in text for w in ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"]): return "😢 sad"
+     if any(w in text for w in ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"]): return "😲 surprise"
+     if any(w in text for w in ["怕","恐懼","緊張","懼","膽怯","畏"]): return "😨 fear"
+     return "😐 neutral"
+
+ # —— 3. Speech-emotion function ——
+ def analyze_audio_fn(wav_bytes):
+     y, sr = librosa.load(io.BytesIO(wav_bytes), sr=None)
      mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
      mf = np.mean(mfccs.T, axis=0)
      return audio_model.predict([mf])[0]

+ # —— 4. Face-emotion function ——
+ def analyze_face_fn(img: np.ndarray):
+     res = DeepFace.analyze(
+         img, actions=['emotion'], enforce_detection=False
+     )
+     # Handle both list and dict return types
+     emo = res[0]['dominant_emotion'] if isinstance(res, list) else res['dominant_emotion']
+     return emo

+ # —— 5. Lay out the UI with Gradio Blocks ——
+ with gr.Blocks() as demo:
      gr.Markdown("## 📱 多模態即時情緒分析")
      tabs = gr.Tabs()

+     with tabs.add_tab("🔴 Face(Browser→Webcam)"):
+         camera = gr.Image(source="webcam", tool="editor", label="對準你的臉")
+         out_face = gr.Textbox(label="偵測到的情緒")
+         camera.change(analyze_face_fn, camera, out_face)

+     with tabs.add_tab("🎤 上傳 WAV 檔"):
+         wav = gr.File(label="上傳 .wav")
+         out_audio = gr.Textbox(label="語音檢測情緒")
+         wav.upload(analyze_audio_fn, wav, out_audio)

+     with tabs.add_tab("⌨️ 輸入文字"):
+         txt = gr.Textbox(label="在此輸入文字")
+         btn = gr.Button("開始分析")
+         out_text = gr.Textbox(label="文字檢測情緒")
+         btn.click(analyze_text_fn, txt, out_text)
+
+     gr.Markdown("---")
+     gr.Markdown("ℹ️ 內建 DeepFace、librosa & sklearn 進行多模態情緒分析")
+
+ if __name__ == "__main__":
+     demo.launch()
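
Note on the new tab construction: `gr.Tabs` does not expose an `add_tab()` method in Gradio's documented Blocks API, so the `with tabs.add_tab(...)` calls above may fail at startup. A minimal sketch of the conventional layout (Gradio 3.x style, with a stand-in handler) would be:

```python
import gradio as gr

def analyze_text_fn(text):
    # Stand-in for the handler defined in app.py.
    return "😐 neutral"

with gr.Blocks() as demo:
    gr.Markdown("## 📱 多模態即時情緒分析")
    with gr.Tabs():
        # Tabs are declared as nested context managers, as the
        # previous version of this file did with gr.TabItem.
        with gr.TabItem("⌨️ 輸入文字"):
            txt = gr.Textbox(label="在此輸入文字")
            btn = gr.Button("開始分析")
            out_text = gr.Textbox(label="文字檢測情緒")
            btn.click(analyze_text_fn, txt, out_text)
```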
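
Similarly, `analyze_audio_fn` expects raw WAV bytes, while `gr.File` typically hands the callback a tempfile wrapper (Gradio 3.x) or a path string rather than bytes. A minimal sketch of an adapter, assuming that payload behavior; `wav_to_bytes` and `analyze_audio_from_upload` are illustrative names, not part of this commit:

```python
import io

import librosa
import numpy as np

def wav_to_bytes(payload):
    # gr.File commonly yields an object carrying a .name filepath
    # (Gradio 3.x) or a plain path string (newer releases).
    path = getattr(payload, "name", payload)
    with open(path, "rb") as f:
        return f.read()

def analyze_audio_from_upload(payload, audio_model):
    # Same MFCC pipeline as analyze_audio_fn, fed from the upload payload.
    y, sr = librosa.load(io.BytesIO(wav_to_bytes(payload)), sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mf = np.mean(mfccs.T, axis=0)
    return audio_model.predict([mf])[0]
```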