File size: 2,939 Bytes
357796f
0b5ee74
357796f
 
0b5ee74
 
357796f
 
0b5ee74
 
 
 
 
 
 
 
357796f
 
0b5ee74
 
357796f
0b5ee74
 
 
 
 
 
 
 
 
 
 
 
 
 
a2a4ab6
 
 
 
0b5ee74
 
 
 
 
 
 
 
a2a4ab6
0b5ee74
 
a2a4ab6
 
357796f
0b5ee74
 
 
 
a2a4ab6
0b5ee74
 
 
 
a2a4ab6
0b5ee74
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# app.py

import gradio as gr
import numpy as np
import base64, io, os
import librosa, joblib
from deepface import DeepFace

# —— 1. Pre-load the DeepFace and voice models ——
#    DeepFace automatically caches its weights under DEEPFACE_HOME/weights,
#    so point it at a writable location before the first analyze() call.
os.environ["DEEPFACE_HOME"] = "/tmp/.deepface"
def load_models():
    """Warm up DeepFace (triggers weight download/caching) and load the
    locally trained voice-emotion classifier.

    Returns:
        The scikit-learn style model deserialized from src/voice_model.joblib.
    """
    # a) Warm up DeepFace with a dummy black image so the emotion weights
    #    are downloaded once at startup rather than on the first user request.
    DeepFace.analyze(
        img_path = np.zeros((224,224,3), dtype=np.uint8),
        actions  = ['emotion'],
        enforce_detection=False
    )
    # b) Load the locally trained voice model
    return joblib.load("src/voice_model.joblib")

# Loaded once at import time; shared by analyze_audio_fn below.
audio_model = load_models()

# —— 2. Text emotion function ——
def analyze_text_fn(text):
    """Classify *text* into an emotion label by keyword matching.

    Rules are checked in order; the first category containing any matching
    keyword wins. Falls back to neutral when nothing matches.
    """
    rules = [
        (("開心","快樂","愉快","喜悅","歡喜","興奮","歡","高興"), "😊 happy"),
        (("生氣","憤怒","不爽","發火","火大","氣憤"), "😠 angry"),
        (("傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"), "😢 sad"),
        (("驚訝","意外","嚇","驚詫","詫異","訝異","好奇"), "😲 surprise"),
        (("怕","恐懼","緊張","懼","膽怯","畏"), "😨 fear"),
    ]
    for keywords, label in rules:
        if any(keyword in text for keyword in keywords):
            return label
    return "😐 neutral"

# —— 3. Voice emotion function ——
def analyze_audio_fn(wav_bytes):
    """Predict an emotion label for an uploaded WAV recording.

    Args:
        wav_bytes: the uploaded audio — raw ``bytes``, a filesystem path,
            or a file-like object. NOTE(review): ``gr.File.upload`` passes a
            tempfile wrapper (with a ``.name`` path), not raw bytes, so the
            original ``io.BytesIO(wav_bytes)`` raised TypeError on real
            uploads; all three forms are now accepted.

    Returns:
        The label predicted by the pre-loaded ``audio_model``.
    """
    # Normalize the input into something librosa.load() accepts.
    if isinstance(wav_bytes, (bytes, bytearray)):
        src = io.BytesIO(wav_bytes)
    elif hasattr(wav_bytes, "name"):
        # Gradio tempfile wrapper: load from its path on disk.
        src = wav_bytes.name
    else:
        # Assume a path string or an open file-like object.
        src = wav_bytes
    # sr=None keeps the file's native sampling rate.
    y, sr = librosa.load(src, sr=None)
    # 13 MFCCs averaged over time — must match the features the
    # voice model was trained on (see src/voice_model.joblib).
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mf = np.mean(mfccs.T, axis=0)
    return audio_model.predict([mf])[0]

# —— 4. Face emotion function ——
def analyze_face_fn(img: np.ndarray):
    """Run DeepFace emotion analysis on *img* and return the dominant emotion.

    DeepFace.analyze may return either a dict or a list of dicts depending
    on version; both shapes are handled.
    """
    result = DeepFace.analyze(
        img, actions=['emotion'], enforce_detection=False
    )
    if isinstance(result, list):
        result = result[0]
    return result['dominant_emotion']

# —— 5. Build the UI with Gradio Blocks ——
# BUG FIX: gr.Tabs has no ``add_tab`` method — tabs are declared with
# ``with gr.Tab(...)`` context managers nested inside ``with gr.Tabs():``.
# The original ``tabs.add_tab(...)`` raised AttributeError at startup.
with gr.Blocks() as demo:
    gr.Markdown("## 📱 多模態即時情緒分析")

    with gr.Tabs():
        # Tab 1: webcam frame -> DeepFace emotion label
        with gr.Tab("🔴 Face(Browser→Webcam)"):
            camera = gr.Image(source="webcam", tool="editor", label="對準你的臉")
            out_face = gr.Textbox(label="偵測到的情緒")
            camera.change(analyze_face_fn, camera, out_face)

        # Tab 2: uploaded WAV file -> voice-model emotion label
        with gr.Tab("🎤 上傳 WAV 檔"):
            wav = gr.File(label="上傳 .wav")
            out_audio = gr.Textbox(label="語音檢測情緒")
            wav.upload(analyze_audio_fn, wav, out_audio)

        # Tab 3: free text -> keyword-based emotion label
        with gr.Tab("⌨️ 輸入文字"):
            txt = gr.Textbox(label="在此輸入文字")
            btn = gr.Button("開始分析")
            out_text = gr.Textbox(label="文字檢測情緒")
            btn.click(analyze_text_fn, txt, out_text)

    gr.Markdown("---")
    gr.Markdown("ℹ️ 內建 DeepFace、librosa & sklearn 進行多模態情緒分析")

if __name__ == "__main__":
    demo.launch()