File size: 5,832 Bytes
b8ff14e
 
7b9bebe
c7ec63e
7b9bebe
c7ec63e
 
 
7b9bebe
 
36c201f
7b9bebe
c7ec63e
 
 
 
 
 
7b9bebe
36c201f
 
 
 
 
 
 
 
 
 
 
 
 
 
c7ec63e
7b9bebe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dfd5bb6
36c201f
4eafa91
 
36c201f
 
 
7b9bebe
36c201f
 
dd2bb14
36c201f
 
 
 
 
 
dd2bb14
 
36c201f
 
 
7b9bebe
 
 
 
dfd5bb6
7b9bebe
 
 
 
 
 
 
 
 
 
 
 
 
c7ec63e
7b9bebe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
babd923
36c201f
 
 
 
 
7b9bebe
c7ec63e
92eb8b6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import gradio as gr
print("Gradio version:", gr.__version__)
import os, time, re
import numpy as np
import joblib
import librosa
from huggingface_hub import hf_hub_download
from deepface import DeepFace
from transformers import pipeline
# If AutoTokenizer/AutoModel are not used manually, there is no need to
# import AutoTokenizer, AutoModelForSequenceClassification

# --- 1. Load the SVM speech-emotion model ---
# Downloaded from the Hugging Face Hub at startup; joblib deserializes the
# fitted scikit-learn SVM (used by predict_voice below).
print("Downloading SVM model from Hugging Face Hub...")
model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
print(f"SVM model downloaded to: {model_path}")
svm_model = joblib.load(model_path)
print("SVM model loaded.")

# --- 2. Text emotion analysis: keyword rules + zero-shot fallback ---
# Multilingual NLI model used for zero-shot emotion classification.
zero_shot = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
# English candidate labels fed to the zero-shot pipeline.
candidate_labels = ["joy", "sadness", "anger", "fear", "surprise", "disgust"]
# English label -> Traditional Chinese display name (zero-shot path).
label_map_en2cn = {
    "joy": "高興", "sadness": "悲傷", "anger": "憤怒",
    "fear": "恐懼", "surprise": "驚訝", "disgust": "厭惡"
}
# Per-emotion keyword lists (Traditional Chinese) for the rule-based fast path.
emo_keywords = {
    "happy": ["開心","快樂","愉快","喜悦","喜悅","歡喜","興奮","高興"],
    "angry": ["生氣","憤怒","不爽","發火","火大","氣憤"],
    "sad": ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"],
    "surprise": ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"],
    "fear": ["怕","恐懼","緊張","懼","膽怯","畏"]
}
# Prefixes that negate an immediately following keyword (e.g. "不開心").
negations = ["不","沒","沒有","別","勿","非"]

def keyword_emotion(text: str, keywords=None, neg_markers=None):
    """Score *text* against per-emotion keyword lists.

    Looks for the first occurrence of each keyword and ignores hits that are
    immediately preceded by a negation marker (so "不開心" does not count as
    happy). Each keyword contributes at most one hit.

    Args:
        text: Input text (Chinese expected by the default keyword lists).
        keywords: Mapping of emotion label -> list of keywords. Defaults to
            the module-level ``emo_keywords``.
        neg_markers: Negation prefixes. Defaults to module-level ``negations``.

    Returns:
        dict mapping every emotion label to its normalized share of keyword
        hits, or None when no non-negated keyword was found.
    """
    if keywords is None:
        keywords = emo_keywords
    if neg_markers is None:
        neg_markers = negations
    counts = {emo: 0 for emo in keywords}
    for emo, kws in keywords.items():
        for w in kws:
            idx = text.find(w)
            if idx == -1:
                continue
            # Simple negation detection: keyword directly preceded by a marker.
            negated = any(
                idx >= len(neg) and text[idx - len(neg):idx] == neg
                for neg in neg_markers
            )
            if not negated:
                counts[emo] += 1
    total = sum(counts.values())
    if total == 0:
        return None
    return {emo: c / total for emo, c in counts.items()}

def predict_text_mixed(text: str):
    """Hybrid text emotion analysis: keyword rules first, zero-shot fallback.

    Args:
        text: Input text; empty or whitespace-only input yields {}.

    Returns:
        dict mapping Chinese emotion label -> score. The keyword path returns
        only the single top emotion; the zero-shot path returns the full
        distribution. Falls back to {"中性": 1.0} if the pipeline raises.
    """
    if not text or text.strip() == "":
        return {}
    scores = keyword_emotion(text)
    if scores:
        top = max(scores, key=scores.get)
        # Traditional-Chinese display names, kept consistent with
        # label_map_en2cn used on the zero-shot path below (the original
        # mapping emitted Simplified Chinese, so the UI showed two different
        # label sets depending on which path fired).
        mapping = {"happy": "高興", "angry": "憤怒", "sad": "悲傷",
                   "surprise": "驚訝", "fear": "恐懼"}
        return {mapping.get(top, top): scores[top]}
    try:
        out = zero_shot(text, candidate_labels=candidate_labels,
                        hypothesis_template="这句话表达了{}情绪")
        return {label_map_en2cn.get(lab.lower(), lab): float(sc)
                for lab, sc in zip(out["labels"], out["scores"])}
    except Exception as e:
        print("zero-shot error:", e)
        return {"中性": 1.0}

# --- 3. 语音情绪预测函数 ---
def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    """Build a 26-dim feature vector for the SVM: per-coefficient mean and
    variance of 13 MFCCs computed over the whole signal."""
    coeffs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    means = coeffs.mean(axis=1)
    variances = coeffs.var(axis=1)
    return np.concatenate([means, variances])

def predict_voice(audio_path: str):
    """Predict emotion probabilities for a recorded audio file.

    Loads the file at its native sample rate, extracts MFCC statistics and
    feeds them to the pre-loaded SVM. Returns {} on missing input or on any
    load/predict failure (errors are printed, not raised).
    """
    if not audio_path:
        print("predict_voice: 无 audio_path,跳过")
        return {}
    try:
        waveform, sample_rate = librosa.load(audio_path, sr=None)
        features = extract_feature(waveform, sample_rate)
        probabilities = svm_model.predict_proba([features])[0]
        return dict(zip(svm_model.classes_, map(float, probabilities)))
    except Exception as exc:
        print("predict_voice error:", exc)
        return {}

# --- 4. 人脸情绪预测函数 ---
def predict_face(img: np.ndarray):
    """Run DeepFace emotion analysis on one webcam frame.

    Returns a dict of emotion -> score (plain floats, JSON-serializable),
    or {} when no frame was given or the analysis failed.
    """
    print("predict_face called, img is None?", img is None)
    if img is None:
        return {}
    try:
        analysis = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
        # analyze() may return a list (one entry per detected face) or a dict.
        if isinstance(analysis, list):
            head = analysis[0] if analysis else {}
        else:
            head = analysis
        raw = head.get("emotion", {}) if isinstance(head, dict) else {}
        # Cast to float so the result is JSON-serializable for the UI.
        scores = {label: float(value) for label, value in raw.items()}
        print("predict_face result:", scores)
        return scores
    except Exception as exc:
        print("DeepFace.analyze error:", exc)
        return {}

# --- 5. Gradio UI ---
# Three tabs wiring the predictors above to webcam / microphone / text inputs.
with gr.Blocks() as demo:
    gr.Markdown("## 多模態情緒分析示例")
    with gr.Tabs():
        # Face-emotion tab: streams webcam frames into DeepFace.
        with gr.TabItem("臉部情緒"):
            gr.Markdown("### 臉部情緒 (即時 Webcam Streaming 分析)")
            with gr.Row():
                # NOTE(review): `source=` is the Gradio 3.x API; Gradio 4.x
                # renamed it to `sources=[...]` — confirm against the version
                # printed at startup.
                webcam = gr.Image(source="webcam", streaming=True, type="numpy", label="攝像頭畫面")
                face_out = gr.Label(label="情緒分布")
            # Each streamed frame triggers predict_face.
            webcam.stream(fn=predict_face, inputs=webcam, outputs=face_out)
        # Voice-emotion tab: microphone recording (file path) -> SVM.
        with gr.TabItem("語音情緒"):
            gr.Markdown("### 語音情緒 分析")
            with gr.Row():
                audio = gr.Audio(source="microphone", streaming=False, type="filepath", label="錄音")
                voice_out = gr.Label(label="語音情緒結果")
            # Fires when the recording value changes (recording finished/cleared).
            audio.change(fn=predict_voice, inputs=audio, outputs=voice_out)
        # Text-emotion tab: Enter in the textbox runs keyword/zero-shot analysis.
        with gr.TabItem("文字情緒"):
            gr.Markdown("### 文字情緒 分析 (规则+zero-shot)")
            with gr.Row():
                text = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
                text_out = gr.Label(label="文字情緒結果")
            text.submit(fn=predict_text_mixed, inputs=text, outputs=text_out)

if __name__ == "__main__":
    demo.launch()