File size: 5,832 Bytes
b8ff14e
 
7b9bebe
c7ec63e
7b9bebe
c7ec63e
 
 
7b9bebe
 
36c201f
7b9bebe
c7ec63e
 
 
 
 
 
7b9bebe
36c201f
 
 
 
 
 
 
 
 
 
 
 
 
 
c7ec63e
7b9bebe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dfd5bb6
36c201f
4eafa91
 
36c201f
 
 
7b9bebe
36c201f
 
dd2bb14
36c201f
 
 
 
 
 
dd2bb14
 
36c201f
 
 
7b9bebe
 
 
 
dfd5bb6
7b9bebe
 
 
 
 
 
 
 
 
 
 
 
 
c7ec63e
7b9bebe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
babd923
36c201f
 
 
 
 
7b9bebe
c7ec63e
92eb8b6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import gradio as gr
print("Gradio version:", gr.__version__)
import os, time, re
import numpy as np
import joblib
import librosa
from huggingface_hub import hf_hub_download
from deepface import DeepFace
from transformers import pipeline
# If AutoTokenizer/AutoModel are not used manually, there is no need to
# import AutoTokenizer, AutoModelForSequenceClassification

# --- 1. Load the SVM speech-emotion model ---
# Downloaded from the Hugging Face Hub at startup; joblib deserializes the
# fitted scikit-learn SVM (used by predict_voice below).
print("Downloading SVM model from Hugging Face Hub...")
model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
print(f"SVM model downloaded to: {model_path}")
svm_model = joblib.load(model_path)
print("SVM model loaded.")

# --- 2. Text emotion analysis: keyword rules + zero-shot fallback ---
# Multilingual NLI model used for zero-shot emotion classification.
zero_shot = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
# English candidate labels fed to the zero-shot pipeline.
candidate_labels = ["joy", "sadness", "anger", "fear", "surprise", "disgust"]
# English label -> Traditional Chinese display name (zero-shot path).
label_map_en2cn = {
    "joy": "高興", "sadness": "悲傷", "anger": "憤怒",
    "fear": "恐懼", "surprise": "驚訝", "disgust": "厭惡"
}
# Per-emotion keyword lists (Traditional Chinese) for the rule-based fast path.
emo_keywords = {
    "happy": ["開心","快樂","愉快","喜悦","喜悅","歡喜","興奮","高興"],
    "angry": ["生氣","憤怒","不爽","發火","火大","氣憤"],
    "sad": ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"],
    "surprise": ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"],
    "fear": ["怕","恐懼","緊張","懼","膽怯","畏"]
}
# Prefixes that negate an immediately following keyword (e.g. "不開心").
negations = ["不","沒","沒有","別","勿","非"]

def keyword_emotion(text: str, keywords=None, neg_markers=None):
    """Score *text* against per-emotion keyword lists.

    Looks for the first occurrence of each keyword and ignores hits that are
    immediately preceded by a negation marker (so "不開心" does not count as
    happy). Each keyword contributes at most one hit.

    Args:
        text: Input text (Chinese expected by the default keyword lists).
        keywords: Mapping of emotion label -> list of keywords. Defaults to
            the module-level ``emo_keywords``.
        neg_markers: Negation prefixes. Defaults to module-level ``negations``.

    Returns:
        dict mapping every emotion label to its normalized share of keyword
        hits, or None when no non-negated keyword was found.
    """
    if keywords is None:
        keywords = emo_keywords
    if neg_markers is None:
        neg_markers = negations
    counts = {emo: 0 for emo in keywords}
    for emo, kws in keywords.items():
        for w in kws:
            idx = text.find(w)
            if idx == -1:
                continue
            # Simple negation detection: keyword directly preceded by a marker.
            negated = any(
                idx >= len(neg) and text[idx - len(neg):idx] == neg
                for neg in neg_markers
            )
            if not negated:
                counts[emo] += 1
    total = sum(counts.values())
    if total == 0:
        return None
    return {emo: c / total for emo, c in counts.items()}

def predict_text_mixed(text: str):
    """Hybrid text emotion analysis: keyword rules first, zero-shot fallback.

    Args:
        text: Input text; empty or whitespace-only input yields {}.

    Returns:
        dict mapping Chinese emotion label -> score. The keyword path returns
        only the single top emotion; the zero-shot path returns the full
        distribution. Falls back to {"中性": 1.0} if the pipeline raises.
    """
    if not text or text.strip() == "":
        return {}
    scores = keyword_emotion(text)
    if scores:
        top = max(scores, key=scores.get)
        # Traditional-Chinese display names, kept consistent with
        # label_map_en2cn used on the zero-shot path below (the original
        # mapping emitted Simplified Chinese, so the UI showed two different
        # label sets depending on which path fired).
        mapping = {"happy": "高興", "angry": "憤怒", "sad": "悲傷",
                   "surprise": "驚訝", "fear": "恐懼"}
        return {mapping.get(top, top): scores[top]}
    try:
        out = zero_shot(text, candidate_labels=candidate_labels,
                        hypothesis_template="这句话表达了{}情绪")
        return {label_map_en2cn.get(lab.lower(), lab): float(sc)
                for lab, sc in zip(out["labels"], out["scores"])}
    except Exception as e:
        print("zero-shot error:", e)
        return {"中性": 1.0}

# --- 3. 语音情绪预测函数 ---
def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    """Build a 26-dim feature vector for the SVM: per-coefficient mean and
    variance of 13 MFCCs computed over the whole signal."""
    coeffs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    means = coeffs.mean(axis=1)
    variances = coeffs.var(axis=1)
    return np.concatenate([means, variances])

def predict_voice(audio_path: str):
    """Predict emotion probabilities for a recorded audio file.

    Loads the file at its native sample rate, extracts MFCC statistics and
    feeds them to the pre-loaded SVM. Returns {} on missing input or on any
    load/predict failure (errors are printed, not raised).
    """
    if not audio_path:
        print("predict_voice: 无 audio_path,跳过")
        return {}
    try:
        waveform, sample_rate = librosa.load(audio_path, sr=None)
        features = extract_feature(waveform, sample_rate)
        probabilities = svm_model.predict_proba([features])[0]
        return dict(zip(svm_model.classes_, map(float, probabilities)))
    except Exception as exc:
        print("predict_voice error:", exc)
        return {}

# --- 4. 人脸情绪预测函数 ---
def predict_face(img: np.ndarray):
    """Run DeepFace emotion analysis on one webcam frame.

    Returns a dict of emotion -> score (plain floats, JSON-serializable),
    or {} when no frame was given or the analysis failed.
    """
    print("predict_face called, img is None?", img is None)
    if img is None:
        return {}
    try:
        analysis = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
        # analyze() may return a list (one entry per detected face) or a dict.
        if isinstance(analysis, list):
            head = analysis[0] if analysis else {}
        else:
            head = analysis
        raw = head.get("emotion", {}) if isinstance(head, dict) else {}
        # Cast to float so the result is JSON-serializable for the UI.
        scores = {label: float(value) for label, value in raw.items()}
        print("predict_face result:", scores)
        return scores
    except Exception as exc:
        print("DeepFace.analyze error:", exc)
        return {}

# --- 5. Gradio UI ---
# Three tabs wiring the predictors above to webcam / microphone / text inputs.
with gr.Blocks() as demo:
    gr.Markdown("## 多模態情緒分析示例")
    with gr.Tabs():
        # Face-emotion tab: streams webcam frames into DeepFace.
        with gr.TabItem("臉部情緒"):
            gr.Markdown("### 臉部情緒 (即時 Webcam Streaming 分析)")
            with gr.Row():
                # NOTE(review): `source=` is the Gradio 3.x API; Gradio 4.x
                # renamed it to `sources=[...]` — confirm against the version
                # printed at startup.
                webcam = gr.Image(source="webcam", streaming=True, type="numpy", label="攝像頭畫面")
                face_out = gr.Label(label="情緒分布")
            # Each streamed frame triggers predict_face.
            webcam.stream(fn=predict_face, inputs=webcam, outputs=face_out)
        # Voice-emotion tab: microphone recording (file path) -> SVM.
        with gr.TabItem("語音情緒"):
            gr.Markdown("### 語音情緒 分析")
            with gr.Row():
                audio = gr.Audio(source="microphone", streaming=False, type="filepath", label="錄音")
                voice_out = gr.Label(label="語音情緒結果")
            # Fires when the recording value changes (recording finished/cleared).
            audio.change(fn=predict_voice, inputs=audio, outputs=voice_out)
        # Text-emotion tab: Enter in the textbox runs keyword/zero-shot analysis.
        with gr.TabItem("文字情緒"):
            gr.Markdown("### 文字情緒 分析 (规则+zero-shot)")
            with gr.Row():
                text = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
                text_out = gr.Label(label="文字情緒結果")
            text.submit(fn=predict_text_mixed, inputs=text, outputs=text_out)

if __name__ == "__main__":
    demo.launch()