Spaces:

GCLing
/

emotion

Runtime error

File size: 7,750 Bytes

b8ff14e
283d228
c7ec63e
7b9bebe
c7ec63e
283d228
c7ec63e
287ab51
283d228
 
 
 
 
 
 
287ab51
283d228
c7ec63e
 
 
 
 
283d228
 
 
 
 
 
 
 
2a89f5d
 
283d228
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36c201f
 
 
 
 
2a89f5d
283d228
36c201f
 
7b9bebe
283d228
 
7b9bebe
 
2a89f5d
283d228
 
7b9bebe
 
283d228
 
7b9bebe
283d228
7b9bebe
283d228
 
 
 
 
dfd5bb6
36c201f
283d228
 
4eafa91
36c201f
 
283d228
2a89f5d
283d228
 
2a89f5d
283d228
 
2a89f5d
283d228
 
36c201f
283d228
7b9bebe
 
 
dfd5bb6
7b9bebe
 
 
 
 
 
 
 
 
 
 
 
c7ec63e
283d228
7b9bebe
283d228
7b9bebe
 
 
 
 
 
 
 
283d228
7b9bebe
 
 
 
283d228
287ab51
 
 
 
034eede
 
 
 
 
 
 
 
 
 
 
 
 
6d244f4
78bdf71
287ab51
 
 
 
 
 
283d228
82dd2b7
287ab51
 
283d228
b9af6a3
 
 
287ab51
 
7b9bebe
c7ec63e
b9af6a3
283d228
b1ced09

import gradio as gr
import os
import numpy as np
import joblib
import librosa
import requests
from huggingface_hub import hf_hub_download

# --- Optional DeepFace import ---
# DeepFace is treated as optional: when the import fails, `has_deepface` is
# set to False so the rest of the module can check the flag instead of
# crashing at import time.
try:
    from deepface import DeepFace
    has_deepface = True
except ImportError:
    # Printed notice (Chinese): deepface is not installed locally, so face
    # emotion is skipped locally; the Space is expected to install deepface.
    print("本地未安装 deepface，将在本地跳过臉部情緒；Space 上会安装 deepface。")
    has_deepface = False

# --- 1. Load the speech-emotion SVM ---
# The serialized classifier is fetched from the Hugging Face Hub and
# deserialized with joblib when this module is imported.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; this is only safe while the model repository is trusted.
print("Downloading SVM model from Hugging Face Hub...")
model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
svm_model = joblib.load(model_path)
print("SVM model loaded.")

# --- 2. Text sentiment analysis: use the hosted Inference API ---
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
if HF_API_TOKEN is None:
    # Printed warning (Chinese): HF_API_TOKEN was not detected, so Inference
    # API calls may fail.
    print("警告：未检测到 HF_API_TOKEN，Inference API 可能失败。")
# Use a publicly available Chinese sentiment-classification model.
HF_TEXT_MODEL = "uer/roberta-base-finetuned-dianping-chinese"
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_TEXT_MODEL}"
# Send the bearer token only when one is configured; otherwise no auth header.
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}


# Maps the first word of a model label (lower-cased) to the label shown in the UI.
_SENTIMENT_LABELS = {
    "positive": "正面",
    "pos": "正面",
    "正面": "正面",
    "negative": "負面",
    "neg": "負面",
    "负面": "負面",
    "負面": "負面",
    "neutral": "中性",
    "中性": "中性",
}


def _flatten_classification_payload(data):
    """Return the ``{"label", "score"}`` dicts contained in an API payload.

    Accepts both the flat ``[{...}, ...]`` shape and the nested
    ``[[{...}, ...]]`` shape (one inner list per input text); anything else
    yields an empty list.
    """
    if isinstance(data, list) and data and isinstance(data[0], list):
        data = data[0]
    if not isinstance(data, list):
        return []
    return [item for item in data if isinstance(item, dict)]


def _sentiment_display_label(label):
    """Translate a raw model label into the UI label, or return it unchanged."""
    raw = str(label)
    words = raw.split()
    # Match on the first word only so that suffixed labels such as
    # "positive (stars 4 and 5)" are still recognised.
    key = words[0].lower() if words else ""
    return _SENTIMENT_LABELS.get(key, raw)


def predict_text_via_api(text: str):
    """Classify the sentiment of ``text`` through the hosted Inference API.

    Args:
        text: Text to classify. ``None``, empty and whitespace-only input is
            not sent to the API.

    Returns:
        ``{}`` for blank input. Otherwise a dict mapping a display label to
        its score for at most the three highest-scoring classes. Any failure
        (non-200 status, unrecognised payload, network or JSON error) falls
        back to ``{"中性": 1.0}`` (neutral) so the UI always has a result.
    """
    if not text or not text.strip():
        return {}
    try:
        resp = requests.post(HF_API_URL, headers=headers, json={"inputs": text}, timeout=30)
        if resp.status_code != 200:
            print(f"Inference API 返回状态码 {resp.status_code}: {resp.text}")
            # Fall back to neutral rather than surfacing an HTTP error in the UI.
            return {"中性": 1.0}
        data = resp.json()
        candidates = _flatten_classification_payload(data)
        if not candidates:
            print("Inference API 返回格式异常:", data)
            return {"中性": 1.0}
        # Keep only the three best classes for display.
        candidates.sort(key=lambda item: float(item.get("score", 0.0)), reverse=True)
        return {
            _sentiment_display_label(item.get("label", "")): float(item.get("score", 0.0))
            for item in candidates[:3]
        }
    except Exception as e:
        # Deliberate best-effort boundary: the UI callback must never raise.
        print("调用 Inference API 出错:", e)
        return {"中性": 1.0}

# Keyword rules take priority: if a rule matches, its result is returned; otherwise the Inference API is called.
# Rule-based lexicon: emotion id -> trigger words searched for in the text.
emo_keywords = {
    "happy": ["開心", "快樂", "愉快", "喜悦", "喜悅", "歡喜", "興奮", "高興"],
    "angry": ["生氣", "憤怒", "不爽", "發火", "火大", "氣憤"],
    "sad": ["傷心", "難過", "哭", "難受", "心酸", "憂", "悲", "哀", "痛苦", "慘", "愁"],
    "surprise": ["驚訝", "意外", "嚇", "驚詫", "詫異", "訝異", "好奇"],
    "fear": ["怕", "恐懼", "緊張", "懼", "膽怯", "畏"],
    "disgust": ["噁心", "厭惡", "反感"],
}
# A keyword immediately preceded by one of these words is treated as negated.
negations = ["不", "沒", "沒有", "別", "勿", "非"]


def _has_unnegated_occurrence(text, keyword):
    """Return True if ``keyword`` appears in ``text`` at least once without a
    negation word directly in front of it."""
    neg_prefixes = tuple(negations)
    start = text.find(keyword)
    while start != -1:
        # endswith(..., 0, start) inspects the text just before the match
        # without copying a slice.
        if not text.endswith(neg_prefixes, 0, start):
            return True
        start = text.find(keyword, start + 1)
    return False


def keyword_emotion(text: str):
    """Guess an emotion from keyword hits in ``text``.

    Each keyword contributes at most one hit to its emotion, and only if it
    occurs somewhere without a negation word right before it. Every
    occurrence is examined, so a negated first mention no longer hides a
    later plain mention.

    Args:
        text: Input text; ``None`` or empty input yields ``None``.

    Returns:
        ``{emotion: share}`` for the emotion with the most hits, where
        ``share`` is that emotion's hits divided by all hits (ties go to the
        emotion listed first in ``emo_keywords``), or ``None`` when nothing
        matched.
    """
    if not text:
        return None
    stripped = text.strip()
    counts = {
        emo: sum(_has_unnegated_occurrence(stripped, kw) for kw in kws)
        for emo, kws in emo_keywords.items()
    }
    total = sum(counts.values())
    if total == 0:
        return None
    top = max(counts, key=counts.get)
    return {top: counts[top] / total}

def predict_text_mixed(text: str):
    """Classify text with keyword rules first and the Inference API second.

    Blank input returns ``{}``. When ``keyword_emotion`` finds a match, its
    single emotion is returned under its Chinese display label; otherwise the
    result of ``predict_text_via_api`` is returned unchanged.
    """
    print("predict_text_mixed:", text)
    if not (text and text.strip()):
        return {}

    rule_hit = keyword_emotion(text)
    if not rule_hit:
        # No keyword rule matched: defer to the Inference API.
        return predict_text_via_api(text)

    # The rule result holds exactly one entry; translate its label for display.
    emotion, share = next(iter(rule_hit.items()))
    display_names = {
        "happy": "高興",
        "angry": "憤怒",
        "sad": "悲傷",
        "surprise": "驚訝",
        "fear": "恐懼",
        "disgust": "厭惡",
    }
    return {display_names.get(emotion, emotion): float(share)}

# --- 3. Speech emotion prediction ---
def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    """Build the feature vector for one audio clip.

    Computes 13 MFCCs with librosa, then concatenates the mean and the
    variance of each coefficient taken along axis 1 of the MFCC matrix.
    """
    coeffs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    coeff_means = np.mean(coeffs, axis=1)
    coeff_vars = np.var(coeffs, axis=1)
    return np.concatenate([coeff_means, coeff_vars])

def predict_voice(audio_path: str):
    """Predict emotion probabilities for a recorded audio file.

    Returns ``{}`` when no path is given or when loading/prediction fails
    (the error is printed); otherwise a dict mapping each class of
    ``svm_model`` to its predicted probability.
    """
    if not audio_path:
        return {}
    try:
        # sr=None keeps the file's own sampling rate.
        waveform, rate = librosa.load(audio_path, sr=None)
        features = extract_feature(waveform, rate)
        class_probs = svm_model.predict_proba([features])[0]
        return {
            label: float(prob)
            for label, prob in zip(svm_model.classes_, class_probs)
        }
    except Exception as e:
        print("predict_voice error:", e)
        return {}

# --- 4. Facial emotion prediction ---
def predict_face(img: np.ndarray):
    """Predict the emotion distribution for one image frame.

    Args:
        img: Image frame to analyse, or ``None`` when no frame is available.

    Returns:
        ``{}`` when there is no frame, DeepFace is unavailable, the result
        has no emotion data, or analysis fails (the error is printed).
        Otherwise a dict mapping emotion name to a score, rescaled so the
        scores sum to 1.
    """
    # Check the frame first so an empty stream tick returns immediately.
    if img is None or not has_deepface:
        return {}
    try:
        res = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
        # Depending on the DeepFace version the result is a dict or a list of
        # per-face dicts; use the first face in the latter case.
        first = res[0] if isinstance(res, list) and res else res
        emo = first.get("emotion", {}) if isinstance(first, dict) else {}
        scores = {k: float(v) for k, v in emo.items()}
        # NOTE(review): DeepFace reports these scores as percentages (0-100)
        # while gr.Label expects 0-1 confidences; normalising by the sum is
        # correct for either scale.
        total = sum(scores.values())
        if total > 0:
            scores = {k: v / total for k, v in scores.items()}
        return scores
    except Exception as e:
        print("DeepFace.analyze error:", e)
        return {}

# --- 5. Gradio interface: tabbed Blocks UI (face / voice / text) ---
def build_interface():
    """Assemble the tabbed Gradio demo for face, voice and text emotion.

    The face tab streams webcam frames to ``predict_face`` when DeepFace is
    available; otherwise a placeholder tab explains that it was skipped. The
    voice tab runs ``predict_voice`` when the recording changes, and the text
    tab runs ``predict_text_mixed`` on submit.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("## 多模態情緒分析示例")
        with gr.Tabs():
            # Face tab: only functional when DeepFace could be imported.
            if has_deepface:
                with gr.TabItem("臉部情緒"):
                    gr.Markdown("### 臉部情緒 (即時 Webcam Streaming 分析)")
                    with gr.Row():
                        # A streaming Image delivers each webcam frame as a
                        # numpy array, which is what predict_face expects.
                        # NOTE(review): `source=` is the Gradio 3.x keyword,
                        # matching the Audio component below — confirm the
                        # pinned Gradio version.
                        webcam = gr.Image(
                            source="webcam",
                            streaming=True,
                            type="numpy",
                            label="攝像頭畫面",
                        )
                        face_out = gr.Label(label="情緒分布")
                    webcam.stream(fn=predict_face, inputs=webcam, outputs=face_out)
            else:
                # Tell the user why the face tab has no controls.
                with gr.TabItem("臉部情緒 (跳過)"):
                    gr.Markdown("未安裝 deepface，已跳過臉部情緒分析。")

            # Voice tab
            with gr.TabItem("語音情緒"):
                gr.Markdown("### 語音情緒 分析")
                with gr.Row():
                    audio = gr.Audio(source="microphone", streaming=False, type="filepath", label="錄音")
                    voice_out = gr.Label(label="語音情緒結果")
                audio.change(fn=predict_voice, inputs=audio, outputs=voice_out)

            # Text tab
            with gr.TabItem("文字情緒"):
                gr.Markdown("### 文字情緒 分析 (规则+Inference API)")
                with gr.Row():
                    text = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
                    text_out = gr.Label(label="文字情緒結果")
                text.submit(fn=predict_text_mixed, inputs=text, outputs=text_out)
    return demo

if __name__ == "__main__":
    # Build and launch the UI only when this file is run as a script.
    demo = build_interface()
    # share=True requests a temporary public link, useful when testing locally.
    demo.launch(share=True)