import gradio as gr
print("Gradio version:", gr.__version__)
import numpy as np
import joblib
import librosa
from huggingface_hub import hf_hub_download
from deepface import DeepFace
from transformers import pipeline
# AutoTokenizer / AutoModelForSequenceClassification are only needed when loading the model manually, so they are not imported here.
# --- 1. Load the SVM speech-emotion model ---
print("Downloading SVM model from Hugging Face Hub...")
model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
print(f"SVM model downloaded to: {model_path}")
svm_model = joblib.load(model_path)
print("SVM model loaded.")
# --- 2. Text emotion analysis: keyword rules + zero-shot fallback ---
# Multilingual zero-shot model (trained on XNLI); it handles Chinese input.
zero_shot = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
candidate_labels = ["joy", "sadness", "anger", "fear", "surprise", "disgust"]
label_map_en2cn = {
"joy": "高興", "sadness": "悲傷", "anger": "憤怒",
"fear": "恐懼", "surprise": "驚訝", "disgust": "厭惡"
}
emo_keywords = {
"happy": ["開心","快樂","愉快","喜悦","喜悅","歡喜","興奮","高興"],
"angry": ["生氣","憤怒","不爽","發火","火大","氣憤"],
"sad": ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"],
"surprise": ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"],
"fear": ["怕","恐懼","緊張","懼","膽怯","畏"]
}
negations = ["不","沒","沒有","別","勿","非"]
def keyword_emotion(text: str):
    """Count emotion keywords in `text` and return a normalized distribution, or None when nothing matches."""
    counts = {emo: 0 for emo in emo_keywords}
for emo, kws in emo_keywords.items():
for w in kws:
idx = text.find(w)
if idx != -1:
                # Simple negation check: skip the keyword if a negation word immediately precedes it.
neg = False
for neg_word in negations:
plen = len(neg_word)
if idx - plen >= 0 and text[idx-plen:idx] == neg_word:
neg = True
break
if not neg:
counts[emo] += 1
total = sum(counts.values())
if total > 0:
return {emo: counts[emo]/total for emo in counts}
else:
return None
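# Illustrative behaviour of keyword_emotion (hypothetical inputs):
#   keyword_emotion("我今天很開心") -> {"happy": 1.0, "angry": 0.0, "sad": 0.0, "surprise": 0.0, "fear": 0.0}
#   keyword_emotion("我不開心")     -> None ("不" directly precedes "開心", so the match is negated)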
def predict_text_mixed(text: str):
    """Try the keyword rules first; fall back to zero-shot classification."""
    if not text or text.strip() == "":
        return {}
res = keyword_emotion(text)
if res:
top_emo = max(res, key=res.get)
        # Keep output labels consistent with label_map_en2cn (Traditional Chinese).
        mapping = {"happy": "高興", "angry": "憤怒", "sad": "悲傷", "surprise": "驚訝", "fear": "恐懼"}
        cn = mapping.get(top_emo, top_emo)
return {cn: res[top_emo]}
try:
out = zero_shot(text, candidate_labels=candidate_labels,
hypothesis_template="这句话表达了{}情绪")
result = {}
for lab, sc in zip(out["labels"], out["scores"]):
cn = label_map_en2cn.get(lab.lower(), lab)
result[cn] = float(sc)
return result
except Exception as e:
print("zero-shot error:", e)
return {"中性": 1.0}
# --- 3. Speech emotion prediction ---
def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    # 13 MFCC means followed by 13 MFCC variances -> a 26-dim feature vector.
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    return np.concatenate([np.mean(mfcc, axis=1), np.var(mfcc, axis=1)])
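# Quick shape check (illustrative): one second of silence at 16 kHz should
# yield a 26-dim feature vector (13 means + 13 variances).
if __debug__:
    assert extract_feature(np.zeros(16000, dtype=np.float32), 16000).shape == (26,)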
def predict_voice(audio_path: str):
if not audio_path:
print("predict_voice: 无 audio_path,跳过")
return {}
try:
signal, sr = librosa.load(audio_path, sr=None)
feat = extract_feature(signal, sr)
probs = svm_model.predict_proba([feat])[0]
labels = svm_model.classes_
return {labels[i]: float(probs[i]) for i in range(len(labels))}
except Exception as e:
print("predict_voice error:", e)
return {}
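# Illustrative output (hypothetical path and labels): predict_voice("clip.wav")
# returns one probability per class, keyed by whatever svm_model.classes_ holds.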
# --- 4. Facial emotion prediction ---
def predict_face(img: np.ndarray):
print("predict_face called, img is None?", img is None)
if img is None:
return {}
    try:
        # enforce_detection is left at its default (True); frames without a
        # detectable face raise an error, which is caught below and returns {}.
        res = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
if isinstance(res, list):
first = res[0] if res else {}
emo = first.get("emotion", {}) if isinstance(first, dict) else {}
else:
emo = res.get("emotion", {}) if isinstance(res, dict) else {}
        # Cast to float so the result is JSON-serializable.
emo_fixed = {k: float(v) for k, v in emo.items()}
print("predict_face result:", emo_fixed)
return emo_fixed
except Exception as e:
print("DeepFace.analyze error:", e)
return {}
# --- 5. Gradio UI ---
with gr.Blocks() as demo:
gr.Markdown("## 多模態情緒分析示例")
with gr.Tabs():
        # Facial emotion tab
        with gr.TabItem("Facial Emotion"):
            gr.Markdown("### Facial Emotion (live webcam streaming analysis)")
            with gr.Row():
                # Note: `source=` is the Gradio 3.x API; Gradio 4.x renamed it to `sources=["webcam"]`.
                webcam = gr.Image(source="webcam", streaming=True, type="numpy", label="Webcam feed")
                face_out = gr.Label(label="Emotion distribution")
            webcam.stream(fn=predict_face, inputs=webcam, outputs=face_out)
        # Speech emotion tab
        with gr.TabItem("Speech Emotion"):
            gr.Markdown("### Speech Emotion Analysis")
            with gr.Row():
                # Note: `source=` is the Gradio 3.x API; Gradio 4.x uses `sources=["microphone"]`.
                audio = gr.Audio(source="microphone", streaming=False, type="filepath", label="Recording")
                voice_out = gr.Label(label="Speech emotion result")
            audio.change(fn=predict_voice, inputs=audio, outputs=voice_out)
        # Text emotion tab
        with gr.TabItem("Text Emotion"):
            gr.Markdown("### Text Emotion Analysis (rules + zero-shot)")
            with gr.Row():
                text = gr.Textbox(lines=3, placeholder="Enter Chinese text here…")
                text_out = gr.Label(label="Text emotion result")
            text.submit(fn=predict_text_mixed, inputs=text, outputs=text_out)
if __name__ == "__main__":
demo.launch()