import joblib
import numpy as np
import librosa
import gradio as gr
from huggingface_hub import hf_hub_download
from deepface import DeepFace
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
print("Downloading SVM model from Hugging Face Hub...") |
|
model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib") |
|
print(f"SVM model downloaded to: {model_path}") |
|
svm_model = joblib.load(model_path) |
|
print("SVM model loaded.") |

# Load the Chinese text model and wrap it in a sentiment-analysis pipeline.
print("Loading text sentiment model...")
tokenizer = AutoTokenizer.from_pretrained("uer/roberta-base-finetuned-chinanews-chinese")
model_txt = AutoModelForSequenceClassification.from_pretrained("uer/roberta-base-finetuned-chinanews-chinese")
text_emotion = pipeline("sentiment-analysis", model=model_txt, tokenizer=tokenizer)
print("Text sentiment model loaded.")


def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    """Compute 13 MFCCs from an audio signal (numpy array) at sample rate sr
    and return the per-coefficient means and variances concatenated into a
    single feature vector (26 dimensions in total).
    """
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    return np.concatenate([np.mean(mfcc, axis=1), np.var(mfcc, axis=1)])
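# Shape sanity check (synthetic 440 Hz tone, not real speech; the 16 kHz rate
# is an arbitrary choice for illustration):
#   sig = np.sin(2 * np.pi * 440 * np.arange(16000) / 16000).astype(np.float32)
#   extract_feature(sig, 16000).shape  # -> (26,)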


def predict_face(img: np.ndarray):
    """Facial emotion analysis on a single webcam frame; returns {} on failure."""
    print("predict_face called, img is None?", img is None)
    if img is None:  # streaming can deliver empty frames
        return {}
    try:
        # enforce_detection=False keeps the stream alive on frames with no face.
        result = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv", enforce_detection=False)
        # Recent DeepFace versions return a list of per-face dicts; older ones
        # return a single dict. Use the first detected face either way.
        if isinstance(result, list):
            result = result[0]
        emo = result.get("emotion", {})
        print("DeepFace result:", emo)
        # Cast numpy floats so gr.Label can serialize the scores.
        return {k: float(v) for k, v in emo.items()}
    except Exception as e:
        print("DeepFace.analyze error:", e)
        return {}
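# Note: DeepFace reports emotion scores as percentages (summing to roughly 100),
# so the Label component displays raw percentages rather than 0-1 probabilities.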


def predict_voice(audio):
    """Speech emotion analysis.

    Gradio passes `audio` in as a temp-file path (str). Load it with
    librosa.load, extract MFCC features, then score with the SVM's
    predict_proba. Returns a dict such as {"angry": 0.1, "happy": 0.7, ...}.
    """
    if audio is None:  # the change event also fires when the recording is cleared
        return {}
    signal, sr = librosa.load(audio, sr=None)
    feat = extract_feature(signal, sr)
    probs = svm_model.predict_proba([feat])[0]
    return {label: float(p) for label, p in zip(svm_model.classes_, probs)}
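# Example call (hypothetical file path):
#   predict_voice("/tmp/clip.wav") -> {"angry": 0.05, "happy": 0.82, ...}
# with keys taken from svm_model.classes_.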


def predict_text(text: str):
    """Chinese text sentiment analysis; returns a label -> score dict."""
    print("predict_text called, text:", text)
    if not text or not text.strip():
        return {}
    try:
        pred = text_emotion(text)[0]
        result = {pred["label"]: float(pred["score"])}
        print("Text sentiment result:", result)
        return result
    except Exception as e:
        print("predict_text error:", e)
        return {}


with gr.Blocks() as demo:
    gr.Markdown("## Multimodal Real-Time Emotion Analysis")
    with gr.Tabs():

        with gr.TabItem("Facial Emotion"):
            gr.Markdown("### Facial emotion (live webcam streaming analysis)")
            with gr.Row():
                # Gradio 4.x takes `sources` as a list of input modes.
                webcam = gr.Image(sources=["webcam"], streaming=True, type="numpy", label="Webcam feed")
                emotion_output = gr.Label(label="Emotion distribution")
            # Re-run the face analysis on every streamed frame.
            webcam.stream(fn=predict_face, inputs=webcam, outputs=emotion_output)

        with gr.TabItem("Voice Emotion"):
            audio = gr.Audio(sources=["microphone"], streaming=False, type="filepath", label="Recording")
            audio_output = gr.Label(label="Voice emotion result")
            # Fires once the recording is finalized (and when it is cleared).
            audio.change(fn=predict_voice, inputs=audio, outputs=audio_output)

        with gr.TabItem("Text Emotion"):
            text = gr.Textbox(lines=3, placeholder="Enter Chinese text…")
            text_output = gr.Label(label="Text emotion result")
            btn = gr.Button("Analyze Text")
            btn.click(fn=predict_text, inputs=text, outputs=text_output)


if __name__ == "__main__":
    demo.launch()