import joblib
import numpy as np
import librosa
import gradio as gr
from huggingface_hub import hf_hub_download
from deepface import DeepFace
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

print("Downloading SVM model from Hugging Face Hub...")
|
|
model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
|
|
print(f"SVM model downloaded to: {model_path}")
|
|
svm_model = joblib.load(model_path)
|
|
print("SVM model loaded.")
print("Loading text sentiment model...")
|
|
tokenizer = AutoTokenizer.from_pretrained("uer/roberta-base-finetuned-chinanews-chinese")
|
|
model_txt = AutoModelForSequenceClassification.from_pretrained("uer/roberta-base-finetuned-chinanews-chinese")
|
|
text_emotion = pipeline("sentiment-analysis", model=model_txt, tokenizer=tokenizer)
|
|
print("Text sentiment model loaded.")
def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    """
    Compute 13 MFCC coefficients from an audio signal (numpy array) at sample
    rate sr, and return a feature vector made of their per-coefficient mean
    and variance (26 dimensions in total).
    """
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    # Collapse the time axis: 13 means + 13 variances -> shape (26,)
    return np.concatenate([np.mean(mfcc, axis=1), np.var(mfcc, axis=1)])
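# For example (hypothetical input), one second of 16 kHz mono audio yields the
# same fixed-length vector as any other duration:
#   extract_feature(np.random.randn(16000).astype(np.float32), 16000).shape == (26,)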
def predict_face(img: np.ndarray):
    """
    Facial emotion analysis: run DeepFace on a single image (numpy array, HxWx3).
    The OpenCV detector backend is forced to avoid retinaface/tf version conflicts.
    Returns a dict such as {"happy": 0.80, "sad": 0.05, ...}.
    """
    result = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
    # Recent DeepFace versions return a list with one entry per detected face.
    if isinstance(result, list):
        result = result[0]
    # DeepFace reports emotion scores as percentages; rescale to 0-1 for gr.Label.
    return {label: float(score) / 100.0 for label, score in result["emotion"].items()}

def predict_voice(audio):
    """
    Speech emotion analysis: Gradio passes `audio` as a temp-file path (str).
    Load it with librosa.load, extract MFCC features, then score them with the
    SVM's predict_proba. Returns a dict such as {"angry": 0.1, "happy": 0.7, ...}.
    """
    if audio is None:
        return {}
    signal, sr = librosa.load(audio, sr=None)
    feat = extract_feature(signal, sr)
    probs = svm_model.predict_proba([feat])[0]
    labels = svm_model.classes_
    return {labels[i]: float(probs[i]) for i in range(len(labels))}
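# librosa.load(..., sr=None) keeps each file's native sample rate; if the SVM
# was trained on audio resampled to a fixed rate, pass that rate instead of None.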
def predict_text(text: str):
    """
    Text emotion analysis: run the transformers pipeline on a Chinese input
    string and return a dict of the predicted label and its confidence score,
    e.g. {"POSITIVE": 0.95} (the exact labels depend on the model).
    """
    if not text or text.strip() == "":
        return {}
    pred = text_emotion(text)[0]
    return {pred["label"]: float(pred["score"])}
def build_interface():
    """
    Build a TabbedInterface with three sub-interfaces:
    - facial emotion (webcam capture or upload)
    - speech emotion (microphone recording or audio-file upload)
    - text emotion (free-text input)
    """
    face_interface = gr.Interface(
        fn=predict_face,
        inputs=gr.Image(sources=["webcam"], streaming=True, type="numpy"),
        outputs=gr.Label(num_top_classes=1),
        live=True,
        title="Facial emotion (live webcam)",
        description="Grant webcam access and the current expression's emotion distribution is analyzed automatically."
    )

    voice_interface = gr.Interface(
        fn=predict_voice,
        inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
        outputs=gr.Label(num_top_classes=1),
        title="Speech emotion",
        description="Record speech or upload an audio file; the model returns probabilities for five emotions: surprise/anger/happiness/sadness/fear."
    )

    text_interface = gr.Interface(
        fn=predict_text,
        inputs=gr.Textbox(lines=3, placeholder="Enter Chinese text here..."),
        outputs=gr.Label(num_top_classes=1),
        title="Text emotion",
        description="Enter Chinese text to get the predicted emotion label and confidence score in real time."
    )

    app = gr.TabbedInterface(
        interface_list=[face_interface, voice_interface, text_interface],
        tab_names=["Facial emotion", "Speech emotion", "Text emotion"]
    )
    return app
if __name__ == "__main__":
    demo = build_interface()
    demo.launch(server_name="0.0.0.0", server_port=7861, share=True)
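    # share=True requests a temporary public *.gradio.live URL in addition to
    # the local server on port 7861.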