# my-emotion-app / app.py
import gradio as gr
import librosa
import numpy as np
from sklearn.ensemble import RandomForestClassifier  # not used directly here; kept for (re)training the voice model
import joblib
from deepface import DeepFace
# -- Assumes a speech-emotion model has already been trained and saved as voice_model.joblib (see the commented sketch below) --
voice_clf = joblib.load("voice_model.joblib")
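# The block below is a minimal, hypothetical sketch of how voice_model.joblib could
# have been produced. It is not part of this app and is kept commented out; the
# "train_audio/<label>/*.wav" layout, the folder-name-as-label convention, and the
# RandomForest settings are assumptions, chosen only to match the 13-MFCC features
# used by analyze_audio below.
#
#   import glob, os
#   X, y = [], []
#   for path in glob.glob("train_audio/*/*.wav"):        # e.g. train_audio/happy/001.wav
#       label = os.path.basename(os.path.dirname(path))  # folder name = emotion label
#       sig, sr = librosa.load(path, sr=None)
#       feat = np.mean(librosa.feature.mfcc(y=sig, sr=sr, n_mfcc=13).T, axis=0)
#       X.append(feat)
#       y.append(label)
#   clf = RandomForestClassifier(n_estimators=100, random_state=0)
#   clf.fit(np.array(X), y)
#   joblib.dump(clf, "voice_model.joblib")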
# Text analysis: simple keyword matching on Chinese emotion words
def analyze_text(text):
    if any(w in text for w in ["開心", "快樂"]):
        return "happy"
    if any(w in text for w in ["生氣", "憤怒"]):
        return "angry"
    if any(w in text for w in ["傷心", "難過", "哭"]):
        return "sad"
    if any(w in text for w in ["驚", "意外"]):
        return "surprise"
    if any(w in text for w in ["怕", "恐懼"]):
        return "fear"
    return "neutral"
# Speech analysis: average the MFCCs over time and classify with the pre-trained model
def analyze_audio(path):
    y_audio, sr = librosa.load(path, sr=None)
    # 13 MFCC coefficients averaged across frames -> one feature vector of shape (1, 13)
    mfccs = np.mean(librosa.feature.mfcc(y=y_audio, sr=sr, n_mfcc=13).T, axis=0).reshape(1, -1)
    return voice_clf.predict(mfccs)[0]
# Facial expression analysis via DeepFace
def analyze_face(img):
    # enforce_detection=False avoids an exception when no face is detected
    res = DeepFace.analyze(img, actions=['emotion'], enforce_detection=False)
    return res[0]['dominant_emotion']
# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Multimodal Emotion Analysis")
    with gr.Tab("📝 Text"):
        t = gr.Textbox(placeholder="Enter Chinese text…")
        bt = gr.Button("Analyze text")
        out_t = gr.Textbox()
        bt.click(analyze_text, inputs=t, outputs=out_t)
    with gr.Tab("🎤 Speech"):
        a = gr.Audio(type="filepath")
        ba = gr.Button("Analyze speech")
        out_a = gr.Textbox()
        ba.click(analyze_audio, inputs=a, outputs=out_a)
    with gr.Tab("📷 Face"):
        im = gr.Image(source="webcam")  # Gradio 3.x parameter; Gradio 4.x uses sources=["webcam"]
        bi = gr.Button("Analyze expression")
        out_i = gr.Textbox()
        bi.click(analyze_face, inputs=im, outputs=out_i)

demo.launch()