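"""Multimodal emotion analysis demo (text, voice, and face) built with Gradio.

Third-party dependencies: gradio, librosa, numpy, joblib, deepface, and
scikit-learn (needed to unpickle the pretrained voice model).
"""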
import gradio as gr
import joblib
import librosa
import numpy as np
from deepface import DeepFace

# NOTE: assumes the voice model has already been trained and saved as voice_model.joblib.
voice_clf = joblib.load("voice_model.joblib")
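# A minimal sketch of how voice_model.joblib could be produced (an assumption,
# not part of the original app): one mean MFCC vector per clip, fit with a
# random forest. The "data/<emotion>/*.wav" layout is a hypothetical name
# used only for illustration.
#
#   import glob, os
#   from sklearn.ensemble import RandomForestClassifier
#   X, y = [], []
#   for path in glob.glob("data/*/*.wav"):
#       sig, sr = librosa.load(path, sr=None)
#       X.append(np.mean(librosa.feature.mfcc(y=sig, sr=sr, n_mfcc=13).T, axis=0))
#       y.append(os.path.basename(os.path.dirname(path)))  # folder name = emotion label
#   clf = RandomForestClassifier(n_estimators=100, random_state=0).fit(np.array(X), y)
#   joblib.dump(clf, "voice_model.joblib")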

# Text analysis: naive keyword matching over Traditional Chinese emotion words.
def analyze_text(text):
    if any(w in text for w in ["開心", "快樂"]):        # "happy", "joyful"
        return "happy"
    if any(w in text for w in ["生氣", "憤怒"]):        # "angry", "furious"
        return "angry"
    if any(w in text for w in ["傷心", "難過", "哭"]):  # "sad", "upset", "cry"
        return "sad"
    if any(w in text for w in ["驚", "意外"]):          # "startled", "unexpected"
        return "surprise"
    if any(w in text for w in ["怕", "恐懼"]):          # "afraid", "fear"
        return "fear"
    return "neutral"

# Voice analysis: classify the clip's mean MFCC vector with the pretrained model.
def analyze_audio(path):
    y_audio, sr = librosa.load(path, sr=None)  # librosa loads mono by default
    mfccs = np.mean(librosa.feature.mfcc(y=y_audio, sr=sr, n_mfcc=13).T, axis=0).reshape(1, -1)  # shape (1, 13)
    return voice_clf.predict(mfccs)[0]

# Face analysis: DeepFace.analyze returns a list of result dicts, one per detected face.
def analyze_face(img):
    # enforce_detection=False avoids an exception when no face is found.
    res = DeepFace.analyze(img, actions=["emotion"], enforce_detection=False)
    return res[0]["dominant_emotion"]

# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Multimodal Emotion Analysis")
    with gr.Tab("📝 Text"):
        t = gr.Textbox(placeholder="Enter Chinese text…")
        bt = gr.Button("Analyze text")
        out_t = gr.Textbox()
        bt.click(analyze_text, inputs=t, outputs=out_t)
    with gr.Tab("🎤 Voice"):
        a = gr.Audio(type="filepath")
        ba = gr.Button("Analyze voice")
        out_a = gr.Textbox()
        ba.click(analyze_audio, inputs=a, outputs=out_a)
    with gr.Tab("📷 Face"):
        # Gradio 4.x expects `sources` (a list); older 3.x releases used `source="webcam"`.
        im = gr.Image(sources=["webcam"])
        bi = gr.Button("Analyze expression")
        out_i = gr.Textbox()
        bi.click(analyze_face, inputs=im, outputs=out_i)

# Launch the app; without this call the interface is defined but never served.
demo.launch()