Spaces: Configuration error
Update app.py
app.py
CHANGED
@@ -1,42 +1,18 @@
-# app.py
-
 import gradio as gr
-import os
-import numpy as np
+import numpy as np
+import cv2
 import librosa, joblib
 from deepface import DeepFace
-def analyze_frame(frame):
-    # frame is an RGB numpy image
-    result = DeepFace.analyze(frame, actions=['emotion'])
-    return result['dominant_emotion']
-
-iface = gr.Interface(
-    fn=analyze_frame,
-    inputs=gr.inputs.Image(source="webcam", tool=None),
-    outputs="text",
-    title="多模態即時情緒分析"
-)
 
-
-
-
-# —— 1. Preload DeepFace and the speech model ——
-# DeepFace caches its weights under DEEPFACE_HOME/weights
-os.environ["DEEPFACE_HOME"] = "/tmp/.deepface"
-def load_models():
-    # a) warm up DeepFace
-    DeepFace.analyze(
-        img_path = np.zeros((224,224,3), dtype=np.uint8),
-        actions = ['emotion'],
-        enforce_detection=False
-    )
-    # b) load the locally trained speech model
+# ——— Load the speech model ———
+# (a module-level load runs once per process, so no caching decorator is needed;
+# Gradio has no cache_resource decorator, unlike Streamlit)
+def load_audio_model():
     return joblib.load("src/voice_model.joblib")
 
-audio_model = load_models()
+audio_model = load_audio_model()
 
-# —— 2. Text analysis ——
-def analyze_text_fn(text):
+# Text analysis
+def analyze_text(text):
     if any(w in text for w in ["開心","快樂","愉快","喜悅","歡喜","興奮","歡","高興"]): return "😊 happy"
     if any(w in text for w in ["生氣","憤怒","不爽","發火","火大","氣憤"]): return "😠 angry"
     if any(w in text for w in ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"]): return "😢 sad"
@@ -44,45 +20,45 @@ def analyze_text_fn(text):
     if any(w in text for w in ["怕","恐懼","緊張","懼","膽怯","畏"]): return "😨 fear"
     return "😐 neutral"
 
-# —— 3. Speech analysis ——
-def analyze_audio_fn(wav_file):
-    y, sr = librosa.load(wav_file.name)
+# Speech analysis
+def analyze_audio(wav_path):
+    # gr.Audio(type="filepath") hands the callback a path string
+    y, sr = librosa.load(wav_path, sr=None)
     mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
     mf = np.mean(mfccs.T, axis=0)
-    return audio_model.predict([mf])[0]
+    emo = audio_model.predict([mf])[0]
+    return f"🎧 {emo}"
 
-# —— 4. Face analysis ——
-def analyze_face_fn(frame):
-    result = DeepFace.analyze(
-        frame, actions=['emotion'], enforce_detection=False
-    )
-
-    emo = result['dominant_emotion']
-    return emo
+# Real-time face emotion
+def analyze_face(frame):
+    # frame: RGB numpy array from the webcam; DeepFace's OpenCV backend
+    # expects BGR, so swap channels before analyzing
+    img = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+    resp = DeepFace.analyze(img, actions=['emotion'], enforce_detection=False)
+    if isinstance(resp, list):  # recent deepface releases return a list of dicts
+        resp = resp[0]
+    emo = resp["dominant_emotion"]
+    # draw the face box and emotion label onto the frame
+    box = resp["region"]
+    x, y, w, h = box["x"], box["y"], box["w"], box["h"]
+    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
+    cv2.putText(frame, emo, (x, y - 10),
+                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+    return frame
 
-# —— 5. Build the interface ——
+# ——— Build the interface ———
 with gr.Blocks() as demo:
-    gr.Markdown("# 多模態即時情緒分析")
+    gr.Markdown("# 多模態即時情緒分析 🤖")
     tabs = gr.Tabs()
-
-    camera = gr.Image(source="webcam", tool="editor", label="對準你的臉")
-    out_face = gr.Textbox(label="偵測到的情緒")
-    camera.change(analyze_face_fn, camera, out_face)
-
-    with tabs.add_tab("🎤 上傳 WAV 檔"):
-        wav = gr.File(label="上傳 .wav")
-        out_audio = gr.Textbox(label="語音檢測情緒")
-        wav.upload(analyze_audio_fn, wav, out_audio)
-
-    with tabs.add_tab("⌨️ 輸入文字"):
-        txt = gr.Textbox(label="在此輸入文字")
-        btn = gr.Button("開始分析")
-        out_text = gr.Textbox(label="文字檢測情緒")
-        btn.click(analyze_text_fn, txt, out_text)
-
-    gr.Markdown("---")
-    gr.Markdown("ℹ️ 內建 DeepFace、librosa & sklearn 進行多模態情緒分析")
-
+    with tabs:
+        with gr.TabItem("📷 Live Face"):
+            camera = gr.Image(source="webcam", streaming=True, tool=None)
+            out_img = gr.Image()
+            camera.change(analyze_face, camera, out_img)
+        with gr.TabItem("🎤 上傳語音檔"):
+            audio = gr.Audio(type="filepath")
+            out_a = gr.Text()
+            # gr.Audio has no .submit event; react to change (record or upload)
+            audio.change(analyze_audio, audio, out_a)
+        with gr.TabItem("⌨️ 文本輸入"):
+            txt = gr.Textbox(lines=3, placeholder="在此輸入文字…")
+            out_t = gr.Text()
+            txt.submit(analyze_text, txt, out_t)
+
 if __name__ == "__main__":
     demo.launch()
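The app unpickles src/voice_model.joblib, but nothing in this commit shows how that model is produced. Below is a minimal, hypothetical training sketch that would be compatible with analyze_audio — the data layout data/<label>/*.wav, the label names, and the RandomForestClassifier are assumptions, not part of this repo; only the 13-dimensional mean-MFCC feature is fixed by the app code:

# train_voice_model.py — hypothetical sketch, not from this repo
import glob, os
import joblib
import librosa
import numpy as np
from sklearn.ensemble import RandomForestClassifier

def mfcc_mean(path):
    # the same feature analyze_audio computes: mean of 13 MFCCs over time
    y, sr = librosa.load(path, sr=None)
    return np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).T, axis=0)

X, labels = [], []
for wav in glob.glob("data/*/*.wav"):  # assumed layout: data/<label>/*.wav
    X.append(mfcc_mean(wav))
    labels.append(os.path.basename(os.path.dirname(wav)))

clf = RandomForestClassifier(n_estimators=200, random_state=0)
clf.fit(np.array(X), labels)
joblib.dump(clf, "src/voice_model.joblib")  # the file load_audio_model() reads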
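Because demo.launch() sits under the __main__ guard, the module can be imported without starting a server, which allows a quick sanity check of the three callbacks. A hypothetical smoke test, assuming src/voice_model.joblib and a local sample.wav exist:

# smoke_test.py — hypothetical; run from the repo root
import numpy as np
from app import analyze_text, analyze_audio, analyze_face

print(analyze_text("今天很開心"))   # contains 開心 -> "😊 happy"
print(analyze_audio("sample.wav"))  # -> "🎧 <label from the voice model>"
frame = np.zeros((480, 640, 3), dtype=np.uint8)
print(analyze_face(frame).shape)    # annotated frame comes back, same shape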
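The "Configuration error" status at the top of the page means the Space is not building or starting cleanly, and two things in this commit are worth checking. First, it drops the old workaround that pointed DeepFace's weight cache at a writable path; on Spaces the default home directory may not be writable when DeepFace downloads its weights, so restoring that line (before the DeepFace import) can help:

import os
os.environ["DEEPFACE_HOME"] = "/tmp/.deepface"  # must run before `from deepface import DeepFace`

Second, every import above must be declared for the Space to build. A plausible requirements.txt — package names follow the imports, the Gradio pin matters because gr.Image(source=..., tool=...) is 3.x API that Gradio 4 removed, and exact versions are illustrative:

gradio==3.50.2            # 3.x API: gr.Image(source=..., tool=...)
numpy
opencv-python-headless    # cv2 without GUI dependencies, suits server images
librosa
joblib
scikit-learn              # needed to unpickle src/voice_model.joblib
deepface                  # pulls in tensorflow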