Update app.py
app.py CHANGED
@@ -1,64 +1,58 @@
 import gradio as gr
 import numpy as np
-import
-import librosa
+import joblib, io, base64
+import librosa
 from deepface import DeepFace

-#
-    return joblib.load("src/voice_model.joblib")
+# —— 1. Load the models ——
+# If only wav/text uploads are needed, wrap the face part inside the fn and load it lazily
+audio_model = joblib.load("src/voice_model.joblib")

+def analyze_face(frame: np.ndarray):
+    # DeepFace returns a dict that includes 'dominant_emotion'
+    res = DeepFace.analyze(frame, actions=["emotion"], enforce_detection=False)
+    emo = res["dominant_emotion"]
+    return frame, emo

-# Text analysis
-def analyze_text(text):
-    if any(w in text for w in ["開心","快樂","愉快","喜悅","歡喜","興奮","歡","高興"]): return "😊 happy"
-    if any(w in text for w in ["生氣","憤怒","不爽","發火","火大","氣憤"]): return "😠 angry"
-    if any(w in text for w in ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"]): return "😢 sad"
-    if any(w in text for w in ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"]): return "😲 surprise"
-    if any(w in text for w in ["怕","恐懼","緊張","懼","膽怯","畏"]): return "😨 fear"
-    return "😐 neutral"
-
-# Audio analysis
 def analyze_audio(wav_file):
+    wav_bytes = wav_file.read()
+    y, sr = librosa.load(io.BytesIO(wav_bytes), sr=None)
     mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
     mf = np.mean(mfccs.T, axis=0)
-    return f"🎧 {emo}"
+    return audio_model.predict([mf])[0]

-        cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)
-    return frame
-
-# ——— Build the interface ———
+def analyze_text(txt):
+    mapping = {
+        "😊happy": ["開心","快樂","愉快","喜悅","歡喜","興奮","歡","高興"],
+        "😠angry": ["生氣","憤怒","不爽","發火","火大","氣憤"],
+        "😢sad": ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"],
+        "😲surprise": ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"],
+        "😨fear": ["怕","恐懼","緊張","懼","膽怯","畏"],
+    }
+    for emo, kws in mapping.items():
+        if any(w in txt for w in kws):
+            return emo
+    return "neutral"

 with gr.Blocks() as demo:
-    gr.Markdown("
-    with tabs:
+    gr.Markdown("## Multimodal Real-Time Emotion Analysis")
+    with gr.Tabs():
         with gr.TabItem("📷 Live Face"):
-            camera = gr.Image(source="webcam",
-            out_img = gr.Image()
+            camera = gr.Image(source="webcam", tool="editor", label="Face the camera")
+            out_img = gr.Image(label="Frame")
+            out_label = gr.Label(label="Emotion")
+            camera.change(fn=analyze_face, inputs=camera, outputs=[out_img, out_label], live=True)
+
+        with gr.TabItem("🎤 Upload Audio"):
+            wav = gr.File(label="Choose a .wav file", file_types=[".wav"])
+            wav_btn = gr.Button("Analyze")
+            wav_out = gr.Text(label="Detected emotion")
+            wav_btn.click(fn=analyze_audio, inputs=wav, outputs=wav_out)
+
+        with gr.TabItem("⌨️ Enter Text"):
+            txt = gr.Textbox(label="Type text here", lines=3)
+            txt_btn = gr.Button("Analyze")
+            txt_out = gr.Text(label="Detected emotion")
+            txt_btn.click(fn=analyze_text, inputs=txt, outputs=txt_out)

 demo.launch()
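A note on the new analyze_face: recent DeepFace releases return a list of per-face dicts from analyze(), while older ones return a single dict, so res["dominant_emotion"] can fail depending on the installed version. A minimal guard, keeping the same call signature as in the diff:

def analyze_face(frame: np.ndarray):
    res = DeepFace.analyze(frame, actions=["emotion"], enforce_detection=False)
    if isinstance(res, list):  # newer DeepFace: one dict per detected face
        res = res[0]
    return frame, res["dominant_emotion"]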
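analyze_audio feeds the model a 13-dimensional mean-MFCC vector, so src/voice_model.joblib must have been trained on the same features. The training code is not shown in this commit; a minimal sketch of a compatible trainer, assuming a scikit-learn classifier and a hypothetical data/<emotion>/*.wav layout (both assumptions, not taken from this repo):

import glob, os
import joblib
import librosa
import numpy as np
from sklearn.svm import SVC

X, labels = [], []
for path in glob.glob("data/*/*.wav"):          # hypothetical layout: data/<emotion>/<clip>.wav
    sig, sr = librosa.load(path, sr=None)
    mfccs = librosa.feature.mfcc(y=sig, sr=sr, n_mfcc=13)
    X.append(np.mean(mfccs.T, axis=0))          # same 13-dim summary as analyze_audio
    labels.append(os.path.basename(os.path.dirname(path)))

joblib.dump(SVC().fit(np.array(X), labels), "src/voice_model.joblib")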
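analyze_audio also assumes wav_file is an open, readable stream; depending on the Gradio version, gr.File may instead pass a temp-file wrapper whose handle is already closed, making wav_file.read() fail. Loading by path sidesteps that; a sketch under that assumption:

def analyze_audio(wav_file):
    path = getattr(wav_file, "name", wav_file)  # temp-file wrapper or a plain path string
    y, sr = librosa.load(path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mf = np.mean(mfccs.T, axis=0)
    return audio_model.predict([mf])[0]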
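The keyword matcher can be smoke-tested without the UI; the inputs below are made-up examples, the first containing 開心 from the happy list:

print(analyze_text("今天好開心"))  # -> 😊happy
print(analyze_text("hello"))       # -> neutral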
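On the Live Face tab: live=True is an Interface-level flag rather than a .change() argument in Blocks, and gr.Image(source="webcam", tool="editor") is Gradio 3.x-only API, so this wiring may raise at startup depending on the pinned Gradio version. In Gradio 3.x, continuous webcam analysis is usually done with streaming=True plus the stream event; a sketch:

camera = gr.Image(source="webcam", streaming=True, label="Face the camera")
out_img = gr.Image(label="Frame")
out_label = gr.Label(label="Emotion")
camera.stream(fn=analyze_face, inputs=camera, outputs=[out_img, out_label])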