GCLing committed on
Commit 8bda002 · verified · 1 Parent(s): 1d9e853

Update app.py

Files changed (1)
  1. app.py +44 -50
app.py CHANGED
@@ -1,64 +1,58 @@
  import gradio as gr
  import numpy as np
- import cv2
- import librosa, joblib
  from deepface import DeepFace
-
- # ——— Load the speech model ———
- @gr.cache_resource()
- def load_audio_model():
-     return joblib.load("src/voice_model.joblib")
-
- audio_model = load_audio_model()
-
- # Text analysis
- def analyze_text(text):
-     if any(w in text for w in ["開心","快樂","愉快","喜悅","歡喜","興奮","歡","高興"]): return "😊 happy"
-     if any(w in text for w in ["生氣","憤怒","不爽","發火","火大","氣憤"]): return "😠 angry"
-     if any(w in text for w in ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"]): return "😢 sad"
-     if any(w in text for w in ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"]): return "😲 surprise"
-     if any(w in text for w in ["怕","恐懼","緊張","懼","膽怯","畏"]): return "😨 fear"
-     return "😐 neutral"
-
- # Speech analysis
  def analyze_audio(wav_file):
-     y, sr = librosa.load(wav_file.name, sr=None)
      mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
      mf = np.mean(mfccs.T, axis=0)
-     emo = audio_model.predict([mf])[0]
-     return f"🎧 {emo}"
-
- # Real-time facial emotion
- def analyze_face(frame):
-     # frame: numpy array from the webcam
-     # convert BGR -> RGB
-     img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-     resp = DeepFace.analyze(img, actions=['emotion'], enforce_detection=False)
-     emo = resp["dominant_emotion"]
-     # draw the emotion on the face bounding box
-     box = resp["region"]
-     x, y, w, h = box["x"], box["y"], box["w"], box["h"]
-     cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
-     cv2.putText(frame, emo, (x, y-10),
-                 cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-     return frame
-
- # ——— Build the interface ———
  with gr.Blocks() as demo:
-     gr.Markdown("# 多模態即時情緒分析 🤖")
-     tabs = gr.Tabs()
-     with tabs:
          with gr.TabItem("📷 Live Face"):
-             camera = gr.Image(source="webcam", streaming=True, tool=None)
-             out_img = gr.Image()
-             camera.change(analyze_face, camera, out_img)
-         with gr.TabItem("🎤 上傳語音檔"):
-             audio = gr.Audio(type="filepath")
-             out_a = gr.Text()
-             audio.submit(analyze_audio, audio, out_a)
-         with gr.TabItem("⌨️ 文本輸入"):
-             txt = gr.Textbox(lines=3, placeholder="在此輸入文字…")
-             out_t = gr.Text()
-             txt.submit(analyze_text, txt, out_t)
-
- demo.launch()

  import gradio as gr
  import numpy as np
+ import joblib, io, base64
+ import librosa
  from deepface import DeepFace
+
+ # —— 1. Load the models ——
+ # If only the wav/text tabs are needed, the face part can instead be loaded lazily inside the fn
+ audio_model = joblib.load("src/voice_model.joblib")
+
+ def analyze_face(frame: np.ndarray):
+     # DeepFace returns a dict that contains 'dominant_emotion'
+     res = DeepFace.analyze(frame, actions=["emotion"], enforce_detection=False)
+     emo = res["dominant_emotion"]
+     return frame, emo
+
  def analyze_audio(wav_file):
+     wav_bytes = wav_file.read()
+     y, sr = librosa.load(io.BytesIO(wav_bytes), sr=None)
      mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
      mf = np.mean(mfccs.T, axis=0)
+     return audio_model.predict([mf])[0]
+
+ def analyze_text(txt):
+     mapping = {
+         "😊happy": ["開心","快樂","愉快","喜悅","歡喜","興奮","歡","高興"],
+         "😠angry": ["生氣","憤怒","不爽","發火","火大","氣憤"],
+         "😢sad": ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"],
+         "😲surprise": ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"],
+         "😨fear": ["怕","恐懼","緊張","懼","膽怯","畏"],
+     }
+     for emo, kws in mapping.items():
+         if any(w in txt for w in kws):
+             return emo
+     return "neutral"
+
  with gr.Blocks() as demo:
+     gr.Markdown("## 多模態即時情緒分析")
+     with gr.Tabs():
          with gr.TabItem("📷 Live Face"):
+             camera = gr.Image(source="webcam", tool="editor", label="對準鏡頭")
+             out_img = gr.Image(label="畫面")
+             out_label = gr.Label(label="情緒")
+             camera.change(fn=analyze_face, inputs=camera, outputs=[out_img, out_label])
+
+         with gr.TabItem("🎤 上傳語音"):
+             wav = gr.File(label="選擇 .wav 檔", file_types=[".wav"])
+             wav_btn = gr.Button("分析")
+             wav_out = gr.Text(label="偵測到的情緒")
+             wav_btn.click(fn=analyze_audio, inputs=wav, outputs=wav_out)
+
+         with gr.TabItem("⌨️ 輸入文字"):
+             txt = gr.Textbox(label="在此輸入文字", lines=3)
+             txt_btn = gr.Button("分析")
+             txt_out = gr.Text(label="偵測到的情緒")
+             txt_btn.click(fn=analyze_text, inputs=txt, outputs=txt_out)
+
+ demo.launch()
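
The comment above the model loading mentions loading the heavier parts lazily inside the handler instead of at import time. A minimal sketch of that idea, illustrative only and not part of the commit, caching the joblib model on first use so app start-up does not pay the load cost:

# Illustrative sketch, not part of the commit: lazy, one-time loading of the voice model.
import joblib

_audio_model = None

def get_audio_model():
    # Load src/voice_model.joblib the first time it is needed, then reuse the cached instance.
    global _audio_model
    if _audio_model is None:
        _audio_model = joblib.load("src/voice_model.joblib")
    return _audio_model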
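analyze_face indexes the DeepFace result as a dict; depending on the installed deepface version, DeepFace.analyze may instead return a list of per-face dicts. A version-tolerant sketch, an assumption about the library's behavior rather than something this commit relies on:

# Illustrative sketch, not part of the commit: tolerate both dict and list results from DeepFace.
import numpy as np
from deepface import DeepFace

def dominant_emotion(frame: np.ndarray) -> str:
    res = DeepFace.analyze(frame, actions=["emotion"], enforce_detection=False)
    if isinstance(res, list):  # newer deepface releases return a list of per-face dicts
        res = res[0]
    return res["dominant_emotion"]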
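analyze_audio reads raw bytes from the gr.File value; in Gradio 3.x that value is typically a temp-file wrapper whose .name attribute holds the upload path, and reading it directly can come back empty. A hedged path-based alternative, assuming the same 13-coefficient mean-MFCC features the voice model was trained on:

# Illustrative sketch, not part of the commit: load the uploaded wav by path instead of raw bytes.
import joblib
import librosa
import numpy as np

audio_model = joblib.load("src/voice_model.joblib")

def analyze_audio_from_path(wav_file):
    path = wav_file if isinstance(wav_file, str) else wav_file.name  # temp path exposed by gr.File
    y, sr = librosa.load(path, sr=None)                   # decode at the native sample rate
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)   # 13 MFCCs per frame
    mf = np.mean(mfccs.T, axis=0)                         # average over frames -> shape (13,)
    return audio_model.predict([mf])[0]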