hi
- .gradio/certificate.pem +31 -0
- __pycache__/feature.cpython-312.pyc +0 -0
- app-origin.py +42 -0
- app.py +87 -0
- apt.txt +1 -0
- feature.py +48 -0
- model_weights.pth +3 -0
- requirements.txt +9 -0
.gradio/certificate.pem
ADDED
@@ -0,0 +1,31 @@
+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----
__pycache__/feature.cpython-312.pyc
ADDED
Binary file (3.63 kB)
app-origin.py
ADDED
@@ -0,0 +1,42 @@
+import gradio as gr
+import whisper
+from feature import AudioTextEmotionModel, extract_audio_features
+import torch
+
+# Select the device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Load the emotion recognition model
+emotion_model = AudioTextEmotionModel(audio_input_dim=180, text_input_dim=768, hidden_dim=128, output_dim=3)
+emotion_model.load_state_dict(torch.load("model_weights.pth", map_location=device))
+emotion_model.to(device)
+emotion_model.eval()
+
+# Load the Whisper model for speech-to-text
+whisper_model = whisper.load_model("base")
+EMOTION_LABELS = {0: '正面', 1: '中性', 2: '負面'}
+
+def predict_emotion(audio_path):
+    result = whisper_model.transcribe(audio_path, language="zh")
+    text = result["text"]
+    audio_feat = extract_audio_features(audio_path)
+    audio_tensor = torch.tensor(audio_feat, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to(device)
+
+    with torch.no_grad():
+        output = emotion_model(audio_tensor, torch.zeros(1, 1, 768).to(device))  # dummy text input
+        pred = torch.argmax(output, dim=1).item()
+
+    return f"語音轉文字結果:{text}\n預測情緒:{EMOTION_LABELS[pred]}"
+
+def create_interface():
+    with gr.Blocks() as demo:
+        gr.Markdown("### 🎧 中文語音情緒辨識(EATD)\n說一段話,我會判斷你的情緒(正面 / 中性 / 負面)")
+        audio_input = gr.Audio(sources=["microphone"], type="filepath", label="請錄音")
+        output = gr.Textbox()
+        btn = gr.Button("分析")
+        btn.click(fn=predict_emotion, inputs=audio_input, outputs=output)
+    return demo
+
+demo = create_interface()
+demo.launch(share=True)
+
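app-origin.py is the earlier single-input version of the app: the text branch of the model always receives a zero tensor, so the prediction is driven by the audio features alone. The following is a minimal offline sketch of that same path outside Gradio, for illustration only; it assumes the files from this commit sit in the working directory, that sample.wav is a hypothetical local recording, and that importing feature also downloads bert-base-chinese on first use.

import torch
import whisper
from feature import AudioTextEmotionModel, extract_audio_features

device = torch.device("cpu")
model = AudioTextEmotionModel(audio_input_dim=180, text_input_dim=768, hidden_dim=128, output_dim=3)
model.load_state_dict(torch.load("model_weights.pth", map_location=device))
model.eval()

# sample.wav is an assumed test file, not part of this commit
text = whisper.load_model("base").transcribe("sample.wav", language="zh")["text"]
audio = torch.tensor(extract_audio_features("sample.wav"), dtype=torch.float32).view(1, 1, -1)
with torch.no_grad():
    pred = torch.argmax(model(audio, torch.zeros(1, 1, 768)), dim=1).item()  # zero tensor = unused text branch
print(text, {0: "正面", 1: "中性", 2: "負面"}[pred])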
app.py
ADDED
@@ -0,0 +1,87 @@
+import gradio as gr
+import whisper
+import torch
+import numpy as np
+from feature import (
+    AudioTextEmotionModel,
+    extract_audio_features,
+    extract_text_features
+)
+
+# Select the device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Load the emotion recognition model
+emotion_model = AudioTextEmotionModel(audio_input_dim=180, text_input_dim=768, hidden_dim=128, output_dim=3)
+emotion_model.load_state_dict(torch.load("model_weights.pth", map_location=device))
+emotion_model.to(device)
+emotion_model.eval()
+
+# Whisper model
+whisper_model = whisper.load_model("base")
+EMOTION_LABELS = {0: '正面', 1: '中性', 2: '負面'}
+
+# Main emotion prediction function (supports audio / text / both)
+def analyze_input(audio, text_input):
+    audio_feat = None
+    text_feat = None
+    result_text = ""
+
+    # If audio input is provided
+    if audio:
+        result = whisper_model.transcribe(audio, language="zh")
+        transcribed_text = result["text"]
+        result_text += f"🎧 語音轉文字:「{transcribed_text}」\n"
+        audio_feat = extract_audio_features(audio)
+    else:
+        transcribed_text = None
+
+    # If text is available (typed by the user or transcribed from audio)
+    text = text_input or transcribed_text
+    if text:
+        text_feat = extract_text_features(text)
+        result_text += f"✏️ 文字內容:「{text}」\n"
+
+    if audio_feat is None and text_feat is None:
+        return "請提供語音或文字輸入進行情緒辨識。"
+
+    # Build tensor inputs, falling back to zero tensors for a missing modality
+    audio_tensor = (
+        torch.tensor(audio_feat, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to(device)
+        if audio_feat is not None else
+        torch.zeros(1, 1, 180).to(device)
+    )
+    text_tensor = (
+        torch.tensor(text_feat, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to(device)
+        if text_feat is not None else
+        torch.zeros(1, 1, 768).to(device)
+    )
+
+    with torch.no_grad():
+        output = emotion_model(audio_tensor, text_tensor)
+        pred = torch.argmax(output, dim=1).item()
+
+    result_text += f"📊 預測情緒:{EMOTION_LABELS[pred]}"
+    return result_text
+
+# Gradio Chat UI
+with gr.Blocks() as demo:
+    gr.Markdown("## 🎧 中文語音情緒辨識聊天機器人\n支援語音輸入、文字輸入,或兩者結合分析")
+
+    chatbot = gr.Chatbot()
+    with gr.Row():
+        audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="語音")
+        text_input = gr.Textbox(lines=2, placeholder="輸入文字內容...", label="文字")
+    send_btn = gr.Button("送出分析")
+
+    def chat_handler(audio, text, history):
+        response = analyze_input(audio, text)
+        history = history or []
+        history.append(("👤", response))
+        return history, None, ""
+
+    send_btn.click(fn=chat_handler,
+                   inputs=[audio_input, text_input, chatbot],
+                   outputs=[chatbot, audio_input, text_input])
+
+demo.launch(share=True)
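The tensor-building block in analyze_input is what lets one model handle audio-only, text-only, or combined input: whichever modality is missing is replaced by a zero placeholder of the matching shape (1, 1, 180 for audio, 1, 1, 768 for text). A small standalone sketch of that fallback, with hypothetical names, just to make the shapes explicit:

import numpy as np
import torch

def to_model_inputs(audio_feat, text_feat, device="cpu"):
    # Mirrors analyze_input: use the real feature vector when present, otherwise a zero tensor.
    audio_tensor = (torch.tensor(audio_feat, dtype=torch.float32).view(1, 1, -1)
                    if audio_feat is not None else torch.zeros(1, 1, 180)).to(device)
    text_tensor = (torch.tensor(text_feat, dtype=torch.float32).view(1, 1, -1)
                   if text_feat is not None else torch.zeros(1, 1, 768)).to(device)
    return audio_tensor, text_tensor

a, t = to_model_inputs(np.random.rand(180), None)  # audio-only case
print(a.shape, t.shape)                            # torch.Size([1, 1, 180]) torch.Size([1, 1, 768])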
apt.txt
ADDED
@@ -0,0 +1 @@
+ffmpeg
feature.py
ADDED
@@ -0,0 +1,48 @@
+# feature.py
+
+import torch
+import torch.nn as nn
+import numpy as np
+import librosa
+from transformers import BertTokenizer, BertModel
+
+# === Model architecture ===
+class AudioTextEmotionModel(nn.Module):
+    def __init__(self, audio_input_dim, text_input_dim, hidden_dim, output_dim):
+        super(AudioTextEmotionModel, self).__init__()
+        self.audio_gru = nn.GRU(audio_input_dim, hidden_dim, batch_first=True)
+        self.audio_bilstm = nn.LSTM(hidden_dim, hidden_dim, batch_first=True, bidirectional=True)
+        self.text_bilstm = nn.LSTM(text_input_dim, hidden_dim, batch_first=True, bidirectional=True)
+        self.fc = nn.Linear(hidden_dim * 4, output_dim)
+        self.softmax = nn.Softmax(dim=1)
+
+    def forward(self, audio_input, text_input):
+        audio_out, _ = self.audio_gru(audio_input)
+        audio_out, _ = self.audio_bilstm(audio_out)
+        text_out, _ = self.text_bilstm(text_input)
+        combined = torch.cat((audio_out[:, -1, :], text_out[:, -1, :]), dim=1)
+        output = self.fc(combined)
+        return self.softmax(output)
+
+# === Audio feature extraction ===
+def extract_audio_features(file_path):
+    y, sr = librosa.load(file_path, sr=None)
+    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
+    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
+    spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
+    features = np.concatenate((
+        np.mean(mfcc, axis=1),
+        np.mean(chroma, axis=1),
+        np.mean(spec, axis=1)
+    ))
+    return features
+
+# === Text feature extraction (using BERT) ===
+tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")
+bert_model = BertModel.from_pretrained("bert-base-chinese")
+
+def extract_text_features(text):
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+    outputs = bert_model(**inputs)
+    cls_embedding = outputs.last_hidden_state[:, 0, :]
+    return cls_embedding.squeeze().detach().numpy()
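The audio_input_dim of 180 used by both apps follows directly from extract_audio_features: 40 MFCC means + 12 chroma means + 128 mel-band means. A minimal shape check, assuming feature.py is importable from the current directory (importing it downloads bert-base-chinese the first time):

import torch
from feature import AudioTextEmotionModel

model = AudioTextEmotionModel(audio_input_dim=180, text_input_dim=768, hidden_dim=128, output_dim=3)
dummy_audio = torch.randn(1, 1, 180)  # (batch, seq_len=1, 40 MFCC + 12 chroma + 128 mel bands)
dummy_text = torch.randn(1, 1, 768)   # (batch, seq_len=1, BERT [CLS] embedding size)
with torch.no_grad():
    probs = model(dummy_audio, dummy_text)
print(probs.shape, probs.sum().item())  # torch.Size([1, 3]); each row sums to ~1.0 because of the softmax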
model_weights.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02f3ffdd54b161379089ddfb318f3b231de4e6754a186962459c38178d305627
+size 5225033
requirements.txt
ADDED
@@ -0,0 +1,9 @@
+torch
+torchaudio
+git+https://github.com/openai/whisper.git
+gradio>=4.44.0
+librosa
+numpy
+transformers
+pydantic>=2.0.0
+