DeepLearning101 committed on
Commit
a4e56dc
·
verified ·
1 Parent(s): f84f039

First Blood

Browse files
Files changed (1) hide show
  1. app.py +42 -0
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ import whisper
4
+ import os
5
+
6
+ #其實可以用 Label-Studio 哦 XD
7
+
8
# Load the Whisper "large-v2" model once at import time, on the GPU when
# CUDA is available and on the CPU otherwise.
model = whisper.load_model(
    "large-v2",
    device="cuda" if torch.cuda.is_available() else "cpu",
)
10
+
11
def transcribe(audio_file):
    """Transcribe an audio file with Whisper and save the text to disk.

    Args:
        audio_file: Filesystem path of the audio to transcribe, as supplied
            by the Gradio input wired to this callback. A falsy value
            (``None`` or ``""``) is treated as "nothing uploaded".

    Returns:
        A ``(text, status)`` tuple: the recognized text, and a message
        saying where the transcript was written (or that no audio was
        provided, in which case ``text`` is empty).
    """
    # NOTE(review): presumably Gradio passes None when the button is clicked
    # before a file is uploaded; return early instead of failing in Whisper.
    if not audio_file:
        return "", "No audio file provided"

    # Pin the language to Mandarin to improve Chinese speech recognition.
    result = model.transcribe(audio_file, language="Mandarin")
    text = result["text"]

    # Name the transcript after the uploaded file (extension stripped).
    base_name = os.path.splitext(os.path.basename(audio_file))[0]
    transcript_file_path = f"txt/{base_name}_transcript.txt"

    # The output directory is not guaranteed to exist; create it on demand
    # so open() below does not raise FileNotFoundError.
    os.makedirs(os.path.dirname(transcript_file_path), exist_ok=True)

    # Write as UTF-8 explicitly: the transcript contains Chinese text and the
    # platform default encoding may not be able to represent it.
    with open(transcript_file_path, "w", encoding="utf-8") as file:
        file.write(text)

    return text, f"Transcription saved to {transcript_file_path}"
30
+
31
# Build the Gradio interface: an upload widget and a button, with the
# recognized text and the save status shown as outputs.
# NOTE(review): the original indentation was not recoverable here; the
# nesting below assumes the audio input and the button share the row.
with gr.Blocks(
    css=(
        ".container { max-width: 800px; margin: auto; } "
        ".gradio-app { background-color: #f0f0f0; } "
        "button { background-color: #4CAF50; color: white; }"
    )
) as demo:
    gr.Markdown("ASR 語音語料辨識修正工具")
    with gr.Row():
        audio_input = gr.Audio(
            source="upload",
            type="filepath",
            label="上傳你的音檔",
        )
        submit_button = gr.Button("語音識別")
    output_text = gr.TextArea(label="識別結果")
    save_status = gr.Text(label="儲存結果")

    # Run transcribe on click and route its two return values to the
    # two output widgets.
    submit_button.click(
        fn=transcribe,
        inputs=audio_input,
        outputs=[output_text, save_status],
    )

demo.launch()