import torch
import gradio as gr
import whisper
import os

# (Label-Studio could actually be used for this as well XD)
# Load the Whisper model
model = whisper.load_model("large-v2", device="cuda" if torch.cuda.is_available() else "cpu")
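
# Optional fallback, an assumption not in the original app: "large-v2" needs roughly 10 GB of VRAM,
# so on CPU-only or low-memory machines a smaller checkpoint is a common substitute, e.g.:
# model = whisper.load_model("base")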

def transcribe(audio_file):
    # Gradio's type="filepath" audio input passes the path of the uploaded file
    audio_path = audio_file
    # Run Whisper speech recognition; language="Mandarin" is set explicitly (instead of
    # auto-detection) to improve Chinese transcription
    result = model.transcribe(audio_path, language="Mandarin")
    text = result["text"]
    # Use the base name of the uploaded audio file as the transcript file name
    base_name = os.path.splitext(os.path.basename(audio_path))[0]
    # Make sure the output directory exists, then build the transcript path
    os.makedirs("txt", exist_ok=True)
    transcript_file_path = f"txt/{base_name}_transcript.txt"
    # Save the transcript as a UTF-8 text file
    with open(transcript_file_path, "w", encoding="utf-8") as file:
        file.write(text)
    # The file path alone could be returned instead; here both the text and a status message are returned
    return text, f"Transcription saved to {transcript_file_path}"
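
# Quick sanity check outside Gradio, assuming a hypothetical sample file exists locally:
# text, status = transcribe("samples/demo.wav")
# print(status)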

# Build the Gradio interface
with gr.Blocks(css=".container { max-width: 800px; margin: auto; } .gradio-app { background-color: #f0f0f0; } button { background-color: #4CAF50; color: white; }") as demo:
    gr.Markdown("ASR speech corpus recognition and correction tool")
    with gr.Row():
        audio_input = gr.Audio(source="upload", type="filepath", label="Upload your audio file")
        submit_button = gr.Button("Transcribe")
    output_text = gr.TextArea(label="Transcription result")
    save_status = gr.Text(label="Save status")
    submit_button.click(fn=transcribe, inputs=audio_input, outputs=[output_text, save_status])

demo.launch()
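
# Running this outside the Space would need (assumed) dependencies: pip install torch gradio openai-whisper,
# plus ffmpeg installed on the system for Whisper's audio decoding. If a temporary public link or a fixed
# port is wanted, the last line could be swapped for one of Gradio's documented launch options, e.g.:
# demo.launch(share=True)
# demo.launch(server_name="0.0.0.0", server_port=7860)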