import torch
import gradio as gr
import whisper
import os

# Sanity check: print what the Whisper package exposes to confirm it loaded correctly
print("Whisper module contents:", dir(whisper))

# Load the Whisper large-v2 model on the GPU if one is available, otherwise on the CPU
model = whisper.load_model("large-v2", device="cuda" if torch.cuda.is_available() else "cpu")

def transcribe(audio_file):
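    """Run Whisper on the uploaded audio and save the transcript.

    Returns the transcribed text and the path of the saved .txt file so the
    Gradio UI can display the text and offer it as a download.
    """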
    audio_path = audio_file
    result = model.transcribe(audio_path)
    text = result["text"]
    base_name = os.path.splitext(os.path.basename(audio_path))[0]
    transcript_file_path = f"txt/{base_name}_transcript.txt"
    os.makedirs("txt", exist_ok=True)
    with open(transcript_file_path, "w", encoding="utf-8") as file:
        file.write(text)
    return text, transcript_file_path

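# Build the Gradio interface: an audio upload and a button on one row, a text
# area for the recognized text, and a file component for downloading the transcript.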
custom_css = (
    ".container { max-width: 800px; margin: auto; } "
    ".gradio-app { background-color: #f0f0f0; } "
    "button { background-color: #4CAF50; color: white; }"
)

with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("ASR Speech Corpus Recognition and Correction Tool")
    with gr.Row():
        # The Audio component uses type="filepath" so transcribe() receives a path string
        audio_input = gr.Audio(label="Upload your audio", type="filepath")
        submit_button = gr.Button("Speech Recognition")
    output_text = gr.TextArea(label="Recognition Result")
    download_link = gr.File(label="Download Transcript")
    submit_button.click(fn=transcribe, inputs=audio_input, outputs=[output_text, download_link])

demo.launch()