import os

import gradio as gr
import torch
import whisper

# Make sure the Whisper module was loaded correctly (prints the names it exposes).
print("Whisper module contents:", dir(whisper))

# Load the Whisper model once at start-up; use the GPU when CUDA is available.
model = whisper.load_model(
    "large-v2",
    device="cuda" if torch.cuda.is_available() else "cpu",
)

# Directory (relative to the working directory) where transcripts are written.
TRANSCRIPT_DIR = "txt"


def transcribe(audio_file):
    """Transcribe an audio file with Whisper and save the text to disk.

    Args:
        audio_file: Path to the audio file to transcribe, as delivered by the
            ``gr.Audio(type="filepath")`` input. A falsy value (``None`` or
            an empty string) is treated as "no audio provided".

    Returns:
        A ``(text, transcript_file_path)`` tuple: the recognised text and the
        path of the ``txt/<audio name>_transcript.txt`` file it was written to.

    Raises:
        gr.Error: If no audio file was provided.
    """
    # The Audio component is expected to pass None when nothing was uploaded;
    # fail with a readable message instead of a TypeError deep inside Whisper.
    if not audio_file:
        raise gr.Error("請先上載音頻")

    result = model.transcribe(audio_file)
    text = result["text"]

    # Name the transcript after the audio file, without its extension.
    base_name = os.path.splitext(os.path.basename(audio_file))[0]
    transcript_file_path = f"{TRANSCRIPT_DIR}/{base_name}_transcript.txt"
    os.makedirs(TRANSCRIPT_DIR, exist_ok=True)

    # Explicit UTF-8: the transcript is typically non-ASCII (e.g. Chinese) and
    # the platform default encoding may be unable to represent it.
    with open(transcript_file_path, "w", encoding="utf-8") as file:
        file.write(text)

    return text, transcript_file_path


# Text snippets rendered at the top of the page via gr.HTML.
TITLE = """

ASR 語音語料辨識修正工具

"""

SUBTITLE = """

TonTon Huang Ph.D. @ 2024/04

"""

LINKS = """ 手把手帶你一起踩AI坑
ComfyUI + Stable Diffuision
白話文手把手帶你科普 GenAI | 大型語言模型直接就打完收工？
什麼是大語言模型，它是什麼？想要嗎？ | 那些檢索增強生成要踩的坑
那些語音處理 (Speech Processing) 踩的坑 | 那些自然語言處理 (Natural Language Processing, NLP) 踩的坑
那些ASR和TTS可能會踩的坑 | 那些大模型開發會踩的坑
用PPOCRLabel來幫PaddleOCR做OCR的微調和標註 | 基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析
"""

# Page-level CSS handed to gr.Blocks.
CSS = (
    ".container { max-width: 800px; margin: auto; } "
    ".gradio-app { background-color: #f0f0f0; } "
    "button { background-color: #4CAF50; color: white; }"
)

with gr.Blocks(css=CSS) as demo:
    gr.HTML(TITLE)
    gr.HTML(SUBTITLE)
    gr.HTML(LINKS)

    # NOTE(review): the original indentation was lost, so which components
    # belong inside the Row is an assumption (audio input + button) - confirm.
    with gr.Row():
        # The declaration of the Audio component was changed: it is declared
        # with type="filepath".
        audio_input = gr.Audio(label="上載你的音頻", type="filepath")
        submit_button = gr.Button("語音識別")

    output_text = gr.TextArea(label="識別結果")
    download_link = gr.File(label="下載轉錄文件")

    # Clicking the button runs the transcription and fills both outputs.
    submit_button.click(
        fn=transcribe,
        inputs=audio_input,
        outputs=[output_text, download_link],
    )

demo.launch()