Spaces:

DeepLearning101
/

Corrector101zhTWT5

Sleeping

File size: 6,338 Bytes

import gradio as gr
from transformers import T5ForConditionalGeneration, T5Tokenizer
import torch
import os


# Model path, or a model ID on the Hugging Face Model Hub.
model_name_or_path = "DeepLearning101/Corrector101zhTWT5"
# NOTE(review): HF_HOME is conventionally the Hugging Face cache directory,
# while access tokens usually live in HF_TOKEN. The variable name is kept
# as-is because the deployment may rely on it — confirm it really holds a token.
auth_token = os.getenv("HF_HOME")

# Try to load the tokenizer and the model; the app cannot serve requests
# without them, so any failure here terminates the process with status 1.
try:
    tokenizer = T5Tokenizer.from_pretrained(model_name_or_path, use_auth_token=auth_token)
    model = T5ForConditionalGeneration.from_pretrained(model_name_or_path, use_auth_token=auth_token)
    model.eval()  # inference only: switch off training-time behaviour
except Exception as e:
    print(f"加載模型或分詞器失敗，錯誤信息：{e}")
    # Raise SystemExit directly instead of calling exit(): the exit() helper is
    # installed by the `site` module for interactive sessions and is not
    # guaranteed to exist (e.g. under `python -S` or in frozen builds).
    raise SystemExit(1)

if torch.cuda.is_available():
    model.cuda()  # move the model to the GPU when one is available

def correct_text(text):
    """Run the input text through the T5 model and return the corrected text.

    Args:
        text: The text to correct. It is truncated to 512 tokens before being
            fed to the model.

    Returns:
        The first generated sequence decoded with special tokens removed, or
        an empty string when ``text`` is empty, whitespace-only or ``None``
        (there is nothing to correct, so the model is not called).
    """
    if not text or not text.strip():
        return ""

    # Same budget for the encoder input and the generated output: a corrected
    # sentence is expected to be about as long as the sentence it came from.
    max_tokens = 512

    encoded = tokenizer(
        text,
        return_tensors="pt",
        max_length=max_tokens,
        truncation=True,
        padding=True,
    )
    # Follow the device the model actually lives on rather than re-checking
    # CUDA availability on every call.
    target_device = model.device
    encoded = {name: tensor.to(target_device) for name, tensor in encoded.items()}

    with torch.no_grad():
        # Without an explicit limit, generate() falls back to the generation
        # config's max_length (library default: 20 tokens unless the checkpoint
        # overrides it), which would cut off longer sentences.
        outputs = model.generate(**encoded, max_length=max_tokens)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

def main():
    """Build the Gradio interface around ``correct_text`` and launch it.

    The interface has one multi-line input textbox, one output textbox, an
    HTML title/description and five example sentences. It is launched with
    ``share=True``.
    """
    # HTML shown under the title: project links plus related blog posts.
    page_description = """<h2><a href='https://deep-learning-101.github.io' target='_blank'>deep-learning-101.github.io</a> | <a href='https://www.twman.org/AI' target='_blank'> AI </a> | <a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D.</a> | <a href='https://blog.twman.org/p/deeplearning101.html' target='_blank'>手把手帶你一起踩AI坑</a><br></h2><br>
                    輸入ASR文本，糾正同音字/詞錯誤<br>
                    <a href='https://github.com/Deep-Learning-101' target='_blank'>Deep Learning 101 Github</a> | <a href='http://deeplearning101.twman.org' target='_blank'>Deep Learning 101</a> | <a href='https://www.facebook.com/groups/525579498272187/' target='_blank'>台灣人工智慧社團 FB</a> | <a href='https://www.youtube.com/c/DeepLearning101' target='_blank'>YouTube</a><br>
                    <a href='https://blog.twman.org/2025/04/AI-Robot.html' target='_blank'>AI 陪伴機器人：2025 趨勢分析技術突破、市場潛力與未來展望</a> | <a href='https://blog.twman.org/2025/04/FinanceGenAI.html' target='_blank'>金融科技新浪潮：生成式 AI (GenAI) 應用場景、效益與導入挑戰</a><br>
                    <a href='https://blog.twman.org/2025/03/AIAgent.html' target='_blank'>避開 AI Agent 開發陷阱：常見問題、挑戰與解決方案 (實戰經驗)</a>：探討多種 AI 代理人工具的應用經驗與挑戰，分享實用經驗與工具推薦。<br>
                    <a href='https://blog.twman.org/2024/08/LLM.html' target='_blank'>白話文手把手帶你科普 GenAI</a>：淺顯介紹生成式人工智慧核心概念，強調硬體資源和數據的重要性。<br>
                    <a href='https://blog.twman.org/2024/09/LLM.html' target='_blank'>大型語言模型直接就打完收工？</a>：回顧 LLM 領域探索歷程，討論硬體升級對 AI 開發的重要性。<br>
                    <a href='https://blog.twman.org/2024/07/RAG.html' target='_blank'>檢索增強生成不是萬靈丹：挑戰與優化技巧</a>：探討 RAG 技術應用與挑戰，提供實用經驗分享和工具建議。<br>
                    <a href='https://blog.twman.org/2024/02/LLM.html' target='_blank'>大型語言模型 (LLM) 入門完整指南：原理、應用與未來 (2025 版)</a>：探討多種 LLM 工具的應用與挑戰，強調硬體資源的重要性。<br>
                    <a href='https://blog.twman.org/2023/04/GPT.html' target='_blank'>解析探索大型語言模型：模型發展歷史、訓練及微調技術的 VRAM 估算</a>：探討 LLM 的發展與應用，強調硬體資源在開發中的關鍵作用。。<br>
                    <a href='https://blog.twman.org/2024/11/diffusion.html' target='_blank'>Diffusion Model 完全解析：從原理、應用到實作 (AI 圖像生成)</a>：深入探討影像生成與分割技術的應用，強調硬體資源的重要性。<br>
                    <a href='https://blog.twman.org/2024/02/asr-tts.html' target='_blank'>ASR/TTS 開發避坑指南：語音辨識與合成的常見挑戰與對策</a>：探討 ASR 和 TTS 技術應用中的問題，強調數據質量的重要性。<br>
                    <a href='https://blog.twman.org/2021/04/NLP.html' target='_blank'>那些自然語言處理 (Natural Language Processing, NLP) 踩的坑</a>：分享 NLP 領域的實踐經驗，強調數據質量對模型效果的影響。<br>
                    <a href='https://blog.twman.org/2021/04/ASR.html' target='_blank'>那些語音處理 (Speech Processing) 踩的坑</a>：分享語音處理領域的實務經驗，強調資料品質對模型效果的影響。<br>
                    <a href='https://blog.twman.org/2023/07/wsl.html' target='_blank'>用PPOCRLabel來幫PaddleOCR做OCR的微調和標註</a><br>
                    <a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>                
                    <a href='https://github.com/shibing624/pycorrector' target='_blank'>Masked Language Model (MLM) as correction BERT</a>
                    基於transformers的T5ForConditionalGeneration"""

    # Sample sentences offered in the UI (one single-field row per example).
    sample_rows = [
        ["你究輸入利的手機門號跟生分證就可以了。"],
        ["這裡是客服中新，很高性為您服物，請問金天有什麼須要幫忙您得"],
        ["因為我們這邊是按天術比例計蒜給您的，其實不會有態大的穎響。也就是您用前面的資非的廢率來做計算"],
        ["我來看以下，他的時價是多少？起實您就可以直皆就不用到門事"],
        ["因為你現在月富是六九九嘛，我幫擬減衣百塊，兒且也不會江速"],
    ]

    # Input and output widgets, created in the same order as before.
    source_box = gr.Textbox(lines=5, placeholder="請輸入需要修正的中文文本...")
    result_box = gr.Textbox(label="修正後的文本")

    demo = gr.Interface(
        fn=correct_text,
        inputs=source_box,
        outputs=result_box,
        title="<h1>客服ASR文本AI糾錯系統</h1>",
        description=page_description,
        theme="default",
        examples=sample_rows,
    )
    demo.launch(share=True)

if __name__ == "__main__":
    main()