# app.py
"""Gradio demo offering next-segment text suggestions from small causal LMs.

The user types a starting fragment, picks a model, and asks for M
continuations of at most K new tokens each.  Picking a continuation from the
dropdown appends it to the text box.
"""
from functools import lru_cache

# NOTE(review): `spaces` is kept ahead of the other third-party imports, as in
# the original ordering — ZeroGPU is understood to want it imported before
# anything initialises CUDA; confirm before reordering.
import spaces
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Selectable models.
MODEL_LIST = [
    "ckiplab/gpt2-tiny-chinese",
    "ckiplab/gpt2-base-chinese",
    "liswei/Taiwan-ELM-270M-Instruct",
    "liswei/Taiwan-ELM-1_1B",
    "google/gemma-3-1b-pt",
    "benchang1110/Qwen2.5-Taiwan-1.5B-Instruct",
    "benchang1110/Taiwan-tinyllama-v1.0-base",
]


@lru_cache(maxsize=None)
def get_pipeline(model_name: str):
    """Build (once per model name) a CUDA text-generation pipeline.

    Args:
        model_name: Hugging Face model id to load.

    Returns:
        A ``transformers`` text-generation pipeline bound to device 0.  The
        result is memoised by ``lru_cache`` so repeated calls with the same
        name reuse the already-loaded tokenizer and model.
    """
    tok = AutoTokenizer.from_pretrained(model_name)
    mdl = AutoModelForCausalLM.from_pretrained(model_name)
    mdl.to("cuda")
    return pipeline("text-generation", model=mdl, tokenizer=tok, device=0)


@spaces.GPU
def suggest_next(text: str, model_name: str, k: int, m: int) -> dict:
    """Run inference on the GPU (ZeroGPU H200) and build the suggestion list.

    Produces up to M continuations of ``text``, each at most K new tokens.

    Args:
        text: The prompt typed so far.
        model_name: Model id passed to ``get_pipeline``.
        k: Maximum number of newly generated tokens per suggestion.
        m: Number of suggestions requested.

    Returns:
        A ``gr.update`` payload that replaces the dropdown's *choices* with
        the suggestions and clears its current selection.
    """
    # Nothing to continue: present an empty list instead of calling the model.
    if not text:
        return gr.update(choices=[], value=None)

    # Slider values are coerced so generation always receives plain ints.
    max_new_tokens = int(k)
    num_suggestions = int(m)

    pipe = get_pipeline(model_name)
    outs = pipe(
        text,
        max_new_tokens=max_new_tokens,
        # Deterministic decoding can only return several sequences when beam
        # search is used; plain greedy decoding rejects
        # num_return_sequences > 1.
        num_beams=num_suggestions,
        num_return_sequences=num_suggestions,
        do_sample=False,
        # Let the pipeline strip the prompt itself rather than slicing by
        # len(text), which is wrong whenever the decoded prompt does not
        # reproduce the input string exactly.
        return_full_text=False,
    )

    # Drop empty continuations and duplicates while keeping the model's order.
    continuations = (out["generated_text"] for out in outs)
    unique = list(dict.fromkeys(c for c in continuations if c))

    # Returning a bare list would set the dropdown's *value*; the choices
    # have to be replaced explicitly.
    return gr.update(choices=unique, value=None)


def append_suggestion(current: str, choice):
    """Append the chosen suggestion to the current text.

    Args:
        current: Present content of the text box (``None`` treated as empty).
        choice: The selected suggestion, or ``None``/empty when the dropdown
            has no selection (for example right after its choices are reset).

    Returns:
        The text box content with ``choice`` appended, or unchanged when
        there is nothing to append.
    """
    base = current or ""
    if not choice:
        return base
    return base + choice


with gr.Blocks() as demo:
    gr.Markdown(
        "## 🇹🇼 台灣中文下段預測\n"
        "結合小型語言模型與 ZeroGPU,提供即時 IME 風格的下段文字建議。"
    )

    input_text = gr.TextArea(
        label="輸入文字",
        lines=4,
        placeholder="請在此輸入起始片段…",
    )

    with gr.Row():
        model_selector = gr.Dropdown(
            MODEL_LIST,
            value=MODEL_LIST[0],
            label="選擇模型",
        )
        k_slider = gr.Slider(
            minimum=1,
            maximum=50,
            step=1,
            value=5,
            label="K(最大新生成詞元)",
        )
        m_slider = gr.Slider(
            minimum=1,
            maximum=10,
            step=1,
            value=5,
            label="M(建議數量)",
        )

    suggestions = gr.Dropdown([], label="建議清單", interactive=True)
    gpu_button = gr.Button("使用 GPU 生成建議")

    # Generate suggestions on click and load them into the dropdown.
    gpu_button.click(
        fn=suggest_next,
        inputs=[input_text, model_selector, k_slider, m_slider],
        outputs=suggestions,
    )

    # Selecting a suggestion appends it to the text box.
    suggestions.change(
        fn=append_suggestion,
        inputs=[input_text, suggestions],
        outputs=input_text,
    )

demo.launch()