Spaces: Running on Zero
# app.py
import spaces
import gradio as gr
from gradio import update
from functools import lru_cache
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
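# Note: `spaces` is preinstalled on Hugging Face ZeroGPU Spaces; for local runs it is
# assumed you `pip install spaces` or stub out the @spaces.GPU decorator
# (see the sketch at the end of this file).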
# Available model choices
MODEL_LIST = [
    "unsloth/gemma-3-1b-pt",
    "ckiplab/gpt2-tiny-chinese",
    "ckiplab/gpt2-base-chinese",
    "liswei/Taiwan-ELM-270M",
    "liswei/Taiwan-ELM-1_1B",
    "benchang1110/Qwen2.5-Taiwan-1.5B-Instruct",
    "benchang1110/Taiwan-tinyllama-v1.0-base",
    "lianghsun/Llama-3.2-Taiwan-3B",
    "twinkle-ai/Llama-3.2-3B-F1-Instruct",
    "Epiculous/Violet_Twilight-v0.2",
]
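# Judging by their names, all of the above are small checkpoints (roughly 0.1B-3B
# parameters), which keeps cold-start loading tolerable under ZeroGPU.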
@lru_cache(maxsize=None)
def get_pipeline(model_name):
    """Load and cache one text-generation pipeline per model name."""
    tok = AutoTokenizer.from_pretrained(model_name)
    mdl = AutoModelForCausalLM.from_pretrained(
        model_name, weights_only=False, trust_remote_code=True
    )
    mdl.to("cuda")  # safe here: callers run inside a @spaces.GPU context on ZeroGPU
    return pipeline("text-generation", model=mdl, tokenizer=tok, device=0)
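# Usage sketch (assumption: called inside a @spaces.GPU context so CUDA is attached):
#   pipe = get_pipeline("ckiplab/gpt2-tiny-chinese")   # first call: loads and caches
#   same = get_pipeline("ckiplab/gpt2-tiny-chinese")   # cache hit: same object returned
#   assert pipe is same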
@spaces.GPU
def suggest_next(text, model_name, k, m):
    """
    Use beam search to generate the M most likely continuations,
    then refresh the choice list in a single update.
    """
    gen_pipe = get_pipeline(model_name)
    outs = gen_pipe(
        text,
        max_new_tokens=k,
        num_beams=m,
        num_return_sequences=m,
        do_sample=False,
        early_stopping=True,
    )
    # Keep only the newly generated text by stripping the prompt prefix
    suggestions = [out["generated_text"][len(text):].strip() for out in outs]
    suggestions = [s for s in suggestions if s]
    # Reset the selected value and refresh the choices
    return update(choices=suggestions, value=None)
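# Illustrative call (hypothetical outputs): suggest_next("今天天氣", MODEL_LIST[1], 5, 3)
# runs beam search with 3 beams, yields 3 continuations such as ["真好", "很熱", "不錯"],
# and pushes them into the Radio component via gr.update.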
def append_suggestion(current, choice):
    # Append the selected suggestion to the current text; no-op if nothing is selected
    if choice is None:
        return current
    return current + choice
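# e.g. append_suggestion("今天天", "氣很好") -> "今天天氣很好"
#      append_suggestion("今天天", None)    -> "今天天"  (no selection yet)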
with gr.Blocks() as demo:
    gr.Markdown(
        "## 🇹🇼 Taiwan Chinese IME Accelerator \n"
        "Small language models combined with ZeroGPU for a real-time, IME-style suggestion bar."
    )
    # Suggestion list pinned at the top, mimicking an IME candidate bar
    suggestions = gr.Radio(
        [], label="Suggestions", interactive=True, type="value", elem_id="suggestions-bar"
    )
    # Input area and generate button side by side: wide textbox, compact button on the right
    with gr.Row():
        with gr.Column(scale=5):
            input_text = gr.TextArea(
                label="Input text", lines=6,
                placeholder="Type a starting passage here…"
            )
        with gr.Column(scale=1, min_width=80):
            gpu_button = gr.Button("Generate suggestions (GPU)")
    # Parameter settings
    with gr.Row():
        model_selector = gr.Dropdown(
            MODEL_LIST, value=MODEL_LIST[0], label="Model"
        )
        k_slider = gr.Slider(
            minimum=1, maximum=50, step=1, value=1, label="K (max new tokens)"
        )
        m_slider = gr.Slider(
            minimum=1, maximum=30, step=1, value=10, label="M (number of suggestions / beams)"
        )
    # Event bindings
    gpu_button.click(
        fn=suggest_next,
        inputs=[input_text, model_selector, k_slider, m_slider],
        outputs=suggestions,
    )
    suggestions.change(
        fn=append_suggestion,
        inputs=[input_text, suggestions],
        outputs=input_text,
    )
demo.launch()
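# Local testing sketch (assumption: no ZeroGPU hardware). One way to run this file on a
# CPU-only machine is to stub the decorator before this module imports `spaces`:
#
#   import sys, types
#   fake = types.ModuleType("spaces")
#   fake.GPU = lambda fn: fn          # no-op replacement for @spaces.GPU
#   sys.modules["spaces"] = fake
#
# and to change mdl.to("cuda") / device=0 above to their CPU equivalents.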