File size: 2,766 Bytes
dc13b6d
 
 
f358820
dc13b6d
 
 
da40aec
dc13b6d
 
 
 
 
 
 
 
 
 
 
 
 
8722708
dc13b6d
 
 
 
 
da40aec
7e8e5b2
da40aec
 
 
 
 
 
 
 
 
8722708
f358820
f4fd3fb
 
dc13b6d
 
f4fd3fb
 
 
dc13b6d
 
 
da40aec
7e8e5b2
60c960b
da40aec
dc13b6d
f4fd3fb
0fd90d3
 
 
 
f4fd3fb
7e8e5b2
 
 
 
 
dc13b6d
f4fd3fb
dc13b6d
da40aec
 
 
 
 
 
 
 
 
dc13b6d
0fd90d3
dc13b6d
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# app.py
import spaces
import gradio as gr
from gradio import update
from functools import lru_cache
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Selectable models: identifiers offered in the model dropdown and passed
# to get_pipeline() for loading. The first entry is the dropdown default.
MODEL_LIST = [
    "ckiplab/gpt2-tiny-chinese",
    "ckiplab/gpt2-base-chinese",
    "liswei/Taiwan-ELM-270M-Instruct",
    "liswei/Taiwan-ELM-1_1B",
    "google/gemma-3-1b-pt",
    "benchang1110/Qwen2.5-Taiwan-1.5B-Instruct",
    "benchang1110/Taiwan-tinyllama-v1.0-base",
]

@lru_cache(maxsize=None)
def get_pipeline(model_name):
    """Build the text-generation pipeline for ``model_name``, cached per name.

    The cache is unbounded (``maxsize=None``), so every distinct model that
    has been requested stays loaded for the lifetime of the process.

    Args:
        model_name: Model identifier handed to both ``from_pretrained`` calls.

    Returns:
        A ``text-generation`` pipeline wrapping the loaded model and tokenizer.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # NOTE(review): weights_only=False is forwarded to from_pretrained;
    # presumably it relaxes safe checkpoint unpickling - confirm that every
    # model offered to users is trusted before keeping this flag.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        weights_only=False,
    )
    # The model is placed on CUDA explicitly, and the pipeline is additionally
    # told to use device 0.
    model.to("cuda")
    return pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        device=0,
    )

@spaces.GPU
def suggest_next(text, model_name, k, m):
    """Generate up to ``m`` beam-search continuations of ``text``.

    Args:
        text: Prompt to continue.
        model_name: Model identifier passed to ``get_pipeline``.
        k: Maximum number of new tokens to generate (coerced to ``int``).
        m: Number of beams, which is also the number of sequences requested
            (coerced to ``int``).

    Returns:
        A Gradio update that replaces the suggestion choices with the
        generated continuations and resets the selected value to ``""``
        (an empty string rather than ``None``).
    """
    # Gradio documents Slider values as floats, while the generation settings
    # below are expected to be integers, so normalise them up front.
    max_new_tokens = int(k)
    beam_count = int(m)

    gen_pipe = get_pipeline(model_name)
    outs = gen_pipe(
        text,
        max_new_tokens=max_new_tokens,
        num_beams=beam_count,
        num_return_sequences=beam_count,
        do_sample=False,
        early_stopping=True,
    )
    # Slice each result past the prompt length so that only the newly
    # generated part remains.
    continuations = (out["generated_text"][len(text):] for out in outs)
    # Drop empty continuations (nothing to append) and duplicates (identical
    # options are ambiguous in a radio group); dict.fromkeys keeps the
    # original ranking order.
    suggestions = list(dict.fromkeys(c for c in continuations if c))
    return update(choices=suggestions, value="")

def append_suggestion(current, choice):
    """Append the selected suggestion to the text in the input box.

    Args:
        current: Text currently in the input box. ``None`` is treated as an
            empty string when a suggestion is appended.
        choice: Suggestion picked by the user; ``None`` or ``""`` means that
            nothing is selected.

    Returns:
        ``current`` unchanged when ``choice`` is empty, otherwise ``current``
        followed by ``choice``.
    """
    # Nothing selected: leave the input exactly as it is.
    if not choice:
        return current
    # Guard the left operand as well, so a None input value cannot raise
    # TypeError during concatenation.
    return (current or "") + choice

with gr.Blocks() as demo:
    # Page header.
    gr.Markdown(
        value=(
            "## 🇹🇼 台灣中文下段預測  \n"
            "結合小型語言模型與 ZeroGPU,即時 IME 風格建議條。"
        )
    )

    # Suggestion bar, created first so it sits at the top of the page.
    suggestions = gr.Radio(
        choices=[],
        label="建議清單",
        interactive=True,
        type="value",
        elem_id="suggestions-bar",
    )

    # Text input and the generate button share one row.
    with gr.Row():
        input_text = gr.TextArea(
            label="輸入文字",
            lines=4,
            placeholder="請在此輸入起始片段…",
        )
        gpu_button = gr.Button(value="使用 GPU 生成建議")

    # Generation settings: model, token budget (K) and beam count (M).
    with gr.Row():
        model_selector = gr.Dropdown(
            choices=MODEL_LIST,
            value=MODEL_LIST[0],
            label="選擇模型",
        )
        k_slider = gr.Slider(
            minimum=1,
            maximum=50,
            step=1,
            value=5,
            label="K(最大新生成詞元)",
        )
        m_slider = gr.Slider(
            minimum=1,
            maximum=10,
            step=1,
            value=5,
            label="M(建議數量 / Beam 數)",
        )

    # Event wiring: the button refreshes the suggestion list.
    gpu_button.click(
        fn=suggest_next,
        inputs=[input_text, model_selector, k_slider, m_slider],
        outputs=suggestions,
    )
    # A change of the selected suggestion appends it to the input text.
    suggestions.change(
        fn=append_suggestion,
        inputs=[input_text, suggestions],
        outputs=input_text,
    )

    # Start the app (called from inside the Blocks context, as before).
    demo.launch()