File size: 2,000 Bytes
26ca9d4
9423469
 
4ef74d7
 
 
26ca9d4
4ef74d7
 
 
 
 
0b90a57
628c773
 
 
26ca9d4
f29c587
26ca9d4
 
 
 
 
 
 
 
 
9423469
 
 
 
 
 
 
 
 
 
37ff5ad
9423469
 
 
2c010ad
 
9423469
 
73a2adf
628c773
 
26ca9d4
628c773
81390b6
628c773
 
81390b6
f29c587
81390b6
29f6a41
628c773
 
 
 
 
 
73a2adf
628c773
26ca9d4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# app.py
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
from threading import Thread
import gradio as gr
import torch

# load model and tokenizer
# NOTE: trust_remote_code=True executes model-repo code on load — acceptable here
# because the repo is pinned to a known publisher (inclusionAI).
model_name = "inclusionAI/Ling-lite-1.5"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",   # pick the checkpoint's native dtype (e.g. bf16) automatically
    device_map="auto",    # shard/place weights on available GPU(s), falling back to CPU
    trust_remote_code=True
).eval()                  # inference mode: disables dropout etc.

# define chat function
# define chat function
def chat(user_input, max_new_tokens=512):
    """Stream a chat completion for *user_input*.

    Generator: yields the accumulated assistant reply after every new chunk,
    which Gradio renders as live streaming output.

    Args:
        user_input: The user's message (a single-turn prompt; no history kept).
        max_new_tokens: Upper bound on generated tokens. Gradio's Slider
            delivers a float, so it is cast to int before generation.

    Yields:
        The assistant reply text generated so far.
    """
    # single-turn conversation: fixed system prompt + the user's message
    messages = [
        {"role": "system", "content": "You are Ling, an assistant created by inclusionAI"},
        {"role": "user", "content": user_input}
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # encode the input prompt on the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # skip_prompt=True: stream only newly generated tokens, not the echoed
    # prompt (fixes the prompt text appearing at the start of the reply)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # run generation in a background thread so we can consume the streamer here
    thread = Thread(
        target=model.generate,
        kwargs=dict(**inputs, max_new_tokens=int(max_new_tokens), streamer=streamer),
    )
    thread.start()

    generated_text = ""
    for new_text in streamer:
        generated_text += new_text
        yield generated_text

    thread.join()

# Construct Gradio Interface
# `chat` is a generator, so Gradio streams partial replies into the output box.
interface = gr.Interface(
    fn=chat,
    inputs=[
        gr.Textbox(lines=8, label="输入你的问题"),
        # maps to chat()'s max_new_tokens parameter
        gr.Slider(minimum=100, maximum=1024, step=50, label="生成长度")
    ],
    outputs=[
        gr.Textbox(lines=8, label="模型回复")
    ],
    title="Ling-lite-1.5 AI助手",
    description="基于 [inclusionAI/Ling-lite-1.5](https://huggingface.co/inclusionAI/Ling-lite-1.5)  的对话式文本生成演示。",
    examples=[
        ["介绍大型语言模型的基本概念", 512],
        ["如何解决数学问题中的长上下文依赖?", 768]
    ]
)

# launch Gradio service (blocks until the server is stopped)
interface.launch()