# app.py
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
from threading import Thread
import gradio as gr
import torch
# load model and tokenizer
model_name = "inclusionAI/Ling-lite-1.5"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto",
    trust_remote_code=True
).eval()
# define chat function
def chat(user_input, max_new_tokens=512):
    # build the chat history with a system prompt and the user's question
    messages = [
        {"role": "system", "content": "You are Ling, an assistant created by inclusionAI"},
        {"role": "user", "content": user_input}
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # encode the input prompt
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # create a streamer; skip_prompt=True keeps the prompt out of the streamed output
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # run generation in a background thread so tokens can be streamed as they arrive
    def generate():
        model.generate(**inputs, max_new_tokens=int(max_new_tokens), streamer=streamer)
    thread = Thread(target=generate)
    thread.start()
    # accumulate the partial response and yield it so the UI updates live
    generated_text = ""
    for new_text in streamer:
        generated_text += new_text
        yield generated_text
    thread.join()
# construct the Gradio interface
interface = gr.Interface(
    fn=chat,
    inputs=[
        gr.Textbox(lines=8, label="Enter your question"),
        gr.Slider(minimum=100, maximum=1024, step=50, label="Max new tokens")
    ],
    outputs=[
        gr.Textbox(lines=8, label="Model response")
    ],
    title="Ling-lite-1.5 AI Assistant",
    description="A conversational text-generation demo based on [inclusionAI/Ling-lite-1.5](https://huggingface.co/inclusionAI/Ling-lite-1.5).",
    examples=[
        ["Explain the basic concepts of large language models", 512],
        ["How can long-context dependencies in math problems be handled?", 768]
    ]
)
# launch the Gradio service
interface.launch()
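For the Space to build, the repository typically also needs a requirements file listing the dependencies the script imports. A minimal sketch, assuming only the packages used above (the original does not pin versions; accelerate is assumed because device_map="auto" relies on it):

# requirements.txt
transformers
torch
gradio
accelerate

Locally, the same demo can be started with python app.py, which by default serves the Gradio UI at http://127.0.0.1:7860.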