Spaces:

inclusionAI
/

Ling-lite-1.5

Running

Ling-lite-1.5 / app.py

雷娃

add interactive mode

2c010ad 2 months ago

2 kB

	# app.py
	from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
	from threading import Thread
	import gradio as gr
	import torch

	# Load the tokenizer and the causal LM once at import time; both come from
	# the same Hugging Face Hub repository.
	model_name = "inclusionAI/Ling-lite-1.5"
	tokenizer = AutoTokenizer.from_pretrained(model_name)

	# dtype and device placement are left to the library ("auto").
	# trust_remote_code=True permits running model code shipped in the repository.
	model = AutoModelForCausalLM.from_pretrained(
	    model_name, torch_dtype="auto", device_map="auto", trust_remote_code=True
	)
	# eval() returns the module itself, so calling it as a statement is
	# equivalent to chaining it onto the constructor call.
	model.eval()

	# define chat function
	def chat(user_input, max_new_tokens=512):
	    """Stream the model's reply to a single user message.

	    Builds a two-message conversation (a fixed system prompt followed by
	    ``user_input``), renders it with the tokenizer's chat template, and runs
	    ``model.generate`` on a background thread while yielding the reply text
	    accumulated so far.

	    Args:
	        user_input: Text of the user's question.
	        max_new_tokens: Upper bound on the number of generated tokens.
	            Coerced to ``int`` in case the caller passes a float.

	    Yields:
	        The reply generated so far (cumulative text, not only the newest
	        chunk), so each yielded value can replace the previous one.
	    """
	    messages = [
	        {"role": "system", "content": "You are Ling, an assistant created by inclusionAI"},
	        {"role": "user", "content": user_input},
	    ]
	    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

	    # Encode the rendered prompt and move the tensors to the model's device.
	    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

	    # skip_prompt=True keeps the echoed prompt (system + user text) out of the
	    # stream; without it the reply would start with a copy of the input.
	    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

	    # generate() blocks until completion, so it runs on a worker thread while
	    # this generator drains the streamer.
	    generation_kwargs = dict(inputs, max_new_tokens=int(max_new_tokens), streamer=streamer)
	    worker = Thread(target=model.generate, kwargs=generation_kwargs)
	    worker.start()

	    generated_text = ""
	    for new_text in streamer:
	        generated_text += new_text
	        yield generated_text

	    worker.join()

	# Build the Gradio UI: a question box and a generation-length slider feed
	# chat(), whose streamed output is shown in a single text box.
	interface = gr.Interface(
	    fn=chat,
	    inputs=[
	        gr.Textbox(lines=8, label="输入你的问题"),
	        # value=512 matches chat()'s default for max_new_tokens; without an
	        # explicit value the slider starts at its minimum (100).
	        gr.Slider(minimum=100, maximum=1024, step=50, value=512, label="生成长度"),
	    ],
	    outputs=[
	        gr.Textbox(lines=8, label="模型回复"),
	    ],
	    title="Ling-lite-1.5 AI助手",
	    description="基于 [inclusionAI/Ling-lite-1.5](https://huggingface.co/inclusionAI/Ling-lite-1.5) 的对话式文本生成演示。",
	    # Each example row supplies one value per input: [question, length].
	    examples=[
	        ["介绍大型语言模型的基本概念", 512],
	        ["如何解决数学问题中的长上下文依赖？", 768],
	    ],
	)

	# Launch the Gradio service.
	interface.launch()