Spaces:

merterbak
/

gpt-oss-20b-demo

Running on Zero

App Files Files Community

gpt-oss-20b-demo / app.py

merterbak

No streaming quick demo

a4b21e5 verified 28 days ago

raw

history blame

3.04 kB

	from transformers import pipeline, TextIteratorStreamer
	import torch
	from threading import Thread
	import gradio as gr
	import spaces

	# Hugging Face Hub identifier of the checkpoint served by this demo.
	model_id = "openai/gpt-oss-20b"

	# Build the text-generation pipeline once, at import time, so every chat
	# request reuses the same loaded model. Dtype selection and device
	# placement are both delegated to the library's "auto" settings.
	pipe = pipeline(
	    task="text-generation",
	    model=model_id,
	    device_map="auto",
	    torch_dtype="auto",
	)

	def format_conversation_history(chat_history):
	    """Reduce chat history entries to plain ``{"role", "content"}`` messages.

	    Each entry of ``chat_history`` must be a mapping with ``"role"`` and
	    ``"content"`` keys; any other keys on the entry are dropped.

	    When an entry's content is a list of parts, the text of every dict part
	    that carries a ``"text"`` key is collected and joined with newlines.
	    Parts that are not dicts, or that have no ``"text"`` key (for example
	    file parts), are skipped instead of raising. If the list holds no text
	    part at all (including the empty list) the content falls back to
	    ``str(content)``.

	    Non-list content is passed through unchanged.

	    Args:
	        chat_history: Iterable of message mappings.

	    Returns:
	        A new list of ``{"role": ..., "content": ...}`` dicts, in order.

	    Raises:
	        KeyError: If an entry lacks ``"role"`` or ``"content"``.
	    """
	    messages = []
	    for item in chat_history:
	        role = item["role"]
	        content = item["content"]
	        if isinstance(content, list):
	            # Inspect every part, not just the first one: the text may come
	            # after a non-text part, and a part may not be a dict at all.
	            text_parts = [
	                str(part["text"])
	                for part in content
	                if isinstance(part, dict) and "text" in part
	            ]
	            content = "\n".join(text_parts) if text_parts else str(content)
	        messages.append({"role": role, "content": content})
	    return messages

	@spaces.GPU(duration=120)
	def generate_response(input_data, chat_history, max_new_tokens, system_prompt, temperature, top_p, top_k, repetition_penalty):
	    """Stream the model's reply to the newest user message.

	    The prompt is assembled as: optional system message, the prior history
	    (normalised by ``format_conversation_history``), then the new user
	    message. Generation runs on a worker thread that feeds a
	    ``TextIteratorStreamer``; this generator consumes the streamer and
	    yields the reply accumulated so far after every chunk.

	    Args:
	        input_data: Content of the new user message.
	        chat_history: Prior messages, each with "role" and "content" keys.
	        max_new_tokens: Forwarded to the pipeline as ``max_new_tokens``.
	        system_prompt: Text of the system message; omitted when falsy.
	        temperature: Forwarded to the pipeline as ``temperature``.
	        top_p: Forwarded to the pipeline as ``top_p``.
	        top_k: Forwarded to the pipeline as ``top_k``.
	        repetition_penalty: Forwarded to the pipeline as
	            ``repetition_penalty``.

	    Yields:
	        The cumulative reply text (all chunks received so far, joined).

	    Raises:
	        Exception: Whatever the pipeline call raised on the worker thread,
	            re-raised here once the stream has been drained.
	    """
	    new_message = {"role": "user", "content": input_data}
	    system_message = [{"role": "system", "content": system_prompt}] if system_prompt else []
	    processed_history = format_conversation_history(chat_history)
	    messages = system_message + processed_history + [new_message]

	    streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)
	    generation_kwargs = {
	        "streamer": streamer,
	        "max_new_tokens": max_new_tokens,
	        "do_sample": True,
	        "temperature": temperature,
	        "top_p": top_p,
	        "top_k": top_k,
	        "repetition_penalty": repetition_penalty,
	    }

	    worker_errors = []

	    def _run_pipeline():
	        # Run generation; on failure, unblock the consumer loop below.
	        try:
	            pipe(messages, **generation_kwargs)
	        except Exception as exc:
	            # Without an explicit end-of-stream marker the iteration over
	            # `streamer` would wait forever and the error would be lost.
	            worker_errors.append(exc)
	            streamer.end()

	    thread = Thread(target=_run_pipeline)
	    thread.start()

	    outputs = []
	    for text_chunk in streamer:
	        outputs.append(text_chunk)
	        yield "".join(outputs)

	    # The stream is finished; wait for the worker so its outcome is known,
	    # then surface any failure to the caller instead of ending silently.
	    thread.join()
	    if worker_errors:
	        raise worker_errors[0]

	# Extra controls rendered with the chat box. They are listed in the same
	# order as the parameters that follow `chat_history` in generate_response:
	# max_new_tokens, system_prompt, temperature, top_p, top_k,
	# repetition_penalty.
	_additional_inputs = [
	    gr.Slider(label="Max new tokens", minimum=64, maximum=4096, step=1, value=1024),
	    gr.Textbox(
	        label="System Prompt",
	        value="You are a helpful assistant. Reasoning: medium",
	        lines=4,
	        placeholder="Change system prompt",
	    ),
	    gr.Slider(label="Temperature", minimum=0.1, maximum=2.0, step=0.1, value=0.7),
	    gr.Slider(label="Top-p", minimum=0.05, maximum=1.0, step=0.05, value=0.9),
	    gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=50),
	    gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.0),
	]

	# Sample prompts: one single-element row per example, each holding a
	# {"text": ...} message.
	_examples = [
	    [{"text": "What are the benefits of open weight AI models"}],
	    [{"text": "Write a Python function to calculate the Fibonacci sequence"}],
	    [{"text": "Explain Newton laws clearly and concisely."}],
	]

	# Chat UI wired to the streaming generate_response callback.
	demo = gr.ChatInterface(
	    fn=generate_response,
	    additional_inputs=_additional_inputs,
	    examples=_examples,
	    cache_examples=False,
	    type="messages",
	    description="""
	# gpt-oss-20b
	You can adjust reasoning level in the system prompt like "Reasoning: high".
	""",
	    fill_height=True,
	    textbox=gr.Textbox(
	        label="Query Input",
	        placeholder="Type your prompt",
	    ),
	    stop_btn="Stop Generation",
	    multimodal=False,
	    theme=gr.themes.Soft(),
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch(share=True)