import gradio as gr
import numpy as np
from dotenv import load_dotenv
from fastrtc import (
    AdditionalOutputs,
    ReplyOnPause,
    Stream,
    WebRTCError,
    get_current_context,
    get_hf_turn_credentials,
    get_hf_turn_credentials_async,
    get_stt_model,
    get_tts_model,
)
from huggingface_hub import InferenceClient
load_dotenv()

# Default speech-to-text and text-to-speech models provided by FastRTC
stt_model = get_stt_model()
tts_model = get_tts_model()

# Per-connection conversation history, keyed by WebRTC connection id
conversations: dict[str, list[dict[str, str]]] = {}
def response(
    audio: tuple[int, np.ndarray],
    hf_token: str | None,
):
    if hf_token is None or hf_token == "":
        raise WebRTCError("HF Token is required")

    llm_client = InferenceClient(provider="auto", token=hf_token)

    # Look up (or create) the conversation history for this WebRTC connection
    context = get_current_context()
    if context.webrtc_id not in conversations:
        conversations[context.webrtc_id] = [
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant that can have engaging conversations. "
                    "Your responses must be very short and concise. No more than two sentences. "
                    "Reasoning: low"
                ),
            }
        ]
    messages = conversations[context.webrtc_id]

    # Transcribe the user's speech and add it to the conversation
    transcription = stt_model.stt(audio)
    messages.append({"role": "user", "content": transcription})

    # Stream the LLM completion and collect the full reply text
    output = llm_client.chat.completions.create(  # type: ignore
        model="openai/gpt-oss-20b",
        messages=messages,  # type: ignore
        max_tokens=1024,
        stream=True,
    )
    output_text = ""
    for chunk in output:
        output_text += chunk.choices[0].delta.content or ""

    messages.append({"role": "assistant", "content": output_text})
    conversations[context.webrtc_id] = messages

    # Speak the reply, then push the updated history to the chatbot component
    yield from tts_model.stream_tts_sync(output_text)
    yield AdditionalOutputs(messages)
# Extra Gradio components: chat transcript output and HF token input
chatbot = gr.Chatbot(label="Chatbot", type="messages")
token = gr.Textbox(
    label="HF Token",
    value="",
    type="password",
)
stream = Stream(
    modality="audio",
    mode="send-receive",
    handler=ReplyOnPause(response),
    # Hugging Face TURN credentials: a long-lived config for the server,
    # and a callable that fetches fresh credentials per client connection
    server_rtc_configuration=get_hf_turn_credentials(ttl=600 * 10000),
    rtc_configuration=get_hf_turn_credentials,
    additional_inputs=[token],
    additional_outputs=[chatbot],
    additional_outputs_handler=lambda old, new: new,
    ui_args={"title": "Talk To OpenAI GPT-OSS 20B (Powered by FastRTC ⚡️)"},
    time_limit=90,
    concurrency_limit=5,
)

stream.ui.launch()
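
# Alternative launch (a sketch, not part of the app above): instead of the
# built-in Gradio UI, the stream can be mounted on a FastAPI app. This assumes
# fastapi/uvicorn are installed and that your fastrtc version exposes
# Stream.mount(); check the FastRTC docs for your release before relying on it.
#
# from fastapi import FastAPI
#
# app = FastAPI()
# stream.mount(app)  # registers the WebRTC endpoints on the FastAPI app
# # run with: uvicorn app:app --host 0.0.0.0 --port 7860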