# app.py — FastRTC voice-chat demo by freddyaboulton (commit 5859070, verified)
import gradio as gr
import numpy as np
from dotenv import load_dotenv
from fastrtc import (
AdditionalOutputs,
ReplyOnPause,
Stream,
WebRTCError,
get_current_context,
get_hf_turn_credentials,
get_hf_turn_credentials_async,
get_stt_model,
get_tts_model,
)
from huggingface_hub import InferenceClient
# Load environment variables (e.g. HF_TOKEN) from a local .env file.
load_dotenv()
# Speech-to-text and text-to-speech models; fastrtc downloads/initializes
# these at import time, so startup may take a moment.
stt_model = get_stt_model()
tts_model = get_tts_model()
# Per-connection chat history, keyed by the WebRTC connection id.
# Each value is an OpenAI-style message list ({"role": ..., "content": ...}).
conversations: dict[str, list[dict[str, str]]] = {}
def response(
    audio: tuple[int, np.ndarray],
    hf_token: str | None,
):
    """Handle one user utterance: transcribe, query the LLM, stream TTS audio.

    Invoked by ``ReplyOnPause`` each time the caller stops speaking.

    Args:
        audio: ``(sample_rate, samples)`` tuple from the WebRTC stream.
        hf_token: Hugging Face API token supplied via the UI textbox.

    Yields:
        TTS audio chunks for the assistant reply, followed by an
        ``AdditionalOutputs`` carrying the updated message history
        (rendered into the chatbot component).

    Raises:
        WebRTCError: If no HF token was provided.
    """
    if not hf_token:
        raise WebRTCError("HF Token is required")
    llm_client = InferenceClient(provider="auto", token=hf_token)
    # The current WebRTC connection id identifies this caller's history.
    context = get_current_context()
    if context.webrtc_id not in conversations:
        conversations[context.webrtc_id] = [
            {
                "role": "system",
                "content": (
                    # NOTE: trailing spaces matter — these adjacent string
                    # literals are concatenated into a single prompt.
                    "You are a helpful assistant that can have engaging conversations. "
                    "Your responses must be very short and concise. No more than two sentences. "
                    "Reasoning: low"
                ),
            }
        ]
    messages = conversations[context.webrtc_id]
    transcription = stt_model.stt(audio)
    messages.append({"role": "user", "content": transcription})
    output = llm_client.chat.completions.create(  # type: ignore
        model="openai/gpt-oss-20b",
        messages=messages,  # type: ignore
        max_tokens=1024,
        stream=True,
    )
    # Accumulate the streamed completion into one string; delta.content can
    # be None on role/finish chunks.
    output_text = ""
    for chunk in output:
        output_text += chunk.choices[0].delta.content or ""
    # `messages` aliases the list stored in `conversations`, so appending
    # here updates the shared history in place — no write-back needed.
    messages.append({"role": "assistant", "content": output_text})
    yield from tts_model.stream_tts_sync(output_text)
    yield AdditionalOutputs(messages)
# Chat transcript display; type="messages" expects OpenAI-style
# {"role": ..., "content": ...} dicts, matching the `conversations` lists.
chatbot = gr.Chatbot(label="Chatbot", type="messages")
# Masked textbox for the user's Hugging Face API token, passed to
# `response` as an additional input.
token = gr.Textbox(
    label="HF Token",
    value="",
    type="password",
)
# Wire the audio pipeline: ReplyOnPause invokes `response` after each pause
# in the caller's speech; the chatbot is refreshed from AdditionalOutputs.
stream = Stream(
    modality="audio",
    mode="send-receive",
    handler=ReplyOnPause(response),
    # NOTE(review): ttl=600*10000 is 6,000,000 seconds (~69 days) of TURN
    # credential validity — confirm this is intentional and not meant to be
    # e.g. 600 s * a count of sessions.
    server_rtc_configuration=get_hf_turn_credentials(ttl=600*10000),
    # Passed as a callable so client credentials are fetched per connection.
    rtc_configuration=get_hf_turn_credentials,
    additional_inputs=[token],
    additional_outputs=[chatbot],
    # Replace the old chatbot value wholesale with the latest history.
    additional_outputs_handler=lambda old, new: new,
    ui_args={"title": "Talk To OpenAI GPT-OSS 20B (Powered by FastRTC ⚡️)"},
    time_limit=90,       # max seconds per interaction
    concurrency_limit=5,  # max simultaneous callers
)
stream.ui.launch()