# NOTE: the "Spaces: Sleeping" lines above/here were Hugging Face Spaces page
# residue captured during extraction — not part of the source file.
import os
import time

import numpy as np
from dotenv import load_dotenv
from fastapi import FastAPI
from fastapi.responses import RedirectResponse
from fastrtc import (
    ReplyOnPause,
    Stream,
    get_stt_model,
    get_tts_model,
)
from gradio.utils import get_space
from numpy.typing import NDArray
from openai import OpenAI

load_dotenv()

# LLM responses come from SambaNova's OpenAI-compatible endpoint.
sambanova_client = OpenAI(
    api_key=os.getenv("SAMBANOVA_API_KEY"), base_url="https://api.sambanova.ai/v1"
)
# Local speech-to-text / text-to-speech models provided by fastrtc.
stt_model = get_stt_model()
tts_model = get_tts_model()

# Shared conversation state; echo() appends one user + one assistant message
# per turn (NOTE(review): grows without bound for long sessions).
chat_history = [
    {
        "role": "system",
        "content": (
            # Fix: the original adjacent literals joined without a space,
            # producing "…conversation.Please keep…".
            "You are a helpful assistant having a spoken conversation. "
            "Please keep your answers short and concise."
        ),
    }
]
def echo(audio: tuple[int, NDArray[np.int16]]):
    """Handle one spoken turn: transcribe, query the LLM, stream TTS audio.

    Args:
        audio: ``(sample_rate, samples)`` pair delivered by fastrtc's
            pause detector.

    Yields:
        Audio chunks produced by ``tts_model`` for the assistant's reply.
    """
    prompt = stt_model.stt(audio)
    print("prompt", prompt)
    chat_history.append({"role": "user", "content": prompt})

    start_time = time.time()
    response = sambanova_client.chat.completions.create(
        model="Meta-Llama-3.2-3B-Instruct",
        messages=chat_history,
        max_tokens=200,
    )
    print("time taken inference", time.time() - start_time)

    # The API may return None for content; fall back to "" so the history
    # and the TTS model never see a non-string. Also use a distinct name —
    # the original reused `prompt`, shadowing the user's transcript.
    reply = response.choices[0].message.content or ""
    chat_history.append({"role": "assistant", "content": reply})

    start_time = time.time()
    for audio_chunk in tts_model.stream_tts_sync(reply):
        yield audio_chunk
    print("time taken tts", time.time() - start_time)
# Wire the pause-detection handler into a bidirectional ("send-receive")
# audio stream served over WebRTC.
stream = Stream(
    handler=ReplyOnPause(echo),
    modality="audio",
    mode="send-receive",
    rtc_configuration=None,  # get_twilio_turn_credentials() if get_space() else None,
    # Cap concurrent sessions when running on Hugging Face Spaces.
    concurrency_limit=20 if get_space() else None,
)
app = FastAPI()
# Expose the stream's endpoints (including /ui) on the FastAPI app.
stream.mount(app)
@app.get("/")  # Fix: without registration this handler was never reachable.
async def index():
    """Redirect the root path to the stream UI.

    Locally the UI is mounted at ``/ui``; on Hugging Face Spaces the
    redirect targets the Space's public UI URL instead.
    """
    return RedirectResponse(
        url="/ui" if not get_space() else "https://fastrtc-echo-audio.hf.space/ui/"
    )
if __name__ == "__main__":
    # `os` is already imported at the top of the file; the original
    # re-imported it here redundantly.
    # MODE selects how to serve: Gradio UI, phone bridge, or plain FastAPI.
    if (mode := os.getenv("MODE")) == "UI":
        stream.ui.launch(server_port=7860)
    elif mode == "PHONE":
        stream.fastphone(port=7860)
    else:
        import uvicorn

        uvicorn.run(app, host="0.0.0.0", port=7860)