# NOTE: the "Spaces: Sleeping" lines above/here were Hugging Face Spaces page
# residue captured during extraction — not part of the source file.
import os
import time

import numpy as np
from dotenv import load_dotenv
from fastapi import FastAPI
from fastapi.responses import RedirectResponse
from fastrtc import (
    ReplyOnPause,
    Stream,
    get_stt_model,
    get_tts_model,
)
from gradio.utils import get_space
from numpy.typing import NDArray
from openai import OpenAI

load_dotenv()

# LLM responses come from SambaNova's OpenAI-compatible endpoint.
sambanova_client = OpenAI(
    api_key=os.getenv("SAMBANOVA_API_KEY"), base_url="https://api.sambanova.ai/v1"
)
# Local speech-to-text / text-to-speech models provided by fastrtc.
stt_model = get_stt_model()
tts_model = get_tts_model()

# Shared conversation state; echo() appends one user + one assistant message
# per turn (NOTE(review): grows without bound for long sessions).
chat_history = [
    {
        "role": "system",
        "content": (
            # Fix: the original adjacent literals joined without a space,
            # producing "…conversation.Please keep…".
            "You are a helpful assistant having a spoken conversation. "
            "Please keep your answers short and concise."
        ),
    }
]
def echo(audio: tuple[int, NDArray[np.int16]]):
    """Handle one spoken turn: transcribe, query the LLM, stream TTS audio.

    Args:
        audio: ``(sample_rate, samples)`` pair delivered by fastrtc's
            pause detector.

    Yields:
        Audio chunks produced by ``tts_model`` for the assistant's reply.
    """
    prompt = stt_model.stt(audio)
    print("prompt", prompt)
    chat_history.append({"role": "user", "content": prompt})

    start_time = time.time()
    response = sambanova_client.chat.completions.create(
        model="Meta-Llama-3.2-3B-Instruct",
        messages=chat_history,
        max_tokens=200,
    )
    print("time taken inference", time.time() - start_time)

    # The API may return None for content; fall back to "" so the history
    # and the TTS model never see a non-string. Also use a distinct name —
    # the original reused `prompt`, shadowing the user's transcript.
    reply = response.choices[0].message.content or ""
    chat_history.append({"role": "assistant", "content": reply})

    start_time = time.time()
    for audio_chunk in tts_model.stream_tts_sync(reply):
        yield audio_chunk
    print("time taken tts", time.time() - start_time)
# Wire the pause-detection handler into a bidirectional ("send-receive")
# audio stream served over WebRTC.
stream = Stream(
    handler=ReplyOnPause(echo),
    modality="audio",
    mode="send-receive",
    rtc_configuration=None,  # get_twilio_turn_credentials() if get_space() else None,
    # Cap concurrent sessions when running on Hugging Face Spaces.
    concurrency_limit=20 if get_space() else None,
)
app = FastAPI()
# Expose the stream's endpoints (including /ui) on the FastAPI app.
stream.mount(app)
@app.get("/")  # Fix: without registration this handler was never reachable.
async def index():
    """Redirect the root path to the stream UI.

    Locally the UI is mounted at ``/ui``; on Hugging Face Spaces the
    redirect targets the Space's public UI URL instead.
    """
    return RedirectResponse(
        url="/ui" if not get_space() else "https://fastrtc-echo-audio.hf.space/ui/"
    )
if __name__ == "__main__":
    # `os` is already imported at the top of the file; the original
    # re-imported it here redundantly.
    # MODE selects how to serve: Gradio UI, phone bridge, or plain FastAPI.
    if (mode := os.getenv("MODE")) == "UI":
        stream.ui.launch(server_port=7860)
    elif mode == "PHONE":
        stream.fastphone(port=7860)
    else:
        import uvicorn

        uvicorn.run(app, host="0.0.0.0", port=7860)