Spaces:

bobpopboom
/

chaty

Sleeping

chaty / app.py

hashhac

added template code

e724e7e 4 months ago

1.47 kB

	from fastrtc import (
	ReplyOnPause, AdditionalOutputs, Stream,
	audio_to_bytes, aggregate_bytes_to_16bit
	)
	import gradio as gr
	from groq import Groq
	import numpy as np
	import anthropic
	from elevenlabs import ElevenLabs

	# Module-level API clients, created once at import time and shared by every
	# call to the handler below. No credentials are passed explicitly here;
	# NOTE(review): each SDK presumably reads its API key from the environment
	# (e.g. GROQ_API_KEY / ANTHROPIC_API_KEY / ELEVENLABS_API_KEY) -- confirm
	# against the deployment's configured secrets.
	groq_client = Groq()  # used for speech-to-text transcription
	claude_client = anthropic.Anthropic()  # used to generate the text reply
	tts_client = ElevenLabs()  # used for text-to-speech synthesis


	# See "Talk to Claude" in Cookbook for an example of how to keep
	# track of the chat history.
	def response(
	    audio: tuple[int, np.ndarray],
	):
	    """Turn one chunk of recorded user audio into a streamed spoken reply.

	    Pipeline: transcribe the audio with Groq, send the transcript to Claude
	    as a single user message (no chat history is kept between calls), then
	    synthesize Claude's text answer with ElevenLabs and stream it back.

	    Args:
	        audio: The captured utterance. Presumably ``(sample_rate, samples)``
	            as delivered by fastrtc -- it is passed unchanged to
	            ``audio_to_bytes``.

	    Yields:
	        ``(24000, samples)`` tuples, where ``samples`` is an ``int16`` array
	        of shape ``(1, n)`` holding one chunk of 24 kHz PCM audio.

	    The generator finishes without yielding anything when the transcript or
	    Claude's reply is empty, rather than forwarding empty text to the next
	    API in the chain.
	    """
	    transcript = groq_client.audio.transcriptions.create(
	        file=("audio-file.mp3", audio_to_bytes(audio)),
	        model="whisper-large-v3-turbo",
	        response_format="verbose_json",
	    ).text
	    # Silence or background noise can produce an empty transcript. The
	    # Messages API rejects empty user content, so stop here instead of
	    # raising from inside the stream handler.
	    if not (transcript and transcript.strip()):
	        return

	    reply = claude_client.messages.create(
	        model="claude-3-5-haiku-20241022",
	        max_tokens=512,
	        messages=[{"role": "user", "content": transcript}],
	    )
	    # Keep only the text blocks; any other content block types are ignored.
	    reply_text = " ".join(
	        block.text
	        for block in reply.content
	        if getattr(block, "type", None) == "text"
	    )
	    # Nothing to say -> nothing to synthesize.
	    if not reply_text.strip():
	        return

	    pcm_stream = tts_client.text_to_speech.convert_as_stream(
	        text=reply_text,
	        voice_id="JBFqnCBsd6RMkjVDRZzb",
	        model_id="eleven_multilingual_v2",
	        # Must stay in sync with the 24000 sample rate yielded below.
	        output_format="pcm_24000",
	    )
	    for chunk in aggregate_bytes_to_16bit(pcm_stream):
	        # Reinterpret the raw bytes as 16-bit samples in a single row.
	        samples = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
	        yield (24000, samples)

	# Application entry object: an audio-only, bidirectional ("send-receive")
	# fastrtc stream whose handler is `response` wrapped in ReplyOnPause.
	# NOTE(review): ReplyOnPause presumably invokes `response` once the speaker
	# pauses and plays back the chunks it yields -- confirm in the fastrtc docs.
	stream = Stream(
	modality="audio",
	mode="send-receive",
	handler=ReplyOnPause(response),
	)