Spaces:
Runtime error
Runtime error
streaming chat into speech
Browse files- debug.py +8 -10
- speech_service.py +5 -18
- streaming_chat_service.py +71 -0
debug.py
CHANGED
|
@@ -4,7 +4,7 @@ from dotenv import load_dotenv
|
|
| 4 |
from speech_service import SpeechService
|
| 5 |
from concurrent.futures import ThreadPoolExecutor
|
| 6 |
from audio_stream_processor import AudioStreamProcessor
|
| 7 |
-
|
| 8 |
|
| 9 |
def run_debug_code():
|
| 10 |
load_dotenv()
|
|
@@ -14,11 +14,11 @@ def run_debug_code():
|
|
| 14 |
# print ("CLIP success")
|
| 15 |
|
| 16 |
print ("Initializing Chat")
|
| 17 |
-
chat_service = ChatService()
|
|
|
|
|
|
|
| 18 |
|
| 19 |
user_speech_service = SpeechService(voice_id="Adam")
|
| 20 |
-
ai_speech_service = SpeechService(voice_id="2OviOUQc1JsQRQgNkVBj") # Chales003
|
| 21 |
-
processor = AudioStreamProcessor()
|
| 22 |
|
| 23 |
# user_speech_service.print_voices() # if you want to see your custom voices
|
| 24 |
|
|
@@ -32,15 +32,13 @@ def run_debug_code():
|
|
| 32 |
print ("")
|
| 33 |
print (f'prompt: "{prompt}"')
|
| 34 |
stream = user_speech_service.stream(prompt)
|
| 35 |
-
|
| 36 |
|
| 37 |
-
response = chat_service.chat(prompt)
|
| 38 |
print ("")
|
| 39 |
-
print (f'response:
|
| 40 |
-
|
| 41 |
-
processor.add_audio_stream(stream)
|
| 42 |
|
| 43 |
-
|
| 44 |
print ("Chat success")
|
| 45 |
|
| 46 |
|
|
|
|
| 4 |
from speech_service import SpeechService
|
| 5 |
from concurrent.futures import ThreadPoolExecutor
|
| 6 |
from audio_stream_processor import AudioStreamProcessor
|
| 7 |
+
from streaming_chat_service import StreamingChatService
|
| 8 |
|
| 9 |
def run_debug_code():
|
| 10 |
load_dotenv()
|
|
|
|
| 14 |
# print ("CLIP success")
|
| 15 |
|
| 16 |
print ("Initializing Chat")
|
| 17 |
+
# chat_service = ChatService()
|
| 18 |
+
audio_processor = AudioStreamProcessor()
|
| 19 |
+
chat_service = StreamingChatService(audio_processor, voice_id="2OviOUQc1JsQRQgNkVBj") # Chales003
|
| 20 |
|
| 21 |
user_speech_service = SpeechService(voice_id="Adam")
|
|
|
|
|
|
|
| 22 |
|
| 23 |
# user_speech_service.print_voices() # if you want to see your custom voices
|
| 24 |
|
|
|
|
| 32 |
print ("")
|
| 33 |
print (f'prompt: "{prompt}"')
|
| 34 |
stream = user_speech_service.stream(prompt)
|
| 35 |
+
audio_processor.add_audio_stream(stream)
|
| 36 |
|
|
|
|
| 37 |
print ("")
|
| 38 |
+
print (f'response:')
|
| 39 |
+
response = chat_service.respond_to(prompt)
|
|
|
|
| 40 |
|
| 41 |
+
audio_processor.close()
|
| 42 |
print ("Chat success")
|
| 43 |
|
| 44 |
|
speech_service.py
CHANGED
|
@@ -25,24 +25,11 @@ class SpeechService:
|
|
| 25 |
print (voice)
|
| 26 |
|
| 27 |
def speak(self, prompt):
    """Synthesize *prompt* via the streaming TTS endpoint, buffer the whole
    clip, then play it back in one shot (blocking)."""
    chunks = generate(
        text=prompt,
        voice=self._voice_id,
        model=self._model_id,
        stream=True,
    )
    # Drain the stream into a single bytes buffer before playback;
    # the generator may yield None sentinels, which are skipped.
    buffered = b"".join(part for part in chunks if part is not None)
    play(buffered)
|
| 48 |
|
|
|
|
| 25 |
print (voice)
|
| 26 |
|
| 27 |
def speak(self, prompt):
    """Render *prompt* to speech with the configured voice/model and play it
    synchronously (non-streaming)."""
    rendered = generate(
        text=prompt,
        voice=self._voice_id,
        model=self._model_id,
    )
    play(rendered)
|
| 35 |
|
streaming_chat_service.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
import torch
|
| 4 |
+
import openai
|
| 5 |
+
|
| 6 |
+
from audio_stream_processor import AudioStreamProcessor
|
| 7 |
+
from speech_service import SpeechService
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class StreamingChatService:
    """Streams an OpenAI chat completion and speaks it sentence-by-sentence.

    Tokens are accumulated as they arrive; whenever a complete sentence
    (terminated by '.', '?' or '!') is available it is synthesized through
    SpeechService and queued on the AudioStreamProcessor, so audio playback
    begins before the model has finished generating the full reply.
    """

    # Characters that mark the end of a speakable sentence.
    _SENTENCE_TERMINATORS = (".", "?", "!")

    def __init__(self, audio_processor: "AudioStreamProcessor" = None, api="openai", model_id="gpt-3.5-turbo", voice_id="Bella"):
        """Set up the chat backend and the TTS pipeline.

        audio_processor -- sink for synthesized audio streams (its
            add_audio_stream() is called per sentence).
        api      -- backend identifier; only "openai" is used here.
        model_id -- OpenAI chat model name.
        voice_id -- voice passed through to SpeechService.

        NOTE: the annotation is a string on purpose — the original code
        annotated the parameter with ``AudioStreamProcessor()``, which
        *instantiated* a processor as a side effect the moment the ``def``
        was evaluated (annotations are executed at definition time).
        """
        self._audio_processor = audio_processor
        self._speech_service = SpeechService(voice_id=voice_id)
        self._api = api
        self._device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self._system_prompt = None

        openai.api_key = os.getenv("OPENAI_API_KEY")
        self._model_id = model_id
        self.reset()

    def reset(self):
        """Clear the conversation history, re-seeding the system prompt if set."""
        self._messages = []
        if self._system_prompt:
            self._messages.append({"role": "system", "content": self._system_prompt})

    def _should_we_send_to_voice(self, sentence):
        """Return the speakable prefix of *sentence*, or False if none yet.

        A prefix is speakable only when a terminator is present AND more text
        follows it; text ending exactly on a terminator is deliberately held
        back until the next chunk confirms the sentence boundary (e.g. to
        avoid splitting on "..." or trailing punctuation still arriving).
        """
        terminators = self._SENTENCE_TERMINATORS
        if any(c in sentence for c in terminators) and sentence[-1] not in terminators:
            # Speak everything up to and including the LAST terminator seen.
            last_index = max(sentence.rfind(c) for c in terminators)
            return sentence[:last_index + 1]
        return False

    def respond_to(self, prompt):
        """Stream a chat completion for *prompt*, speaking it as it arrives.

        Appends the user prompt and the assistant reply to the history and
        returns the complete assistant response text.
        """
        self._messages.append({"role": "user", "content": prompt})
        agent_response = ""
        current_sentence = ""

        response = openai.ChatCompletion.create(
            model=self._model_id,
            messages=self._messages,
            temperature=1.0,  # NOTE(review): 1.0 is the sampling default; use 0.0 for deterministic/debug output
            stream=True
        )

        for chunk in response:
            chunk_message = chunk['choices'][0]['delta']
            if 'content' in chunk_message:
                chunk_text = chunk_message['content']
                # end="" keeps streamed tokens on one line (the original
                # printed one token per line); flush so they show immediately.
                print(chunk_text, end="", flush=True)
                current_sentence += chunk_text
                agent_response += chunk_text
                text_to_speak = self._should_we_send_to_voice(current_sentence)
                if text_to_speak:
                    stream = self._speech_service.stream(text_to_speak)
                    self._audio_processor.add_audio_stream(stream)
                    # Keep whatever trails the last terminator as the start
                    # of the next sentence.
                    current_sentence = current_sentence[len(text_to_speak):]

        # Flush any remaining partial sentence once the stream ends.
        if len(current_sentence) > 0:
            stream = self._speech_service.stream(current_sentence)
            self._audio_processor.add_audio_stream(stream)
        self._messages.append({"role": "assistant", "content": agent_response})
        return agent_response