try:
    # Package-relative imports when this file is used as part of the package;
    # fall back to flat imports when it is run directly from the source tree.
    from ..llm import *
    from ..utils.db import *
    from .tts_providers.openai import tts_openai
    from .tts_providers.microsoft_local import tts_microsoft_local
except ImportError:
    from llm import *
    from utils.db import *
    from audio.tts_providers.openai import tts_openai
    from audio.tts_providers.microsoft_local import tts_microsoft_local

import os
import hashlib
import random
import threading


def is_local_tts_available():
    # This guard is presumably meant to probe for the optional local-TTS
    # dependency (the gpt-computer-agent[local_tts] extra); as written the
    # import it should attempt is absent, so the check always returns True.
    try:
        return True
    except Exception:
        return False


def is_openai_tts_available():
    the_model = load_model_settings()
    if llm_settings[the_model]["provider"] == "openai":
        if load_api_key() != "CHANGE_ME":
            return True
    return False


supported_openai_speakers = ["fable"]


def random_model(exclude):
    # Pick a different supported voice; with only a single supported speaker
    # there is nothing to alternate to, so fall back to `exclude` rather than
    # calling random.choice() on an empty list.
    models = supported_openai_speakers.copy()
    if exclude in models:
        models.remove(exclude)
    return random.choice(models) if models else exclude
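

# Added commentary (not in the original source): the helper below caches each
# synthesized chunk under a content-addressed filename, so an identical piece
# of text is only sent to the TTS backend once:
#
#     sha = hashlib.sha256(text_chunk.encode()).hexdigest()
#     location = os.path.join(artifacts_dir, f"{sha}.mp3")
#
# `artifacts_dir` is assumed to come from the star import of `utils.db` above.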
def generate_speech_chunk(text_chunk, index, voice, results):
    sha = hashlib.sha256(text_chunk.encode()).hexdigest()
    location = os.path.join(artifacts_dir, f"{sha}.mp3")
    if os.path.exists(location):
        # Cache hit: this chunk was synthesized in an earlier call.
        results[index] = location
    else:
        the_model = load_model_settings()  # loaded here but not used below
        tts_setting = load_tts_model_settings()
        if tts_setting == "openai":
            tts_openai(voice, text_chunk, location)
        if tts_setting == "microsoft_local":
            if not is_local_tts_available():
                print("Please install gpt-computer-agent[local_tts] to use local TTS")
            else:
                tts_microsoft_local(text_chunk, location)
        # The path is recorded even when synthesis was skipped above, so callers
        # can receive a location that does not exist on disk.
        results[index] = location


def split_text_to_sentences(text, max_chunk_size=300):
    """Splits text into sentences and ensures chunks do not exceed max_chunk_size."""
    sentences = text.split(".")
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence:
            # Skip the empty pieces produced by trailing or doubled periods.
            continue
        if len(current_chunk) + len(sentence) + 1 <= max_chunk_size:
            current_chunk += sentence + ". "
        else:
            if current_chunk:
                chunks.append(current_chunk.strip())
            current_chunk = sentence + ". "
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks
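

# Illustrative example for split_text_to_sentences (added, not in the original):
#
#     split_text_to_sentences("One. Two. Three", max_chunk_size=10)
#     # -> ["One. Two.", "Three."]
#
# Splitting on "." alone also breaks on abbreviations and decimal numbers; a
# proper sentence tokenizer would be more robust if that ever matters here.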


def text_to_speech(text):
    text_chunks = split_text_to_sentences(text)
    threads = []
    results = [None] * len(text_chunks)

    initial_voice = random.choice(supported_openai_speakers)
    for i, chunk in enumerate(text_chunks):
        voice = (
            initial_voice if i % 2 == 0 else random_model(initial_voice)
        )  # Alternate voices between consecutive chunks
        # Each worker writes only to its own index of `results`, so the shared
        # list needs no locking.
        thread = threading.Thread(
            target=generate_speech_chunk, args=(chunk, i, voice, results)
        )
        threads.append(thread)
        thread.start()

    for thread in threads:
        thread.join()

    mp3_files = [result for result in results if result is not None]
    return mp3_files
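

# Minimal local sketch (added for illustration; not part of the original module).
# It exercises only the pure-Python chunking helper, since text_to_speech() also
# needs TTS provider settings, an API key and an existing artifacts directory,
# and running this file still requires the imports at the top to resolve.
if __name__ == "__main__":
    sample = "This is the first sentence. Here is a second one. And a third"
    for i, chunk in enumerate(split_text_to_sentences(sample, max_chunk_size=40)):
        print(i, repr(chunk))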