Spaces:
Running
Running
File size: 4,952 Bytes
adecb62 c8f7e68 adecb62 bc5091e 5a007ca adecb62 bc5091e adecb62 a375dbf e9bcee8 8047063 5a007ca adecb62 c0b34a2 5a007ca adecb62 8047063 adecb62 e9bcee8 8047063 adecb62 bc5091e adecb62 e9bcee8 8047063 e9bcee8 adecb62 a6d4367 adecb62 a5cafbd a6d4367 a5cafbd a6d4367 a5cafbd 8047063 adecb62 8047063 adecb62 8047063 adecb62 36b195f adecb62 a6d4367 adecb62 e9bcee8 adecb62 e9bcee8 c8f7e68 8047063 c8f7e68 adecb62 bc5091e adecb62 49be7fc adecb62 e9bcee8 a6d4367 adecb62 c8f7e68 e9bcee8 e431b2b 36b195f adecb62 a6d4367 adecb62 a6d4367 adecb62 8047063 36b195f 8047063 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
"""
hume_api.py
This file defines the interaction with the Hume text-to-speech (TTS) API.
It includes functionality for API request handling and processing API responses.
Key Features:
- Encapsulates all logic related to the Hume TTS API.
- Implements retry logic for handling transient API errors.
- Handles received audio and processes it for playback on the web.
- Provides detailed logging for debugging and error tracking.
Classes:
- HumeConfig: Immutable configuration for interacting with Hume's TTS API.
- HumeError: Custom exception for Hume API-related errors.
Functions:
- text_to_speech_with_hume: Synthesizes speech from text using Hume's TTS API.
"""
# Standard Library Imports
from dataclasses import dataclass
import logging
import random
from typing import List, Literal, Optional, Tuple
# Third-Party Library Imports
import requests
from tenacity import retry, stop_after_attempt, wait_fixed, before_log, after_log
# Local Application Imports
from src.config import logger
from src.utils import validate_env_var, truncate_text
HumeVoiceName = Literal['ITO', 'KORA', 'STELLA', 'DACHER']
@dataclass(frozen=True)
class HumeConfig:
"""Immutable configuration for interacting with the Hume TTS API."""
api_key: str = validate_env_var('HUME_API_KEY')
tts_endpoint_url: str = 'https://api.hume.ai/v0/tts'
voice_names: List[HumeVoiceName] = ('ITO', 'KORA', 'STELLA', 'DACHER')
audio_format: str = 'wav'
headers: dict = None
def __post_init__(self):
# Validate required attributes
if not self.api_key:
raise ValueError('Hume API key is not set.')
if not self.tts_endpoint_url:
raise ValueError('Hume TTS endpoint URL is not set.')
if not self.voice_names:
raise ValueError('Hume voice names list is not set.')
if not self.audio_format:
raise ValueError('Hume audio format is not set.')
# Set headers dynamically after validation
object.__setattr__(self, 'headers', {
'X-Hume-Api-Key': f'{self.api_key}',
'Content-Type': 'application/json',
})
class HumeError(Exception):
"""Custom exception for errors related to the Hume TTS API."""
def __init__(self, message: str, original_exception: Optional[Exception] = None):
super().__init__(message)
self.original_exception = original_exception
# Initialize the Hume client
hume_config = HumeConfig()
@retry(
stop=stop_after_attempt(1),
wait=wait_fixed(2),
before=before_log(logger, logging.DEBUG),
after=after_log(logger, logging.DEBUG),
reraise=True
)
def text_to_speech_with_hume(prompt: str, text: str, voice_name: HumeVoiceName) -> bytes:
"""
Synthesizes text to speech using the Hume TTS API and processes raw binary audio data.
Args:
prompt (str): The original user prompt (for debugging).
text (str): The generated text to be converted to speech.
voice_name (HumeVoiceName): Name of the voice Hume will use when synthesizing speech.
Returns:
voice_name: The name of the voice used for speech synthesis.
bytes: The raw binary audio data for playback.
Raises:
HumeError: If there is an error communicating with the Hume TTS API.
"""
logger.debug(f'Processing TTS with Hume. Prompt length: {len(prompt)} characters. Text length: {len(text)} characters.')
request_body = {
'text': text,
'voice': {
'name': voice_name
},
}
try:
# Synthesize speech using the Hume TTS API
response = requests.post(
url=hume_config.tts_endpoint_url,
headers=hume_config.headers,
json=request_body,
)
# Validate response
if response.status_code != 200:
logger.error(f'Hume TTS API Error: {response.status_code} - {response.text[:200]}... (truncated)')
raise HumeError(f'Hume TTS API responded with status {response.status_code}: {response.text[:200]}')
# Process response audio
if response.headers.get('Content-Type', '').startswith('audio/'):
audio = response.content # Raw binary audio data
logger.info(f'Received audio data from Hume ({len(audio)} bytes).')
return voice_name, audio
raise HumeError(f'Unexpected Content-Type: {response.headers.get("Content-Type", "Unknown")}')
except Exception as e:
logger.exception(f'Error synthesizing speech from text with Hume: {e}')
raise HumeError(
message=f'Failed to synthesize speech from text with Hume: {e}',
original_exception=e,
)
def get_random_hume_voice_names() -> Tuple[HumeVoiceName, HumeVoiceName]:
"""
Get two random Hume voice names.
Voices:
- ITO
- KORA
- STELLA
- DACHER
"""
return tuple(random.sample(hume_config.voice_names, 2)) |