File size: 6,851 Bytes
adecb62
 
 
bf6610d
 
adecb62
 
 
 
 
 
 
 
 
a375dbf
e1385f3
fc85b67
bf6610d
5a007ca
adecb62
bf6610d
 
1a6c67a
e1385f3
5a007ca
adecb62
1ed6720
fe85e28
1ed6720
0e508c8
adecb62
 
 
e9bcee8
d1ed6b1
fc85b67
bf6610d
a35c804
adecb62
fc85b67
bf6610d
0e508c8
 
e9bcee8
fc85b67
 
adecb62
e91a94a
 
 
 
 
 
 
 
 
 
 
 
 
adecb62
a6d4367
adecb62
d1ed6b1
fc85b67
2f050a8
 
 
 
 
 
 
 
fc85b67
 
adecb62
104737f
adecb62
 
a5cafbd
104737f
7f25817
e1385f3
a5cafbd
 
d1ed6b1
a5cafbd
104737f
fc85b67
 
 
7854f13
adecb62
104737f
adecb62
bf6610d
7854f13
 
ba3994f
adecb62
104737f
 
 
adecb62
 
7854f13
 
 
adecb62
 
104737f
 
adecb62
e91a94a
1ed6720
e91a94a
e1385f3
adecb62
e91a94a
 
bf6610d
 
 
 
 
 
 
 
d4b2b49
e91a94a
0e508c8
e91a94a
 
 
 
 
0e508c8
e91a94a
0e508c8
bf6610d
 
e1385f3
7854f13
 
104737f
fe85e28
 
 
 
 
 
7854f13
 
ba3994f
104737f
 
 
 
2192d9b
ba3994f
e91a94a
7854f13
 
 
 
 
 
 
 
 
 
 
 
2192d9b
7854f13
 
2192d9b
7854f13
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
"""
hume_api.py

This file defines the interaction with the Hume text-to-speech (TTS) API using the
Hume Python SDK. It includes functionality for API request handling and processing API responses.

Key Features:
- Encapsulates all logic related to the Hume TTS API.
- Implements retry logic for handling transient API errors.
- Handles received audio and processes it for playback on the web.
- Provides detailed logging for debugging and error tracking.
"""

# Standard Library Imports
import logging
import time
from dataclasses import dataclass, field
from typing import Tuple, Union

# Third-Party Library Imports
from hume import AsyncHumeClient
from hume.core.api_error import ApiError
from hume.tts.types import Format, FormatMp3, PostedUtterance, ReturnTts
from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_exponential

# Local Application Imports
from src.config import Config, logger
from src.constants import CLIENT_ERROR_CODE, GENERIC_API_ERROR_MESSAGE, RATE_LIMIT_ERROR_CODE, SERVER_ERROR_CODE
from src.utils import save_base64_audio_to_file, validate_env_var


@dataclass(frozen=True)
class HumeConfig:
    """
    Frozen settings bundle used when talking to the Hume TTS API.

    The API key is not accepted as a constructor argument; it is resolved through
    ``validate_env_var("HUME_API_KEY")`` while the instance is being finalized.
    """

    # Filled in by __post_init__ (excluded from the generated __init__).
    api_key: str = field(init=False)
    # Audio format sent with synthesis requests; a fresh FormatMp3 unless overridden.
    file_format: Format = field(default_factory=FormatMp3)
    # Timeout value handed to AsyncHumeClient whenever a client is built.
    request_timeout: float = 40.0

    def __post_init__(self) -> None:
        """Reject a falsy file format, then resolve and store the API key."""
        if not self.file_format:
            raise ValueError("Hume TTS file format is not set.")

        # The dataclass is frozen, so plain attribute assignment is blocked;
        # object.__setattr__ is used to populate the computed field.
        object.__setattr__(self, "api_key", validate_env_var("HUME_API_KEY"))

    @property
    def client(self) -> AsyncHumeClient:
        """
        Build an asynchronous Hume client from this configuration.

        Nothing is cached: every access to this property constructs a new client.

        Returns:
            AsyncHumeClient: Client configured with the stored API key and request timeout.
        """
        client_kwargs = {"api_key": self.api_key, "timeout": self.request_timeout}
        return AsyncHumeClient(**client_kwargs)


class HumeError(Exception):
    """
    Exception raised when an interaction with the Hume TTS API fails.

    Attributes:
        message: Description of the failure (also passed to ``Exception``).
        original_exception: The underlying exception that caused this error, or None.
    """

    def __init__(self, message: str, original_exception: Union[Exception, None] = None):
        self.message = message
        self.original_exception = original_exception
        super().__init__(message)


class UnretryableHumeError(HumeError):
    """
    Hume TTS API failure that should not be retried.

    Carries the same ``message`` and ``original_exception`` attributes as ``HumeError``;
    the distinct type exists so retry logic can tell it apart from retryable failures.
    """

    def __init__(self, message: str, original_exception: Union[Exception, None] = None):
        # HumeError.__init__ stores both `message` and `original_exception`.
        super().__init__(message, original_exception)


@retry(
    retry=retry_if_exception(lambda e: not isinstance(e, UnretryableHumeError)),
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=2, max=5),
    before=before_log(logger, logging.DEBUG),
    after=after_log(logger, logging.DEBUG),
    reraise=True,
)
async def text_to_speech_with_hume(
    character_description: str,
    text: str,
    config: Config,
) -> Tuple[str, str]:
    """
    Asynchronously synthesizes speech using the Hume TTS API, processes audio data, and writes audio to a file.

    This function uses the Hume Python SDK to send a request to the Hume TTS API with a character description
    and text to be converted to speech. It takes the first generation from the response, passes its audio and
    a ``<generation_id>.mp3`` filename to ``save_base64_audio_to_file``, and returns the relevant details.

    The call is wrapped in a tenacity retry policy: up to 3 attempts with exponential backoff, retrying on
    any exception except ``UnretryableHumeError``. The last exception is re-raised once attempts run out.

    Args:
        character_description (str): Description used for voice synthesis.
        text (str): Text to be converted to speech.
        config (Config): Application configuration containing Hume API settings.

    Returns:
        Tuple[str, str]: A tuple containing:
            - generation_id (str): Unique identifier for the generated audio.
            - audio_file_path (str): Path to the saved audio file.

    Raises:
        HumeError: For rate-limit responses, other API errors outside the client-error range, an empty
            list of generations, and any unexpected exception during the call.
        UnretryableHumeError: For API errors whose status code satisfies
            ``CLIENT_ERROR_CODE <= status_code < SERVER_ERROR_CODE`` (presumably HTTP 4xx), other than
            the rate-limit code.
    """
    logger.debug(f"Synthesizing speech with Hume. Text length: {len(text)} characters.")
    hume_config = config.hume_config
    client = hume_config.client
    # Monotonic clock: the elapsed-time measurement must not be skewed by wall-clock adjustments.
    start_time = time.monotonic()
    try:
        utterance = PostedUtterance(text=text, description=character_description)
        response: ReturnTts = await client.tts.synthesize_json(
            utterances=[utterance],
            format=hume_config.file_format,
        )

        elapsed_time = time.monotonic() - start_time
        logger.info(f"Hume API request completed in {elapsed_time:.2f} seconds")

        generations = response.generations
        if not generations:
            raise HumeError("No generations returned by Hume API.")

        # Only the first generation is used.
        generation = generations[0]
        generation_id = generation.generation_id
        base64_audio = generation.audio
        filename = f"{generation_id}.mp3"
        audio_file_path = save_base64_audio_to_file(base64_audio, filename, config)

        return generation_id, audio_file_path

    except ApiError as e:
        elapsed_time = time.monotonic() - start_time
        # Single error log carrying both the timing and the full API error text.
        logger.error(f"Hume API request failed after {elapsed_time:.2f} seconds: {e!s}")
        clean_message = _extract_hume_api_error_message(e)

        status_code = e.status_code
        if status_code is not None:
            # The rate-limit check comes first so it stays retryable even if its code
            # falls inside the client-error range handled below.
            if status_code == RATE_LIMIT_ERROR_CODE:
                rate_limit_error_message = "We're working on scaling capacity. Please try again in a few seconds."
                raise HumeError(message=rate_limit_error_message, original_exception=e) from e
            if CLIENT_ERROR_CODE <= status_code < SERVER_ERROR_CODE:
                raise UnretryableHumeError(message=clean_message, original_exception=e) from e

        raise HumeError(message=clean_message, original_exception=e) from e

    except HumeError:
        # Already a domain error with a deliberate message (e.g. the empty-generations case above).
        # Let it propagate unchanged instead of having the catch-all below re-wrap it and
        # replace its message with the generic one.
        raise

    except Exception as e:
        error_type = type(e).__name__
        error_message = str(e) if str(e) else f"An error of type {error_type} occurred"
        logger.error("Error during Hume API call: %s - %s", error_type, error_message)
        clean_message = GENERIC_API_ERROR_MESSAGE

        raise HumeError(message=clean_message, original_exception=e) from e


def _extract_hume_api_error_message(e: ApiError) -> str:
    """
    Extracts a clean, user-friendly error message from a Hume API error response.

    The message is taken from ``e.body["message"]`` when ``e.body`` is a dict and that entry is a
    non-blank string. In every other case (no body, non-dict body, missing, empty or non-string
    message) the generic API error message is returned, so the result is always a usable string.

    Args:
        e (ApiError): The Hume API error exception containing response information.

    Returns:
        str: A clean, user-friendly error message suitable for display to end users.
    """
    body = getattr(e, "body", None)
    if isinstance(body, dict):
        message = body.get("message")
        # Guard against None / empty / non-string payloads so callers never receive
        # something that is not displayable text.
        if isinstance(message, str) and message.strip():
            return message

    return GENERIC_API_ERROR_MESSAGE