Spaces:
Running
Running
zach
committed on
Commit
·
e1385f3
1
Parent(s):
d699ad7
Relax timeout configs for Hume TTS calls, add exponential backoff, and add logs that record call times
Browse files
- pyproject.toml +1 -0
- src/app.py +26 -12
- src/integrations/hume_api.py +11 -6
pyproject.toml
CHANGED
@@ -43,6 +43,7 @@ ignore = [
|
|
43 |
"G004",
|
44 |
"PLR0912",
|
45 |
"PLR0913",
|
|
|
46 |
"PLR2004",
|
47 |
"RUF006",
|
48 |
"SIM117",
|
|
|
43 |
"G004",
|
44 |
"PLR0912",
|
45 |
"PLR0913",
|
46 |
+
"PLR0915",
|
47 |
"PLR2004",
|
48 |
"RUF006",
|
49 |
"SIM117",
|
src/app.py
CHANGED
@@ -129,37 +129,51 @@ class App:
|
|
129 |
provider_a, provider_b = choose_providers(text_modified, character_description)
|
130 |
|
131 |
try:
|
|
|
|
|
|
|
132 |
if provider_b == constants.HUME_AI:
|
|
|
|
|
133 |
num_generations = 2
|
134 |
-
# If generating 2 Hume outputs, do so in a single API call.
|
135 |
result = await text_to_speech_with_hume(character_description, text, num_generations, self.config)
|
|
|
136 |
# Enforce that 4 values are returned.
|
137 |
if not (isinstance(result, tuple) and len(result) == 4):
|
138 |
raise ValueError("Expected 4 values from Hume TTS call when generating 2 outputs")
|
|
|
139 |
generation_id_a, audio_a, generation_id_b, audio_b = result
|
|
|
140 |
else:
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
# Generate a single Hume output
|
145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
|
147 |
# Generate a second TTS output from the second provider
|
|
|
|
|
|
|
148 |
match provider_b:
|
149 |
case constants.ELEVENLABS:
|
150 |
-
|
151 |
case _:
|
152 |
# Additional TTS Providers can be added here.
|
153 |
raise ValueError(f"Unsupported provider: {provider_b}")
|
154 |
|
155 |
-
|
156 |
-
result_a, result_b = await asyncio.gather(*tasks)
|
157 |
|
158 |
-
|
159 |
-
|
160 |
|
161 |
-
generation_id_a, audio_a = result_a[0], result_a[1]
|
162 |
-
generation_id_b, audio_b = result_b[0], result_b[1]
|
163 |
|
164 |
# Shuffle options so that placement of options in the UI will always be random.
|
165 |
option_a = Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a)
|
|
|
129 |
provider_a, provider_b = choose_providers(text_modified, character_description)
|
130 |
|
131 |
try:
|
132 |
+
start_time = time.time()
|
133 |
+
logger.info(f"Starting speech synthesis with providers: {provider_a} and {provider_b}")
|
134 |
+
|
135 |
if provider_b == constants.HUME_AI:
|
136 |
+
# If generating 2 Hume outputs, do so in a single API call to reduce overhead
|
137 |
+
logger.info("Using single Hume API call for both audio outputs")
|
138 |
num_generations = 2
|
|
|
139 |
result = await text_to_speech_with_hume(character_description, text, num_generations, self.config)
|
140 |
+
|
141 |
# Enforce that 4 values are returned.
|
142 |
if not (isinstance(result, tuple) and len(result) == 4):
|
143 |
raise ValueError("Expected 4 values from Hume TTS call when generating 2 outputs")
|
144 |
+
|
145 |
generation_id_a, audio_a, generation_id_b, audio_b = result
|
146 |
+
logger.info(f"Completed dual Hume synthesis in {time.time() - start_time:.2f} seconds")
|
147 |
else:
|
148 |
+
# Process API calls sequentially to avoid resource contention
|
149 |
+
logger.info(f"Sequential processing: First generating audio with {provider_a}")
|
150 |
+
|
151 |
# Generate a single Hume output
|
152 |
+
num_generations = 1
|
153 |
+
result_a = await text_to_speech_with_hume(character_description, text, num_generations, self.config)
|
154 |
+
|
155 |
+
if not isinstance(result_a, tuple) or len(result_a) != 2:
|
156 |
+
raise ValueError("Expected 2 values from Hume TTS call when generating 1 output")
|
157 |
+
|
158 |
+
generation_id_a, audio_a = result_a[0], result_a[1]
|
159 |
+
logger.info(f"First audio generated in {time.time() - start_time:.2f} seconds")
|
160 |
|
161 |
# Generate a second TTS output from the second provider
|
162 |
+
logger.info(f"Now generating audio with {provider_b}")
|
163 |
+
second_start = time.time()
|
164 |
+
|
165 |
match provider_b:
|
166 |
case constants.ELEVENLABS:
|
167 |
+
result_b = await text_to_speech_with_elevenlabs(character_description, text, self.config)
|
168 |
case _:
|
169 |
# Additional TTS Providers can be added here.
|
170 |
raise ValueError(f"Unsupported provider: {provider_b}")
|
171 |
|
172 |
+
generation_id_b, audio_b = result_b[0], result_b[1]
|
|
|
173 |
|
174 |
+
logger.info(f"Second audio generated in {time.time() - second_start:.2f} seconds")
|
175 |
+
logger.info(f"Total synthesis time: {time.time() - start_time:.2f} seconds")
|
176 |
|
|
|
|
|
177 |
|
178 |
# Shuffle options so that placement of options in the UI will always be random.
|
179 |
option_a = Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a)
|
src/integrations/hume_api.py
CHANGED
@@ -13,12 +13,13 @@ Key Features:
|
|
13 |
|
14 |
# Standard Library Imports
|
15 |
import logging
|
|
|
16 |
from dataclasses import dataclass, field
|
17 |
from typing import Any, Dict, Literal, Tuple, Union
|
18 |
|
19 |
# Third-Party Library Imports
|
20 |
import httpx
|
21 |
-
from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt,
|
22 |
|
23 |
# Local Application Imports
|
24 |
from src.config import Config, logger
|
@@ -37,6 +38,7 @@ class HumeConfig:
|
|
37 |
headers: Dict[str, str] = field(init=False)
|
38 |
url: str = "https://api.hume.ai/v0/tts/octave"
|
39 |
file_format: HumeSupportedFileFormat = "mp3"
|
|
|
40 |
|
41 |
def __post_init__(self) -> None:
|
42 |
# Validate required attributes.
|
@@ -75,7 +77,7 @@ class UnretryableHumeError(HumeError):
|
|
75 |
@retry(
|
76 |
retry=retry_if_exception(lambda e: not isinstance(e, UnretryableHumeError)),
|
77 |
stop=stop_after_attempt(3),
|
78 |
-
wait=
|
79 |
before=before_log(logger, logging.DEBUG),
|
80 |
after=after_log(logger, logging.DEBUG),
|
81 |
reraise=True,
|
@@ -126,14 +128,17 @@ async def text_to_speech_with_hume(
|
|
126 |
"num_generations": num_generations,
|
127 |
}
|
128 |
|
|
|
129 |
try:
|
130 |
async with httpx.AsyncClient() as client:
|
131 |
response = await client.post(
|
132 |
url=hume_config.url,
|
133 |
headers=hume_config.headers,
|
134 |
json=request_body,
|
135 |
-
timeout=
|
136 |
)
|
|
|
|
|
137 |
response.raise_for_status()
|
138 |
response_data = response.json()
|
139 |
|
@@ -153,10 +158,10 @@ async def text_to_speech_with_hume(
|
|
153 |
generation_b_id, audio_b_path = _parse_hume_tts_generation(generation_b, config)
|
154 |
return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
|
155 |
|
156 |
-
except httpx.ReadTimeout as e:
|
157 |
-
|
158 |
raise HumeError(
|
159 |
-
message="
|
160 |
original_exception=e,
|
161 |
) from e
|
162 |
|
|
|
13 |
|
14 |
# Standard Library Imports
|
15 |
import logging
|
16 |
+
import time
|
17 |
from dataclasses import dataclass, field
|
18 |
from typing import Any, Dict, Literal, Tuple, Union
|
19 |
|
20 |
# Third-Party Library Imports
|
21 |
import httpx
|
22 |
+
from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_exponential
|
23 |
|
24 |
# Local Application Imports
|
25 |
from src.config import Config, logger
|
|
|
38 |
headers: Dict[str, str] = field(init=False)
|
39 |
url: str = "https://api.hume.ai/v0/tts/octave"
|
40 |
file_format: HumeSupportedFileFormat = "mp3"
|
41 |
+
request_timeout: float = 60.0
|
42 |
|
43 |
def __post_init__(self) -> None:
|
44 |
# Validate required attributes.
|
|
|
77 |
@retry(
|
78 |
retry=retry_if_exception(lambda e: not isinstance(e, UnretryableHumeError)),
|
79 |
stop=stop_after_attempt(3),
|
80 |
+
wait=wait_exponential(multiplier=1, min=2, max=5),
|
81 |
before=before_log(logger, logging.DEBUG),
|
82 |
after=after_log(logger, logging.DEBUG),
|
83 |
reraise=True,
|
|
|
128 |
"num_generations": num_generations,
|
129 |
}
|
130 |
|
131 |
+
start_time = time.time()
|
132 |
try:
|
133 |
async with httpx.AsyncClient() as client:
|
134 |
response = await client.post(
|
135 |
url=hume_config.url,
|
136 |
headers=hume_config.headers,
|
137 |
json=request_body,
|
138 |
+
timeout=hume_config.request_timeout,
|
139 |
)
|
140 |
+
elapsed_time = time.time() - start_time
|
141 |
+
logger.info(f"Hume API request completed in {elapsed_time:.2f} seconds")
|
142 |
response.raise_for_status()
|
143 |
response_data = response.json()
|
144 |
|
|
|
158 |
generation_b_id, audio_b_path = _parse_hume_tts_generation(generation_b, config)
|
159 |
return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
|
160 |
|
161 |
+
except (httpx.ReadTimeout, httpx.ConnectTimeout, httpx.ConnectError) as e:
|
162 |
+
logger.error(f"Hume API request failed after {elapsed_time:.2f} seconds: {e!s}")
|
163 |
raise HumeError(
|
164 |
+
message=f"Connection to Hume API failed: {e!s}. Please try again later.",
|
165 |
original_exception=e,
|
166 |
) from e
|
167 |
|