Spaces:

HumeAI
/

expressive-tts-arena

Running

App Files Files Community

zach committed on Feb 25

Commit

e1385f3

1 Parent(s): d699ad7

Relax timeout configs for Hume TTS calls, add exponential backoff between retries, and log call durations

Browse files

Files changed (3) hide show

pyproject.toml +1 -0
src/app.py +26 -12
src/integrations/hume_api.py +11 -6

pyproject.toml CHANGED Viewed

@@ -43,6 +43,7 @@ ignore = [
     "G004",
     "PLR0912",
     "PLR0913",
     "PLR2004",
     "RUF006",
     "SIM117",

     "G004",
     "PLR0912",
     "PLR0913",
+    "PLR0915",
     "PLR2004",
     "RUF006",
     "SIM117",

src/app.py CHANGED Viewed

@@ -129,37 +129,51 @@ class App:
         provider_a, provider_b = choose_providers(text_modified, character_description)
         try:
             if provider_b == constants.HUME_AI:
                 num_generations = 2
-                # If generating 2 Hume outputs, do so in a single API call.
                 result = await text_to_speech_with_hume(character_description, text, num_generations, self.config)
                 # Enforce that 4 values are returned.
                 if not (isinstance(result, tuple) and len(result) == 4):
                     raise ValueError("Expected 4 values from Hume TTS call when generating 2 outputs")
                 generation_id_a, audio_a, generation_id_b, audio_b = result
             else:
-                num_generations = 1
-                # Run both API calls concurrently using asyncio
-                tasks = []
                 # Generate a single Hume output
-                tasks.append(text_to_speech_with_hume(character_description, text, num_generations, self.config))
                 # Generate a second TTS output from the second provider
                 match provider_b:
                     case constants.ELEVENLABS:
-                        tasks.append(text_to_speech_with_elevenlabs(character_description, text, self.config))
                     case _:
                         # Additional TTS Providers can be added here.
                         raise ValueError(f"Unsupported provider: {provider_b}")
-                # Await both tasks concurrently
-                result_a, result_b = await asyncio.gather(*tasks)
-                if not isinstance(result_a, tuple) or len(result_a) != 2:
-                    raise ValueError("Expected 2 values from Hume TTS call when generating 1 output")
-                generation_id_a, audio_a = result_a[0], result_a[1]
-                generation_id_b, audio_b = result_b[0], result_b[1]
             # Shuffle options so that placement of options in the UI will always be random.
             option_a = Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a)

         provider_a, provider_b = choose_providers(text_modified, character_description)
         try:
+            start_time = time.time()
+            logger.info(f"Starting speech synthesis with providers: {provider_a} and {provider_b}")
             if provider_b == constants.HUME_AI:
+                # If generating 2 Hume outputs, do so in a single API call to reduce overhead
+                logger.info("Using single Hume API call for both audio outputs")
                 num_generations = 2
                 result = await text_to_speech_with_hume(character_description, text, num_generations, self.config)
                 # Enforce that 4 values are returned.
                 if not (isinstance(result, tuple) and len(result) == 4):
                     raise ValueError("Expected 4 values from Hume TTS call when generating 2 outputs")
                 generation_id_a, audio_a, generation_id_b, audio_b = result
+                logger.info(f"Completed dual Hume synthesis in {time.time() - start_time:.2f} seconds")
             else:
+                # Process API calls sequentially to avoid resource contention
+                logger.info(f"Sequential processing: First generating audio with {provider_a}")
                 # Generate a single Hume output
+                num_generations = 1
+                result_a = await text_to_speech_with_hume(character_description, text, num_generations, self.config)
+                if not isinstance(result_a, tuple) or len(result_a) != 2:
+                    raise ValueError("Expected 2 values from Hume TTS call when generating 1 output")
+                generation_id_a, audio_a = result_a[0], result_a[1]
+                logger.info(f"First audio generated in {time.time() - start_time:.2f} seconds")
                 # Generate a second TTS output from the second provider
+                logger.info(f"Now generating audio with {provider_b}")
+                second_start = time.time()
                 match provider_b:
                     case constants.ELEVENLABS:
+                        result_b = await text_to_speech_with_elevenlabs(character_description, text, self.config)
                     case _:
                         # Additional TTS Providers can be added here.
                         raise ValueError(f"Unsupported provider: {provider_b}")
+                generation_id_b, audio_b = result_b[0], result_b[1]
+                logger.info(f"Second audio generated in {time.time() - second_start:.2f} seconds")
+                logger.info(f"Total synthesis time: {time.time() - start_time:.2f} seconds")
             # Shuffle options so that placement of options in the UI will always be random.
             option_a = Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a)

src/integrations/hume_api.py CHANGED Viewed

@@ -13,12 +13,13 @@ Key Features:
 # Standard Library Imports
 import logging
 from dataclasses import dataclass, field
 from typing import Any, Dict, Literal, Tuple, Union
 # Third-Party Library Imports
 import httpx
-from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_fixed
 # Local Application Imports
 from src.config import Config, logger
@@ -37,6 +38,7 @@ class HumeConfig:
     headers: Dict[str, str] = field(init=False)
     url: str = "https://api.hume.ai/v0/tts/octave"
     file_format: HumeSupportedFileFormat = "mp3"
     def __post_init__(self) -> None:
         # Validate required attributes.
@@ -75,7 +77,7 @@ class UnretryableHumeError(HumeError):
 @retry(
     retry=retry_if_exception(lambda e: not isinstance(e, UnretryableHumeError)),
     stop=stop_after_attempt(3),
-    wait=wait_fixed(2),
     before=before_log(logger, logging.DEBUG),
     after=after_log(logger, logging.DEBUG),
     reraise=True,
@@ -126,14 +128,17 @@ async def text_to_speech_with_hume(
         "num_generations": num_generations,
     }
     try:
         async with httpx.AsyncClient() as client:
             response = await client.post(
                 url=hume_config.url,
                 headers=hume_config.headers,
                 json=request_body,
-                timeout=30.0,
             )
             response.raise_for_status()
             response_data = response.json()
@@ -153,10 +158,10 @@ async def text_to_speech_with_hume(
         generation_b_id, audio_b_path = _parse_hume_tts_generation(generation_b, config)
         return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
-    except httpx.ReadTimeout as e:
-        # Handle timeout specifically
         raise HumeError(
-            message="Request to Hume API timed out. Please try again later.",
             original_exception=e,
         ) from e

 # Standard Library Imports
 import logging
+import time
 from dataclasses import dataclass, field
 from typing import Any, Dict, Literal, Tuple, Union
 # Third-Party Library Imports
 import httpx
+from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_exponential
 # Local Application Imports
 from src.config import Config, logger
     headers: Dict[str, str] = field(init=False)
     url: str = "https://api.hume.ai/v0/tts/octave"
     file_format: HumeSupportedFileFormat = "mp3"
+    request_timeout: float = 60.0
     def __post_init__(self) -> None:
         # Validate required attributes.
 @retry(
     retry=retry_if_exception(lambda e: not isinstance(e, UnretryableHumeError)),
     stop=stop_after_attempt(3),
+    wait=wait_exponential(multiplier=1, min=2, max=5),
     before=before_log(logger, logging.DEBUG),
     after=after_log(logger, logging.DEBUG),
     reraise=True,
         "num_generations": num_generations,
     }
+    start_time = time.time()
     try:
         async with httpx.AsyncClient() as client:
             response = await client.post(
                 url=hume_config.url,
                 headers=hume_config.headers,
                 json=request_body,
+                timeout=hume_config.request_timeout,
             )
+            elapsed_time = time.time() - start_time
+            logger.info(f"Hume API request completed in {elapsed_time:.2f} seconds")
             response.raise_for_status()
             response_data = response.json()
         generation_b_id, audio_b_path = _parse_hume_tts_generation(generation_b, config)
         return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
+    except (httpx.ReadTimeout, httpx.ConnectTimeout, httpx.ConnectError) as e:
+        logger.error(f"Hume API request failed after {elapsed_time:.2f} seconds: {e!s}")
         raise HumeError(
+            message=f"Connection to Hume API failed: {e!s}. Please try again later.",
             original_exception=e,
         ) from e