Spaces:

HumeAI
/

expressive-tts-arena

Running

App Files Community

zach committed on Feb 1

Commit

bc5091e

1 Parent(s): c8f7e68

Update documentation and fix typos across project

Browse files

Files changed (6) hide show

src/config.py +2 -2
src/constants.py +5 -4
src/integrations/anthropic_api.py +6 -8
src/integrations/elevenlabs_api.py +3 -3
src/integrations/hume_api.py +5 -4
src/theme.py +1 -1

src/config.py CHANGED Viewed

@@ -1,10 +1,10 @@
 """
 config.py
-Global configuration and logger setup for the project. This file centralizes shared
-constants and settings, such as the logging configuration and API constraints.
 Key Features:
 - Configures the logger for consistent logging across all modules.
 - Dynamically sets the logging level based on the DEBUG environment variable.
 """

 """
 config.py
+Global configuration and logger setup for the project.
 Key Features:
+- Loads environment variables
 - Configures the logger for consistent logging across all modules.
 - Dynamically sets the logging level based on the DEBUG environment variable.
 """

src/constants.py CHANGED Viewed

@@ -9,13 +9,14 @@ PROMPT_MIN_LENGTH: int = 10
 PROMPT_MAX_LENGTH: int = 400
 OPTION_ONE: str = "Option 1"
 OPTION_TWO: str = "Option 2"
-TROPHY_EMOJI = "🏆"
-UNKNOWN_PROVIDER = "Unknown"
 VOTE_FOR_OPTION_ONE: str = "Vote for option 1"
 VOTE_FOR_OPTION_TWO: str = "Vote for option 2"
-# A collection of pre-defined prompts categorized by theme, used to provide users with inspiration for generating creative text.
-SAMPLE_PROMPTS = {
     '🚀 Dramatic Monologue (Stranded Astronaut)':
         'Write a short dramatic monologue from a lone astronaut stranded on Mars, speaking to '
         'mission control for the last time. The tone should be reflective and filled with awe, conveying '

 PROMPT_MAX_LENGTH: int = 400
 OPTION_ONE: str = "Option 1"
 OPTION_TWO: str = "Option 2"
+TROPHY_EMOJI: str = "🏆"
+UNKNOWN_PROVIDER: str = "Unknown"
 VOTE_FOR_OPTION_ONE: str = "Vote for option 1"
 VOTE_FOR_OPTION_TWO: str = "Vote for option 2"
+# A collection of pre-defined prompts categorized by theme, used to provide users with
+# inspiration for generating creative text for expressive TTS.
+SAMPLE_PROMPTS: dict = {
     '🚀 Dramatic Monologue (Stranded Astronaut)':
         'Write a short dramatic monologue from a lone astronaut stranded on Mars, speaking to '
         'mission control for the last time. The tone should be reflective and filled with awe, conveying '

src/integrations/anthropic_api.py CHANGED Viewed

@@ -35,13 +35,10 @@ from src.utils import truncate_text, validate_env_var
 @dataclass(frozen=True)
 class AnthropicConfig:
-    """
-    Immutable configuration for interacting with the Anthropic API.
-    Includes client initialization for encapsulation.
-    """
     api_key: str = validate_env_var('ANTHROPIC_API_KEY')
-    model: ModelParam = 'claude-3-5-sonnet-latest' # Valid predefined model
-    max_tokens: int = 256 # Max tokens for API response
     system_prompt: str = f"""You are an imaginative and articulate assistant, skilled in generating creative, concise, and engaging content that is perfectly suited for expressive speech synthesis.
 Your task is to generate:
@@ -117,6 +114,7 @@ def generate_text_with_claude(prompt: str) -> str:
     response = None
     try:
         response: Message = anthropic_config.client.messages.create(
             model=anthropic_config.model,
             max_tokens=anthropic_config.max_tokens,
@@ -125,12 +123,12 @@ def generate_text_with_claude(prompt: str) -> str:
         )
         logger.debug(f'API response received: {truncate_text(str(response))}')
-        # Validate response content
         if not hasattr(response, 'content'):
             logger.error("Response is missing 'content'. Response: %s", response)
             raise AnthropicError('Invalid API response: Missing "content".')
-        # Process response content
         blocks: Union[List[TextBlock], TextBlock, None] = response.content
         if isinstance(blocks, list):
             result = '\n\n'.join(block.text for block in blocks if isinstance(block, TextBlock))

 @dataclass(frozen=True)
 class AnthropicConfig:
+    """Immutable configuration for interacting with the Anthropic API."""
     api_key: str = validate_env_var('ANTHROPIC_API_KEY')
+    model: ModelParam = 'claude-3-5-sonnet-latest'
+    max_tokens: int = 256
     system_prompt: str = f"""You are an imaginative and articulate assistant, skilled in generating creative, concise, and engaging content that is perfectly suited for expressive speech synthesis.
 Your task is to generate:
     response = None
     try:
+        # Generate text using the Anthropic SDK
         response: Message = anthropic_config.client.messages.create(
             model=anthropic_config.model,
             max_tokens=anthropic_config.max_tokens,
         )
         logger.debug(f'API response received: {truncate_text(str(response))}')
+        # Validate response
         if not hasattr(response, 'content'):
             logger.error("Response is missing 'content'. Response: %s", response)
             raise AnthropicError('Invalid API response: Missing "content".')
+        # Process response
         blocks: Union[List[TextBlock], TextBlock, None] = response.content
         if isinstance(blocks, list):
             result = '\n\n'.join(block.text for block in blocks if isinstance(block, TextBlock))

src/integrations/elevenlabs_api.py CHANGED Viewed

@@ -38,7 +38,7 @@ class ElevenLabsConfig:
     """Immutable configuration for interacting with the ElevenLabs TTS API."""
     api_key: str = validate_env_var('ELEVENLABS_API_KEY')
     model_id: str = 'eleven_multilingual_v2' # ElevenLab's most emotionally expressive model
-    output_format: str = 'mp3_44100_128' # Output format of the generated audio.
     top_voices: list[str] = (
         'pNInz6obpgDQGcFmaJgB',  # Adam
         'ErXwobaYiN019PkySvjV',  # Antoni
@@ -109,10 +109,10 @@ def text_to_speech_with_elevenlabs(text: str) -> bytes:
     logger.debug(f'Synthesizing speech from text with ElevenLabs. Text length: {len(text)} characters.')
     try:
-        # Generate audio using the ElevenLabs SDK
         audio_iterator = elevenlabs_config.client.text_to_speech.convert(
             text=text,
-            voice_id=elevenlabs_config.random_voice_id,  # Randomly chosen voice ID
             model_id=elevenlabs_config.model_id,
             output_format=elevenlabs_config.output_format,
         )

     """Immutable configuration for interacting with the ElevenLabs TTS API."""
     api_key: str = validate_env_var('ELEVENLABS_API_KEY')
     model_id: str = 'eleven_multilingual_v2' # ElevenLab's most emotionally expressive model
+    output_format: str = 'mp3_44100_128' # Output format of the generated audio
     top_voices: list[str] = (
         'pNInz6obpgDQGcFmaJgB',  # Adam
         'ErXwobaYiN019PkySvjV',  # Antoni
     logger.debug(f'Synthesizing speech from text with ElevenLabs. Text length: {len(text)} characters.')
     try:
+        # Synthesize speech using the ElevenLabs SDK
         audio_iterator = elevenlabs_config.client.text_to_speech.convert(
             text=text,
+            voice_id=elevenlabs_config.random_voice_id,
             model_id=elevenlabs_config.model_id,
             output_format=elevenlabs_config.output_format,
         )

src/integrations/hume_api.py CHANGED Viewed

@@ -11,11 +11,11 @@ Key Features:
 - Provides detailed logging for debugging and error tracking.
 Classes:
-- HumeConfig: Immutable configuration for interacting with Hume's text-to-speech API.
 - HumeError: Custom exception for Hume API-related errors.
 Functions:
-- text_to_speech_with_hume: Synthesizes speech from text using Hume's text-to-speech API.
 """
 # Standard Library Imports
@@ -38,9 +38,9 @@ class HumeConfig:
     """Immutable configuration for interacting with the Hume TTS API."""
     tts_endpoint_url: str = 'https://api.hume.ai/v0/tts'
     api_key: str = validate_env_var('HUME_API_KEY')
-    voices: List[str] = ('ITO', 'KORA', 'STELLA')  # List of available Hume voices
     audio_format: str = 'wav'
-    headers: dict = None  # Headers for the API requests
     def __post_init__(self):
         # Validate required attributes
@@ -110,6 +110,7 @@ def text_to_speech_with_hume(prompt: str, text: str) -> bytes:
     }
     try:
         response = requests.post(
             url=hume_config.tts_endpoint_url,
             headers=hume_config.headers,

 - Provides detailed logging for debugging and error tracking.
 Classes:
+- HumeConfig: Immutable configuration for interacting with Hume's TTS API.
 - HumeError: Custom exception for Hume API-related errors.
 Functions:
+- text_to_speech_with_hume: Synthesizes speech from text using Hume's TTS API.
 """
 # Standard Library Imports
     """Immutable configuration for interacting with the Hume TTS API."""
     tts_endpoint_url: str = 'https://api.hume.ai/v0/tts'
     api_key: str = validate_env_var('HUME_API_KEY')
+    voices: List[str] = ('ITO', 'KORA', 'STELLA')
     audio_format: str = 'wav'
+    headers: dict = None
     def __post_init__(self):
         # Validate required attributes
     }
     try:
+        # Synthesize speech using the Hume TTS API
         response = requests.post(
             url=hume_config.tts_endpoint_url,
             headers=hume_config.headers,

src/theme.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """
 theme.py
-Defines a custom Gradio theme.
 - For more information on Gradio themes see: https://www.gradio.app/docs/gradio/themes
 - For manual styling with css, see /src/assets/styles.css
 """

 """
 theme.py
+This module defines a custom Gradio theme.
 - For more information on Gradio themes see: https://www.gradio.app/docs/gradio/themes
 - For manual styling with css, see /src/assets/styles.css
 """