Spaces:

HumeAI
/

expressive-tts-arena

Running

App Files Files Community

zach committed on Feb 12

Commit

09be04f

1 Parent(s): f477f87

Move more business logic out of app.py, simplify state management for options

Browse files

Files changed (4) hide show

src/app.py +31 -64
src/constants.py +5 -3
src/types.py +33 -5
src/utils.py +88 -26

src/app.py CHANGED Viewed

@@ -10,9 +10,8 @@ Users can compare the outputs and vote for their favorite in an interactive UI.
 # Standard Library Imports
 from concurrent.futures import ThreadPoolExecutor
-import json
 import time
-from typing import Union, Tuple
 # Third-Party Library Imports
 import gradio as gr
@@ -29,10 +28,12 @@ from src.integrations import (
     text_to_speech_with_hume,
 )
 from src.theme import CustomTheme
-from src.types import ComparisonType, OptionMap, VotingResults
 from src.utils import (
     choose_providers,
     create_shuffled_tts_options,
     validate_character_description_length,
 )
@@ -148,24 +149,18 @@ def synthesize_speech(
                 generation_id_b, audio_b = future_audio_b.result()
         # Shuffle options so that placement of options in the UI will always be random
-        (
-            option_a_audio,
-            option_b_audio,
-            option_a_generation_id,
-            option_b_generation_id,
-            options_map,
-        ) = create_shuffled_tts_options(
             provider_a, audio_a, generation_id_a, provider_b, audio_b, generation_id_b
         )
         return (
             gr.update(value=option_a_audio, visible=True, autoplay=True),
             gr.update(value=option_b_audio, visible=True),
             options_map,
-            option_b_audio,
             comparison_type,
-            option_a_generation_id,
-            option_b_generation_id,
             text_modified,
             text,
             character_description,
@@ -188,10 +183,8 @@ def synthesize_speech(
 def vote(
     vote_submitted: bool,
     option_map: OptionMap,
-    selected_button: str,
     comparison_type: ComparisonType,
-    option_a_generation_id: str,
-    option_b_generation_id: str,
     text_modified: bool,
     character_description: str,
     text: str,
@@ -219,52 +212,41 @@ def vote(
     if not option_map or vote_submitted:
         return gr.skip(), gr.skip(), gr.skip(), gr.skip()
-    option_a_selected = selected_button == constants.SELECT_OPTION_A
-    selected_option, other_option = (
-        (constants.OPTION_A, constants.OPTION_B)
-        if option_a_selected
-        else (constants.OPTION_B, constants.OPTION_A)
     )
-    selected_provider = option_map.get(selected_option)
-    other_provider = option_map.get(other_option)
-    # Build button labels, displaying the provider and voice name, appending the trophy emoji to the selected option.
     selected_label = f"{selected_provider} {constants.TROPHY_EMOJI}"
     other_label = f"{other_provider}"
-    # Report voting results to be persisted to results DB
-    voting_results: VotingResults = {
-        "comparison_type": comparison_type,
-        "winning_provider": selected_provider,
-        "winning_option": selected_option,
-        "option_a_provider": option_map.get(constants.OPTION_A),
-        "option_b_provider": option_map.get(constants.OPTION_B),
-        "option_a_generation_id": option_a_generation_id,
-        "option_b_generation_id": option_b_generation_id,
-        "character_description": character_description,
-        "text": text,
-        "is_custom_text": text_modified,
-    }
-    # TODO: Currently logging the results until we hook the API for writing results to DB
-    logger.info("Voting results:\n%s", json.dumps(voting_results, indent=4))
     return (
         True,
         (
             gr.update(value=selected_label, variant="primary", interactive=False)
-            if option_a_selected
             else gr.update(value=other_label, variant="secondary", interactive=False)
         ),
         (
             gr.update(value=other_label, variant="secondary", interactive=False)
-            if option_a_selected
             else gr.update(value=selected_label, variant="primary", interactive=False)
         ),
         gr.update(interactive=True),
     )
-def reset_ui() -> Tuple[gr.update, gr.update, gr.update, gr.update, None, None, bool]:
     """
     Resets UI state before generating new text.
@@ -275,7 +257,6 @@ def reset_ui() -> Tuple[gr.update, gr.update, gr.update, gr.update, None, None,
          - vote_button_a (disable and reset button text)
          - vote_button_b (disable and reset button text)
          - option_map_state (reset option map state)
-         - option_b_audio_state (reset option B audio state)
          - vote_submitted_state (reset submitted vote state)
     """
     return (
@@ -284,7 +265,6 @@ def reset_ui() -> Tuple[gr.update, gr.update, gr.update, gr.update, None, None,
         gr.update(value=constants.SELECT_OPTION_A, variant="secondary"),
         gr.update(value=constants.SELECT_OPTION_B, variant="secondary"),
         None,
-        None,
         False,
     )
@@ -330,10 +310,10 @@ def build_output_section() -> (
     synthesize_speech_button = gr.Button("Synthesize Speech", variant="primary")
     with gr.Row(equal_height=True):
         option_a_audio_player = gr.Audio(
-            label=constants.OPTION_A, type="filepath", interactive=False
         )
         option_b_audio_player = gr.Audio(
-            label=constants.OPTION_B, type="filepath", interactive=False
         )
     with gr.Row(equal_height=True):
         vote_button_a = gr.Button(constants.SELECT_OPTION_A, interactive=False)
@@ -402,12 +382,6 @@ def build_gradio_interface() -> gr.Blocks:
         # Track whether text that was used was generated or modified/custom
         text_modified_state = gr.State()
-        # Track generated audio for option B (for playing automatically after option 1 audio finishes)
-        option_b_audio_state = gr.State()
-        # Track generation ID for Option A
-        option_a_generation_id_state = gr.State()
-        # Track generation ID for Option B
-        option_b_generation_id_state = gr.State()
         # Track comparison type (which set of providers are being compared)
         comparison_type_state = gr.State()
         # Track option map (option A and option B are randomized)
@@ -465,7 +439,6 @@ def build_gradio_interface() -> gr.Blocks:
                 vote_button_a,
                 vote_button_b,
                 option_map_state,
-                option_b_audio_state,
                 vote_submitted_state,
             ],
         ).then(
@@ -475,10 +448,7 @@ def build_gradio_interface() -> gr.Blocks:
                 option_a_audio_player,
                 option_b_audio_player,
                 option_map_state,
-                option_b_audio_state,
                 comparison_type_state,
-                option_a_generation_id_state,
-                option_b_generation_id_state,
                 text_modified_state,
                 text_state,
                 character_description_state,
@@ -501,8 +471,6 @@ def build_gradio_interface() -> gr.Blocks:
                 option_map_state,
                 vote_button_a,
                 comparison_type_state,
-                option_a_generation_id_state,
-                option_b_generation_id_state,
                 text_modified_state,
                 character_description_state,
                 text_state,
@@ -521,8 +489,6 @@ def build_gradio_interface() -> gr.Blocks:
                 option_map_state,
                 vote_button_b,
                 comparison_type_state,
-                option_a_generation_id_state,
-                option_b_generation_id_state,
                 text_modified_state,
                 character_description_state,
                 text_state,
@@ -537,10 +503,11 @@ def build_gradio_interface() -> gr.Blocks:
         # Reload audio player B with audio and set autoplay to True (workaround to play audio back-to-back)
         option_a_audio_player.stop(
-            fn=lambda current_audio_path: gr.update(
-                value=f"{current_audio_path}?t={int(time.time())}", autoplay=True
             ),
-            inputs=[option_b_audio_state],
             outputs=[option_b_audio_player],
         )

 # Standard Library Imports
 from concurrent.futures import ThreadPoolExecutor
 import time
+from typing import Tuple, Union
 # Third-Party Library Imports
 import gradio as gr
     text_to_speech_with_hume,
 )
 from src.theme import CustomTheme
+from src.types import ComparisonType, OptionMap
 from src.utils import (
     choose_providers,
     create_shuffled_tts_options,
+    determine_selected_option,
+    submit_voting_results,
     validate_character_description_length,
 )
                 generation_id_b, audio_b = future_audio_b.result()
         # Shuffle options so that placement of options in the UI will always be random
+        options_map: OptionMap = create_shuffled_tts_options(
             provider_a, audio_a, generation_id_a, provider_b, audio_b, generation_id_b
         )
+        option_a_audio = options_map["option_a"]["audio_file_path"]
+        option_b_audio = options_map["option_b"]["audio_file_path"]
         return (
             gr.update(value=option_a_audio, visible=True, autoplay=True),
             gr.update(value=option_b_audio, visible=True),
             options_map,
             comparison_type,
             text_modified,
             text,
             character_description,
 def vote(
     vote_submitted: bool,
     option_map: OptionMap,
+    clicked_option_button: str,
     comparison_type: ComparisonType,
     text_modified: bool,
     character_description: str,
     text: str,
     if not option_map or vote_submitted:
         return gr.skip(), gr.skip(), gr.skip(), gr.skip()
+    selected_option, other_option = determine_selected_option(clicked_option_button)
+    selected_provider = option_map[selected_option]["provider"]
+    other_provider = option_map[other_option]["provider"]
+    # Report voting results to be persisted to results DB
+    submit_voting_results(
+        option_map,
+        selected_option,
+        comparison_type,
+        text_modified,
+        character_description,
+        text,
     )
+    # Build button text displaying the provider name, appending the trophy emoji to the selected option.
     selected_label = f"{selected_provider} {constants.TROPHY_EMOJI}"
     other_label = f"{other_provider}"
     return (
         True,
         (
             gr.update(value=selected_label, variant="primary", interactive=False)
+            if selected_option == constants.OPTION_A_KEY
             else gr.update(value=other_label, variant="secondary", interactive=False)
         ),
         (
             gr.update(value=other_label, variant="secondary", interactive=False)
+            if selected_option == constants.OPTION_A_KEY
             else gr.update(value=selected_label, variant="primary", interactive=False)
         ),
         gr.update(interactive=True),
     )
+def reset_ui() -> Tuple[gr.update, gr.update, gr.update, gr.update, None, bool]:
     """
     Resets UI state before generating new text.
          - vote_button_a (disable and reset button text)
          - vote_button_b (disable and reset button text)
          - option_map_state (reset option map state)
          - vote_submitted_state (reset submitted vote state)
     """
     return (
         gr.update(value=constants.SELECT_OPTION_A, variant="secondary"),
         gr.update(value=constants.SELECT_OPTION_B, variant="secondary"),
         None,
         False,
     )
     synthesize_speech_button = gr.Button("Synthesize Speech", variant="primary")
     with gr.Row(equal_height=True):
         option_a_audio_player = gr.Audio(
+            label=constants.OPTION_A_LABEL, type="filepath", interactive=False
         )
         option_b_audio_player = gr.Audio(
+            label=constants.OPTION_B_LABEL, type="filepath", interactive=False
         )
     with gr.Row(equal_height=True):
         vote_button_a = gr.Button(constants.SELECT_OPTION_A, interactive=False)
         # Track whether text that was used was generated or modified/custom
         text_modified_state = gr.State()
         # Track comparison type (which set of providers are being compared)
         comparison_type_state = gr.State()
         # Track option map (option A and option B are randomized)
                 vote_button_a,
                 vote_button_b,
                 option_map_state,
                 vote_submitted_state,
             ],
         ).then(
                 option_a_audio_player,
                 option_b_audio_player,
                 option_map_state,
                 comparison_type_state,
                 text_modified_state,
                 text_state,
                 character_description_state,
                 option_map_state,
                 vote_button_a,
                 comparison_type_state,
                 text_modified_state,
                 character_description_state,
                 text_state,
                 option_map_state,
                 vote_button_b,
                 comparison_type_state,
                 text_modified_state,
                 character_description_state,
                 text_state,
         # Reload audio player B with audio and set autoplay to True (workaround to play audio back-to-back)
         option_a_audio_player.stop(
+            fn=lambda option_map: gr.update(
+                value=f"{option_map['option_b']['audio_file_path']}?t={int(time.time())}",
+                autoplay=True,
             ),
+            inputs=[option_map_state],
             outputs=[option_b_audio_player],
         )

src/constants.py CHANGED Viewed

@@ -8,7 +8,7 @@ This module defines global constants used throughout the project.
 from typing import List
 # Third-Party Library Imports
-from src.types import ComparisonType, OptionKey, TTSProviderName
 # UI constants
@@ -22,8 +22,10 @@ HUME_TO_ELEVENLABS: ComparisonType = "Hume AI - ElevenLabs"
 CHARACTER_DESCRIPTION_MIN_LENGTH: int = 20
 CHARACTER_DESCRIPTION_MAX_LENGTH: int = 800
-OPTION_A: OptionKey = "Option A"
-OPTION_B: OptionKey = "Option B"
 TROPHY_EMOJI: str = "🏆"
 SELECT_OPTION_A: str = "Select Option A"
 SELECT_OPTION_B: str = "Select Option B"

 from typing import List
 # Third-Party Library Imports
+from src.types import ComparisonType, OptionKey, OptionLabel, TTSProviderName
 # UI constants
 CHARACTER_DESCRIPTION_MIN_LENGTH: int = 20
 CHARACTER_DESCRIPTION_MAX_LENGTH: int = 800
+OPTION_A_KEY: OptionKey = "option_a"
+OPTION_B_KEY: OptionKey = "option_b"
+OPTION_A_LABEL: OptionLabel = "Option A"
+OPTION_B_LABEL: OptionLabel = "Option B"
 TROPHY_EMOJI: str = "🏆"
 SELECT_OPTION_A: str = "Select Option A"
 SELECT_OPTION_B: str = "Select Option B"

src/types.py CHANGED Viewed

@@ -5,7 +5,7 @@ This module defines custom types for the application.
 """
 # Standard Library Imports
-from typing import Dict, Literal, NamedTuple, TypedDict
 TTSProviderName = Literal["Hume AI", "ElevenLabs"]
@@ -16,12 +16,12 @@ ComparisonType = Literal["Hume AI - Hume AI", "Hume AI - ElevenLabs"]
 """Comparison type denoting which providers are compared."""
-OptionKey = Literal["Option A", "Option B"]
-"""OptionKey is restricted to the literal values 'Option A' or 'Option B'."""
-OptionMap = Dict[OptionKey, TTSProviderName]
-"""OptionMap defines the structure of the options mapping, where each key is an OptionKey and the value is a TTS provider."""
 class Option(NamedTuple):
@@ -56,3 +56,31 @@ class VotingResults(TypedDict):
     voice_description: str
     text: str
     is_custom_text: bool

 """
 # Standard Library Imports
+from typing import Dict, Literal, NamedTuple, Optional, TypedDict
 TTSProviderName = Literal["Hume AI", "ElevenLabs"]
 """Comparison type denoting which providers are compared."""
+OptionLabel = Literal["Option A", "Option B"]
+"""OptionLabel is restricted to the literal values 'Option A' or 'Option B'."""
+OptionKey = Literal["option_a", "option_b"]
+"""OptionKey is restricted to the literal values 'option_a' or 'option_b'."""
 class Option(NamedTuple):
     voice_description: str
     text: str
     is_custom_text: bool
+class OptionDetail(TypedDict):
+    """
+    Details for a single TTS option.
+    Attributes:
+        provider (TTSProviderName): The TTS provider that generated the audio.
+        generation_id (Optional[str]): The unique identifier for this TTS generation, or None if not available.
+        audio_file_path (str): The relative file path to the generated audio file.
+    """
+    provider: TTSProviderName
+    generation_id: Optional[str]
+    audio_file_path: str
+class OptionMap(TypedDict):
+    """
+    Mapping of TTS options.
+    Structure:
+        option_a: OptionDetail,
+        option_b: OptionDetail
+    """
+    option_a: OptionDetail
+    option_b: OptionDetail

src/utils.py CHANGED Viewed

@@ -7,6 +7,7 @@ These functions provide reusable logic to simplify code in other modules.
 # Standard Library Imports
 import base64
 import os
 import random
 import time
@@ -15,7 +16,14 @@ from typing import Tuple
 # Local Application Imports
 from src import constants
 from src.config import AUDIO_DIR, logger
-from src.types import ComparisonType, Option, OptionMap, TTSProviderName
 def truncate_text(text: str, max_length: int = 50) -> str:
@@ -244,13 +252,13 @@ def create_shuffled_tts_options(
     provider_b: TTSProviderName,
     audio_b: str,
     generation_id_b: str,
-) -> Tuple[str, str, str, str, OptionMap]:
     """
     Create and shuffle TTS generation options.
     This function creates two Option instances from the provided TTS details, shuffles them,
-    and then extracts the audio file paths and generation IDs from the shuffled options.
-    It also returns a mapping from option constants to the corresponding TTS providers.
     Args:
         provider_a (TTSProviderName): The TTS provider for the first generation.
@@ -261,13 +269,7 @@ def create_shuffled_tts_options(
         generation_id_b (str): The generation ID for the second generation.
     Returns:
-        Tuple[str, str, str, str, OptionMap]:
-            A tuple containing:
-            - option_a_audio (str): Audio file path for the first shuffled option.
-            - option_b_audio (str): Audio file path for the second shuffled option.
-            - option_a_generation_id (str): Generation ID for the first shuffled option.
-            - option_b_generation_id (str): Generation ID for the second shuffled option.
-            - options_map (OptionMap): Mapping from option constants to their TTS providers.
     """
     # Create a list of Option instances for the available providers.
     options = [
@@ -281,22 +283,82 @@ def create_shuffled_tts_options(
     # Unpack the two options.
     option_a, option_b = options
-    # Extract audio file paths and generation IDs.
-    option_a_audio = option_a.audio
-    option_b_audio = option_b.audio
-    option_a_generation_id = option_a.generation_id
-    option_b_generation_id = option_b.generation_id
     # Build a mapping from option constants to the corresponding providers.
     options_map: OptionMap = {
-        constants.OPTION_A: option_a.provider,
-        constants.OPTION_B: option_b.provider,
     }
-    return (
-        option_a_audio,
-        option_b_audio,
-        option_a_generation_id,
-        option_b_generation_id,
-        options_map,
-    )

 # Standard Library Imports
 import base64
+import json
 import os
 import random
 import time
 # Local Application Imports
 from src import constants
 from src.config import AUDIO_DIR, logger
+from src.types import (
+    ComparisonType,
+    Option,
+    OptionKey,
+    OptionMap,
+    TTSProviderName,
+    VotingResults,
+)
 def truncate_text(text: str, max_length: int = 50) -> str:
     provider_b: TTSProviderName,
     audio_b: str,
     generation_id_b: str,
+) -> OptionMap:
     """
     Create and shuffle TTS generation options.
     This function creates two Option instances from the provided TTS details, shuffles them,
+    then extracts the providers, audio file paths, and generation IDs from the shuffled options,
+    and finally maps the options to an OptionMap.
     Args:
         provider_a (TTSProviderName): The TTS provider for the first generation.
         generation_id_b (str): The generation ID for the second generation.
     Returns:
+        options_map (OptionMap): Mapping of TTS output options.
     """
     # Create a list of Option instances for the available providers.
     options = [
     # Unpack the two options.
     option_a, option_b = options
     # Build a mapping from option keys to each option's provider, generation ID, and audio file path.
     options_map: OptionMap = {
+        "option_a": {
+            "provider": option_a.provider,
+            "generation_id": option_a.generation_id,
+            "audio_file_path": option_a.audio,
+        },
+        "option_b": {
+            "provider": option_b.provider,
+            "generation_id": option_b.generation_id,
+            "audio_file_path": option_b.audio,
+        },
     }
+    return options_map
+def determine_selected_option(
+    selected_option_button: str,
+) -> Tuple[OptionKey, OptionKey]:
+    """
+    Determines the selected option and the alternative option based on the user's selection.
+    Args:
+        selected_option_button (str): The label of the vote button clicked by the user, expected
+            to be either constants.SELECT_OPTION_A or constants.SELECT_OPTION_B.
+    Returns:
+        tuple: A tuple (selected_option, other_option) where:
+            - selected_option is the option key corresponding to the button the user clicked.
+            - other_option is the alternative option.
+    """
+    if selected_option_button == constants.SELECT_OPTION_A:
+        selected_option, other_option = constants.OPTION_A_KEY, constants.OPTION_B_KEY
+    elif selected_option_button == constants.SELECT_OPTION_B:
+        selected_option, other_option = constants.OPTION_B_KEY, constants.OPTION_A_KEY
+    else:
+        raise ValueError(f"Invalid selected button: {selected_option_button}")
+    return selected_option, other_option
+def submit_voting_results(
+    option_map: OptionMap,
+    selected_option: str,
+    comparison_type: ComparisonType,
+    text_modified: bool,
+    character_description: str,
+    text: str,
+) -> VotingResults:
+    """
+    Constructs the voting results dictionary from the provided inputs and logs it.
+    Args:
+        option_map (OptionMap): Mapping of comparison data and TTS options.
+        selected_option (str): The option selected by the user.
+        comparison_type (ComparisonType): The type of comparison between providers.
+        text_modified (bool): Indicates whether the text was modified.
+        character_description (str): Description of the voice/character.
+        text (str): The text associated with the TTS generation.
+    Returns:
+        VotingResults: The constructed voting results dictionary.
+    """
+    voting_results: VotingResults = {
+        "comparison_type": comparison_type,
+        "winning_provider": option_map[selected_option]["provider"],
+        "winning_option": selected_option,
+        "option_a_provider": option_map[constants.OPTION_A_KEY]["provider"],
+        "option_b_provider": option_map[constants.OPTION_B_KEY]["provider"],
+        "option_a_generation_id": option_map[constants.OPTION_A_KEY]["generation_id"],
+        "option_b_generation_id": option_map[constants.OPTION_B_KEY]["generation_id"],
+        "voice_description": character_description,
+        "text": text,
+        "is_custom_text": text_modified,
+    }
+    # TODO: Currently logging the results until we hook the API for writing results to DB
+    logger.info("Voting results:\n%s", json.dumps(voting_results, indent=4))
+    return voting_results