zach committed
Commit ba3994f · 1 Parent(s): 2f050a8

Move business logic out of app.py; refactor code to call Hume once, specifying 2 generations, instead of calling Hume twice

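For orientation, here is a minimal sketch of the calling-pattern change this commit makes for the Hume-to-Hume comparison case (illustrative only: it assumes the Space's `src` package is importable and a Hume API key is configured, and the character description and text values below are made up):

from concurrent.futures import ThreadPoolExecutor

from src.integrations import text_to_speech_with_hume

character_description = "A calm narrator with a warm, low voice."  # made-up example input
text = "Welcome to the arena."  # made-up example input

# Before this commit: two independent Hume API calls, run concurrently.
with ThreadPoolExecutor(max_workers=2) as executor:
    future_a = executor.submit(text_to_speech_with_hume, character_description, text)
    future_b = executor.submit(text_to_speech_with_hume, character_description, text)
    generation_id_a, audio_a = future_a.result()
    generation_id_b, audio_b = future_b.result()

# After this commit: a single API call requesting 2 generations,
# which returns (generation_id_a, audio_a, generation_id_b, audio_b).
generation_id_a, audio_a, generation_id_b, audio_b = text_to_speech_with_hume(
    character_description, text, 2
)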
src/app.py CHANGED
@@ -30,8 +30,12 @@ from src.integrations import (
     text_to_speech_with_hume,
 )
 from src.theme import CustomTheme
-from src.types import ComparisonType, OptionMap, VotingResults
-from src.utils import validate_character_description_length
+from src.types import ComparisonType, Option, OptionMap, VotingResults
+from src.utils import (
+    choose_providers,
+    create_shuffled_tts_options,
+    validate_character_description_length,
+)
 
 
 def generate_text(
@@ -73,73 +77,85 @@ def generate_text(
 
 def text_to_speech(
     character_description: str, text: str, generated_text_state: str
-) -> Tuple[gr.update, gr.update, dict, Union[str, None]]:
+) -> Tuple[gr.update, gr.update, dict, str, ComparisonType, str, str, bool, str, str]:
     """
-    Synthesizes two text to speech outputs, loads the two audio players with the
-    output audio, and updates related UI state components.
-    - 50% chance to synthesize one Hume and one Elevenlabs output.
-    - 50% chance to synthesize two Hume outputs.
+    Synthesizes two text-to-speech outputs, updates UI state components, and returns additional TTS metadata.
+
+    This function generates TTS outputs using different providers based on the input text and its modification
+    state. Depending on the selected providers, it may:
+    - Synthesize one Hume and one ElevenLabs output (50% chance), or
+    - Synthesize two Hume outputs (50% chance).
+
+    The outputs are processed and shuffled, and the corresponding UI components for two audio players are updated.
+    Additional metadata such as the generation IDs, comparison type, and state information are also returned.
 
     Args:
-        character_description (str): The original character_description.
-        text (str): The text to synthesize to speech.
+        character_description (str): The description of the character used for generating the voice.
+        text (str): The text content to be synthesized into speech.
+        generated_text_state (str): The previously generated text state, used to determine if the text has been modified.
 
     Returns:
-        A tuple of:
-        - Update for first audio player (with autoplay)
-        - Update for second audio player
-        - A dictionary mapping options to providers
-        - The raw audio value for option B
+        Tuple containing:
+            - gr.update: Update for the first audio player (with autoplay enabled).
+            - gr.update: Update for the second audio player.
+            - dict: A mapping of option constants to their corresponding TTS providers.
+            - str: The raw audio value (relative file path) for option B.
+            - ComparisonType: The comparison type between the selected TTS providers.
+            - str: Generation ID for option A.
+            - str: Generation ID for option B.
+            - bool: Flag indicating whether the text was modified.
+            - str: The original text that was synthesized.
+            - str: The original character description.
 
     Raises:
-        gr.Error: On API or unexpected errors.
+        gr.Error: If any API or unexpected errors occur during the TTS synthesis process.
     """
     if not text:
         logger.warning("Skipping text-to-speech due to empty text.")
         raise gr.Error("Please generate or enter text to synthesize.")
 
-    # Hume AI always included in comparison
-    provider_a = constants.HUME_AI
-    # If not using generated text, then only compare Hume to Hume
+    # Select 2 TTS providers based on whether the text has been modified.
     text_modified = text != generated_text_state
-    provider_b: constants.TTSProviderName = (
-        constants.HUME_AI if text_modified else random.choice(constants.TTS_PROVIDERS)
-    )
+    comparison_type, provider_a, provider_b = choose_providers(text_modified)
 
     try:
-        with ThreadPoolExecutor(max_workers=2) as executor:
-            future_audio_a = executor.submit(
-                text_to_speech_with_hume, character_description, text
-            )
-
-            match provider_b:
-                case constants.HUME_AI:
-                    comparison_type: ComparisonType = constants.HUME_TO_HUME
-                    future_audio_b = executor.submit(
-                        text_to_speech_with_hume, character_description, text
-                    )
-                case constants.ELEVENLABS:
-                    comparison_type: ComparisonType = constants.HUME_TO_ELEVENLABS
-                    future_audio_b = executor.submit(
-                        text_to_speech_with_elevenlabs, character_description, text
-                    )
-                case _:
-                    raise ValueError(f"Unsupported provider: {provider_b}")
-
-        generation_id_a, audio_a = future_audio_a.result()
-        generation_id_b, audio_b = future_audio_b.result()
-
-        options = [
-            (provider_a, audio_a, generation_id_a),
-            (provider_b, audio_b, generation_id_b),
-        ]
-        random.shuffle(options)
-        options_map: OptionMap = {
-            constants.OPTION_A: options[0][0],
-            constants.OPTION_B: options[1][0],
-        }
-        option_a_audio, option_b_audio = options[0][1], options[1][1]
-        option_a_generation_id, option_b_generation_id = options[0][2], options[1][2]
+        if provider_b == constants.HUME_AI:
+            # If generating 2 Hume outputs, do so in a single API call
+            (
+                generation_id_a,
+                audio_a,
+                generation_id_b,
+                audio_b,
+            ) = text_to_speech_with_hume(character_description, text, 2)
+        else:
+            with ThreadPoolExecutor(max_workers=2) as executor:
+                # Generate a single Hume output
+                future_audio_a = executor.submit(
+                    text_to_speech_with_hume, character_description, text
+                )
+                # Generate a second TTS output from the second provider
+                match provider_b:
+                    case constants.ELEVENLABS:
+                        future_audio_b = executor.submit(
+                            text_to_speech_with_elevenlabs, character_description, text
+                        )
+                    case _:
+                        # Additional TTS Providers can be added here
+                        raise ValueError(f"Unsupported provider: {provider_b}")
+
+                generation_id_a, audio_a = future_audio_a.result()
+                generation_id_b, audio_b = future_audio_b.result()
+
+        # Shuffle options so that placement of options in the UI will always be random
+        (
+            option_a_audio,
+            option_b_audio,
+            option_a_generation_id,
+            option_b_generation_id,
+            options_map,
+        ) = create_shuffled_tts_options(
+            provider_a, audio_a, generation_id_a, provider_b, audio_b, generation_id_b
+        )
 
     return (
         gr.update(value=option_a_audio, visible=True, autoplay=True),
src/integrations/elevenlabs_api.py CHANGED
@@ -23,7 +23,7 @@ Functions:
 from dataclasses import dataclass
 import logging
 import random
-from typing import Optional
+from typing import Optional, Union
 
 # Third-Party Library Imports
 from elevenlabs import ElevenLabs, TextToVoiceCreatePreviewsRequestOutputFormat
@@ -85,18 +85,20 @@ elevenlabs_config = ElevenLabsConfig()
     after=after_log(logger, logging.DEBUG),
     reraise=True,
 )
-def text_to_speech_with_elevenlabs(character_description: str, text: str) -> bytes:
+def text_to_speech_with_elevenlabs(
+    character_description: str, text: str
+) -> Tuple[None, str]:
     """
-    Synthesizes text to speech using the ElevenLabs TTS API, processes audio data, and writes audio to a file.
+    Synthesizes text to speech using the ElevenLabs TTS API, processes the audio data, and writes it to a file.
 
     Args:
-        character_description (str): The original user character description used as the voice description.
-        text (str): The text to be synthesized to speech.
+        character_description (str): The character description used as the voice description.
+        text (str): The text to be synthesized into speech.
 
     Returns:
         Tuple[None, str]: A tuple containing:
             - generation_id (None): We do not record the generation ID for ElevenLabs, but return None for uniformity across TTS integrations
-            - file_path (str): The relative path to the file where the synthesized audio was saved.
+            - file_path (str): The relative file path to the audio file where the synthesized speech was saved.
 
     Raises:
         ElevenLabsError: If there is an error communicating with the ElevenLabs API or processing the response.
@@ -124,9 +126,10 @@ def text_to_speech_with_elevenlabs(character_description: str, text: str) -> bytes:
         generated_voice_id = preview.generated_voice_id
         base64_audio = preview.audio_base_64
         filename = f"{generated_voice_id}.mp3"
+        audio_file_path = save_base64_audio_to_file(base64_audio, filename)
 
         # Write audio to file and return the relative path
-        return None, save_base64_audio_to_file(base64_audio, filename)
+        return None, audio_file_path
 
     except Exception as e:
         if isinstance(e, ApiError):
src/integrations/hume_api.py CHANGED
@@ -23,7 +23,7 @@ from dataclasses import dataclass
 import logging
 import os
 import random
-from typing import Literal, Optional
+from typing import Any, Dict, Literal, Optional, Tuple, Union
 
 # Third-Party Library Imports
 import requests
@@ -96,28 +96,50 @@ hume_config = HumeConfig()
     after=after_log(logger, logging.DEBUG),
     reraise=True,
 )
-def text_to_speech_with_hume(character_description: str, text: str) -> bytes:
+def text_to_speech_with_hume(
+    character_description: str, text: str, num_generations: int = 1
+) -> Union[Tuple[str, str], Tuple[str, str, str, str]]:
     """
     Synthesizes text to speech using the Hume TTS API, processes audio data, and writes audio to a file.
 
+    This function sends a POST request to the Hume TTS API with a character description and text
+    to be converted to speech. Depending on the specified number of generations (allowed values: 1 or 2),
+    the API returns one or two generations. For each generation, the function extracts the base64-encoded
+    audio and the generation ID, saves the audio as an MP3 file via the `save_base64_audio_to_file` helper,
+    and returns the relevant details.
+
     Args:
-        character_description (str): The original user character description to use as the description for generating the voice.
-        text (str): The generated text to be converted to speech.
+        character_description (str): A description of the character, which is used as contextual input
+            for generating the voice.
+        text (str): The text to be converted to speech.
+        num_generations (int, optional): The number of audio generations to request from the API.
+            Allowed values are 1 or 2. If 1, only a single generation is processed; if 2, a second
+            generation is expected in the API response. Defaults to 1.
 
     Returns:
-        Tuple[str, str]: A tuple containing:
-            - generation_id (str): The generation ID returned from the Hume API.
-            - file_path (str): The relative path to the file where the synthesized audio was saved.
+        Union[Tuple[str, str], Tuple[str, str, str, str]]:
+            - If num_generations == 1: A tuple in the form (generation_a_id, audio_a_path).
+            - If num_generations == 2: A tuple in the form (generation_a_id, audio_a_path, generation_b_id, audio_b_path).
 
     Raises:
-        HumeError: If there is an error communicating with the Hume TTS API or parsing the response.
+        ValueError: If num_generations is not 1 or 2.
+        HumeError: If there is an error communicating with the Hume TTS API or parsing its response.
+        UnretryableHumeError: If a client-side HTTP error (status code in the 4xx range) is encountered.
+        Exception: Any other exceptions raised during the request or processing will be wrapped and re-raised as HumeError.
     """
     logger.debug(
         f"Processing TTS with Hume. Prompt length: {len(character_description)} characters. Text length: {len(text)} characters."
     )
 
+    if num_generations < 1 or num_generations > 2:
+        raise ValueError("Invalid number of generations specified. Must be 1 or 2.")
+
     request_body = {
-        "utterances": [{"text": text, "description": character_description}]
+        "utterances": [{"text": text, "description": character_description}],
+        "format": {
+            "type": hume_config.file_format,
+        },
+        "num_generations": num_generations,
     }
 
     try:
@@ -137,22 +159,58 @@ def text_to_speech_with_hume(character_description: str, text: str) -> bytes:
             raise HumeError(msg)
 
         # Extract the base64 encoded audio and generation ID from the generation
-        generation = generations[0]
-        generation_id = generation.get("generation_id")
-        base64_audio = generation.get("audio")
-        filename = f"{generation_id}.mp3"
+        generation_a = generations[0]
+        generation_a_id, audio_a_path = parse_hume_tts_generation(generation_a)
 
-        # Write audio to file and return the relative path
-        return generation_id, save_base64_audio_to_file(base64_audio, filename)
+        if num_generations == 1:
+            return (generation_a_id, audio_a_path)
+
+        generation_b = generations[1]
+        generation_b_id, audio_b_path = parse_hume_tts_generation(generation_b)
+        return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
 
     except Exception as e:
         if isinstance(e, HTTPError):
            if e.response.status_code >= 400 and e.response.status_code < 500:
                 raise UnretryableHumeError(
-                    message=f'"{e.response.text}"',
-                    original_exception=e,
+                    message=f'"{e.response.text}"', original_exception=e
                 ) from e
-        raise HumeError(
-            message=f"{e}",
-            original_exception=e,
-        ) from e
+        raise HumeError(message=f"{e}", original_exception=e) from e
+
+
+def parse_hume_tts_generation(generation: Dict[str, Any]) -> Tuple[str, str]:
+    """
+    Parse a Hume TTS generation response and save the decoded audio as an MP3 file.
+
+    This function extracts the generation ID and the base64-encoded audio from the provided
+    dictionary. It then decodes and saves the audio data to an MP3 file, naming the file using
+    the generation ID. Finally, it returns a tuple containing the generation ID and the file path
+    of the saved audio.
+
+    Args:
+        generation (Dict[str, Any]): A dictionary representing the TTS generation response from Hume.
+            Expected keys are:
+                - "generation_id" (str): A unique identifier for the generated audio.
+                - "audio" (str): A base64 encoded string of the audio data.
+
+    Returns:
+        Tuple[str, str]: A tuple containing:
+            - generation_id (str): The unique identifier for the audio generation.
+            - audio_path (str): The filesystem path where the audio file was saved.
+
+    Raises:
+        KeyError: If the "generation_id" or "audio" key is missing from the generation dictionary.
+        Exception: Propagates any exceptions raised by save_base64_audio_to_file, such as errors during
+            the decoding or file saving process.
+    """
+    generation_id = generation.get("generation_id")
+    if generation_id is None:
+        raise KeyError("The generation dictionary is missing the 'generation_id' key.")
+
+    base64_audio = generation.get("audio")
+    if base64_audio is None:
+        raise KeyError("The generation dictionary is missing the 'audio' key.")
+
+    filename = f"{generation_id}.mp3"
+    audio_file_path = save_base64_audio_to_file(base64_audio, filename)
+    return generation_id, audio_file_path
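A hedged sketch of what the new parse_hume_tts_generation helper expects and returns (illustrative only: the payload below is fabricated, real entries come from the Hume TTS API response, and the resulting path depends on how save_base64_audio_to_file resolves the configured audio directory):

import base64

from src.integrations.hume_api import parse_hume_tts_generation

fake_generation = {  # fabricated stand-in for one entry of the API's "generations" list
    "generation_id": "example-generation-id",
    "audio": base64.b64encode(b"not real mp3 bytes").decode(),
}

generation_id, audio_path = parse_hume_tts_generation(fake_generation)
# generation_id == "example-generation-id"; audio_path points at the saved
# "example-generation-id.mp3" file under the app's audio directory.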
src/types.py CHANGED
@@ -5,7 +5,7 @@ This module defines custom types for the application.
 """
 
 # Standard Library Imports
-from typing import Dict, Literal, TypedDict
+from typing import Dict, Literal, NamedTuple, TypedDict
 
 
 TTSProviderName = Literal["Hume AI", "ElevenLabs"]
@@ -24,6 +24,25 @@ OptionMap = Dict[OptionKey, TTSProviderName]
 """OptionMap defines the structure of the options mapping, where each key is an OptionKey and the value is a TTS provider."""
 
 
+class Option(NamedTuple):
+    """
+    Represents a text-to-speech generation option.
+
+    This type encapsulates the details for a generated text-to-speech (TTS) option,
+    including the provider that produced the audio, the relative file path to the generated
+    audio file, and the unique generation identifier associated with the TTS output.
+
+    Attributes:
+        provider (TTSProviderName): The TTS provider that generated the audio.
+        audio (str): The relative file path to the audio file produced by the TTS provider.
+        generation_id (str): The unique identifier for this TTS generation.
+    """
+
+    provider: TTSProviderName
+    audio: str
+    generation_id: str
+
+
 class VotingResults(TypedDict):
     """Voting results data structure representing values we want to persist to the votes DB"""
 
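A small, hedged illustration of the new Option type (the path and ID values below are hypothetical):

from src.types import Option

option = Option(
    provider="Hume AI",                     # must be a TTSProviderName literal
    audio="audio/example.mp3",              # hypothetical relative audio path
    generation_id="example-generation-id",  # hypothetical generation ID
)
assert option.provider == "Hume AI" and option.audio.endswith(".mp3")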
src/utils.py CHANGED
@@ -13,13 +13,13 @@ Functions:
 # Standard Library Imports
 import base64
 import os
+import random
+from typing import Tuple
 
 # Local Application Imports
+from src import constants
 from src.config import AUDIO_DIR, logger
-from src.constants import (
-    CHARACTER_DESCRIPTION_MIN_LENGTH,
-    CHARACTER_DESCRIPTION_MAX_LENGTH,
-)
+from src.types import ComparisonType, Option, OptionMap, TTSProviderName
 
 
 def truncate_text(text: str, max_length: int = 50) -> str:
@@ -108,14 +108,14 @@ def validate_character_description_length(character_description: str) -> None:
         f"Voice description length being validated: {character_description_length} characters"
     )
 
-    if character_description_length < CHARACTER_DESCRIPTION_MIN_LENGTH:
+    if character_description_length < constants.CHARACTER_DESCRIPTION_MIN_LENGTH:
         raise ValueError(
-            f"Your character description is too short. Please enter at least {CHARACTER_DESCRIPTION_MIN_LENGTH} characters. "
+            f"Your character description is too short. Please enter at least {constants.CHARACTER_DESCRIPTION_MIN_LENGTH} characters. "
             f"(Current length: {character_description_length})"
         )
-    if character_description_length > CHARACTER_DESCRIPTION_MAX_LENGTH:
+    if character_description_length > constants.CHARACTER_DESCRIPTION_MAX_LENGTH:
         raise ValueError(
-            f"Your character description is too long. Please limit it to {CHARACTER_DESCRIPTION_MAX_LENGTH} characters. "
+            f"Your character description is too long. Please limit it to {constants.CHARACTER_DESCRIPTION_MAX_LENGTH} characters. "
             f"(Current length: {character_description_length})"
         )
     logger.debug(
@@ -162,3 +162,102 @@ def save_base64_audio_to_file(base64_audio: str, filename: str) -> str:
     logger.debug(f"Audio file relative path: {relative_path}")
 
     return relative_path
+
+
+def choose_providers(
+    text_modified: bool,
+) -> Tuple[ComparisonType, TTSProviderName, TTSProviderName]:
+    """
+    Select two TTS providers based on whether the text has been modified.
+
+    The first provider is always set to "Hume AI". For the second provider, the function
+    selects "Hume AI" if the text has been modified; otherwise, it randomly chooses one from
+    the TTS_PROVIDERS list.
+
+    Args:
+        text_modified (bool): A flag indicating whether the text has been modified.
+            - If True, both providers will be "Hume AI".
+            - If False, the second provider is randomly selected from TTS_PROVIDERS.
+
+    Returns:
+        Tuple[ComparisonType, TTSProviderName, TTSProviderName]: A tuple containing the comparison
+        type and two TTS provider names, where the first provider is always "Hume AI" and the
+        second is determined by the text_modified flag and random selection.
+    """
+    provider_a = constants.HUME_AI
+    provider_b = (
+        constants.HUME_AI if text_modified else random.choice(constants.TTS_PROVIDERS)
+    )
+
+    match provider_b:
+        case constants.HUME_AI:
+            comparison_type = constants.HUME_TO_HUME
+        case constants.ELEVENLABS:
+            comparison_type = constants.HUME_TO_ELEVENLABS
+
+    return comparison_type, provider_a, provider_b
+
+
+def create_shuffled_tts_options(
+    provider_a: TTSProviderName,
+    audio_a: str,
+    generation_id_a: str,
+    provider_b: TTSProviderName,
+    audio_b: str,
+    generation_id_b: str,
+) -> Tuple[str, str, str, str, OptionMap]:
+    """
+    Create and shuffle TTS generation options.
+
+    This function creates two Option instances from the provided TTS details, shuffles them,
+    and then extracts the audio file paths and generation IDs from the shuffled options.
+    It also returns a mapping from option constants to the corresponding TTS providers.
+
+    Args:
+        provider_a (TTSProviderName): The TTS provider for the first generation.
+        audio_a (str): The relative file path to the audio file for the first generation.
+        generation_id_a (str): The generation ID for the first generation.
+        provider_b (TTSProviderName): The TTS provider for the second generation.
+        audio_b (str): The relative file path to the audio file for the second generation.
+        generation_id_b (str): The generation ID for the second generation.
+
+    Returns:
+        Tuple[str, str, str, str, OptionMap]:
+            A tuple containing:
+            - option_a_audio (str): Audio file path for the first shuffled option.
+            - option_b_audio (str): Audio file path for the second shuffled option.
+            - option_a_generation_id (str): Generation ID for the first shuffled option.
+            - option_b_generation_id (str): Generation ID for the second shuffled option.
+            - options_map (OptionMap): Mapping from option constants to their TTS providers.
+    """
+    # Create a list of Option instances for the available providers.
+    options = [
+        Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a),
+        Option(provider=provider_b, audio=audio_b, generation_id=generation_id_b),
+    ]
+
+    # Randomly shuffle the list of options.
+    random.shuffle(options)
+
+    # Unpack the two options.
+    option_a, option_b = options
+
+    # Extract audio file paths and generation IDs.
+    option_a_audio = option_a.audio
+    option_b_audio = option_b.audio
+    option_a_generation_id = option_a.generation_id
+    option_b_generation_id = option_b.generation_id
+
+    # Build a mapping from option constants to the corresponding providers.
+    options_map: OptionMap = {
+        constants.OPTION_A: option_a.provider,
+        constants.OPTION_B: option_b.provider,
+    }
+
+    return (
+        option_a_audio,
+        option_b_audio,
+        option_a_generation_id,
+        option_b_generation_id,
+        options_map,
+    )
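To show how app.py now composes the two new helpers, a hedged usage sketch (illustrative only: the audio paths and generation IDs below are hypothetical, and the provider and comparison constants come from src.constants as referenced in the diff):

from src.utils import choose_providers, create_shuffled_tts_options

# Pick the providers and comparison type; with text_modified=True this is Hume vs. Hume.
comparison_type, provider_a, provider_b = choose_providers(text_modified=True)

(
    option_a_audio,
    option_b_audio,
    option_a_generation_id,
    option_b_generation_id,
    options_map,
) = create_shuffled_tts_options(
    provider_a, "audio/gen_a.mp3", "gen-a-id",  # hypothetical path and ID
    provider_b, "audio/gen_b.mp3", "gen-b-id",  # hypothetical path and ID
)

# options_map maps constants.OPTION_A / constants.OPTION_B to the shuffled providers.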