Spaces:
Running
Running
File size: 5,328 Bytes
1190db4 d48101f 1190db4 d48101f 1190db4 d48101f 1190db4 d48101f 1190db4 d48101f 1190db4 d48101f 1190db4 d48101f 1190db4 d48101f 1190db4 d48101f 1190db4 d48101f 1190db4 d48101f 1190db4 d48101f 1190db4 d48101f 1190db4 d48101f 1190db4 d48101f 1190db4 d48101f 1190db4 d48101f 1190db4 d48101f 1190db4 d48101f 1190db4 d48101f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
import re
import math
# Upper bound on the raw script length; parse_dialogue_script raises
# ValueError when len(script_text) exceeds it.
MAX_SCRIPT_LENGTH = 10000 # characters
# Per-character rate in USD: 0.00003 * 1,000,000 chars == $30.
# NOTE(review): calculate_cost applies this single rate to both "tts-1" and
# "tts-1-hd" (and to any unrecognised model) -- confirm against the current
# OpenAI pricing page that the two models are really billed identically.
TTS_1_HD_COST_PER_CHAR = 0.00003 # $30 / 1M chars for tts-1-hd and tts-1
# Per-second rate in USD, derived from a per-minute price ($0.015 / 60 s).
GPT_4O_MINI_TTS_COST_PER_SECOND = 0.015 / 60 # $0.015 / minute for gpt-4o-mini-tts
# Divisor calculate_cost uses to turn a character count into an estimated
# audio duration in seconds for the per-second pricing path.
CHARS_PER_SECOND_ESTIMATE = 12 # Average characters spoken per second, for estimation
# Matches a leading "[Speaker]" tag followed by the (possibly empty) utterance.
# Compiled once at import time so the per-line loop does not repeat the lookup.
_SPEAKER_LINE_RE = re.compile(r'\[(.*?)\]\s*(.*)')


def parse_dialogue_script(script_text):
    """
    Parse a dialogue script into per-line records plus a character count.

    Input format is one "[Speaker] Utterance" entry per line:
      * a line without a leading "[...]" tag is attributed to "Narrator";
      * an empty tag ("[] text") is attributed to "UnknownSpeaker";
      * blank lines and tags with no text after them are skipped.

    Args:
        script_text: the full script as a single string.

    Returns:
        A tuple (parsed_lines, total_chars). parsed_lines is a list of dicts
        with keys "id" (zero-based index of the line within the stripped
        script, so skipped lines leave gaps), "speaker" and "text".
        total_chars is the sum of len(text) over the returned lines.

    Raises:
        ValueError: if len(script_text) exceeds MAX_SCRIPT_LENGTH. The length
            is measured on the raw input, including surrounding whitespace.
    """
    # Reject oversized input first so no stripping/splitting work is done on it.
    script_length = len(script_text)
    if script_length > MAX_SCRIPT_LENGTH:
        raise ValueError(
            f"Script is too long. Maximum {MAX_SCRIPT_LENGTH} characters allowed. "
            f"Your script has {script_length} characters."
        )

    parsed_lines = []
    total_chars = 0
    for i, raw_line in enumerate(script_text.strip().split('\n')):
        line_content = raw_line.strip()
        if not line_content:
            continue  # Skip empty lines

        match = _SPEAKER_LINE_RE.match(line_content)
        if match:
            speaker, utterance = (part.strip() for part in match.groups())
            if not speaker:  # Empty speaker tag such as "[] Text"
                speaker = "UnknownSpeaker"
        else:
            # No speaker tag: the whole (already stripped) line is narration.
            speaker = "Narrator"
            utterance = line_content

        if not utterance:  # e.g. "[Speaker]" with no text after the tag
            continue

        parsed_lines.append({"id": i, "speaker": speaker, "text": utterance})
        total_chars += len(utterance)

    return parsed_lines, total_chars
def calculate_cost(total_chars, num_lines, model_name="tts-1-hd"):
    """
    Estimate the cost in USD of synthesising a script with the given model.

    total_chars: number of characters that will be sent to the TTS model.
    num_lines: accepted for interface compatibility; it does not influence
        the result.
    model_name: "gpt-4o-mini-tts" is priced per estimated second of audio;
        every other value (including "tts-1", "tts-1-hd" and unrecognised
        names) is priced per character at TTS_1_HD_COST_PER_CHAR.

    Returns the estimated cost.

    NOTE(review): the per-character and per-second rates come from module
    constants; verify them against current OpenAI pricing before relying on
    the figures.
    """
    if model_name == "gpt-4o-mini-tts":
        # Duration is only a rough estimate: characters divided by an assumed
        # speaking rate. A non-positive configured rate falls back to
        # 10 chars/sec so the division can never be by zero.
        chars_per_second = (
            10.0 if CHARS_PER_SECOND_ESTIMATE <= 0 else CHARS_PER_SECOND_ESTIMATE
        )
        estimated_seconds = total_chars / chars_per_second
        return estimated_seconds * GPT_4O_MINI_TTS_COST_PER_SECOND

    # tts-1, tts-1-hd and any unknown model all use character-based pricing.
    return total_chars * TTS_1_HD_COST_PER_CHAR
if __name__ == '__main__':
    # Manual smoke checks for parse_dialogue_script and calculate_cost.
    # They print results for eyeballing; nothing here is asserted.

    # Covers: tagged lines, an untagged (Narrator) line, an empty tag, and a
    # tag with no utterance (which the parser is expected to drop).
    sample_script_1 = """
[Alice] Hello Bob, how are you?
[Bob] I'm fine, Alice. And you?
This is a line without a speaker tag.
[Charlie] Just listening in.
[] This line has an empty speaker tag.
[EmptySpeakerText]
"""
    print("--- Test Case 1: Mixed Script ---")
    parsed, chars = parse_dialogue_script(sample_script_1)
    print("Parsed Lines:")
    for p_line in parsed:
        print(p_line)
    print(f"\nTotal Characters for TTS: {chars}")

    # Character-priced models share one message format.
    for model in ("tts-1-hd", "tts-1"):
        model_cost = calculate_cost(chars, len(parsed), model)
        print(f"Estimated cost for {model}: ${model_cost:.6f}")

    # gpt-4o-mini-tts goes through the per-second estimate instead.
    cost_gpt_mini = calculate_cost(chars, len(parsed), "gpt-4o-mini-tts")
    print(f"Estimated cost for gpt-4o-mini-tts (at {CHARS_PER_SECOND_ESTIMATE} chars/sec): ${cost_gpt_mini:.6f}")

    print("\n--- Test Case 2: Long Script (Boundary Check) ---")
    # Exactly MAX_SCRIPT_LENGTH characters: the tag prefix plus filler.
    speaker_prefix = "[SpeakerA] "
    long_script_text = speaker_prefix + "a" * (MAX_SCRIPT_LENGTH - len(speaker_prefix))
    parsed_long, chars_long = parse_dialogue_script(long_script_text)
    print(f"Long script (length {len(long_script_text)} chars) parsed successfully. TTS Chars: {chars_long}")

    # One character over the limit must be rejected.
    too_long_script = "a" * (MAX_SCRIPT_LENGTH + 1)
    try:
        parse_dialogue_script(too_long_script)
    except ValueError as e:
        print(f"Correctly caught error for too long script: {e}")
    else:
        # Without this branch a missing length check would go unnoticed.
        print("ERROR: a script longer than the limit was not rejected.")

    print("\n--- Test Case 3: Empty and Invalid Scripts ---")
    parsed_empty, chars_empty = parse_dialogue_script("")
    print(f"Empty script: Parsed lines: {len(parsed_empty)}, Chars: {chars_empty}")

    parsed_blank_lines, chars_blank_lines = parse_dialogue_script("\n\n[Speaker]\n\n")
    print(f"Script with blank/invalid lines: Parsed lines: {len(parsed_blank_lines)}, Chars: {chars_blank_lines} (Result: {parsed_blank_lines})")