File size: 5,328 Bytes
1190db4
 
 
 
d48101f
 
 
1190db4
 
 
d48101f
1190db4
d48101f
1190db4
 
 
 
 
 
 
 
 
 
 
d48101f
1190db4
 
 
 
d48101f
1190db4
d48101f
 
1190db4
d48101f
 
 
1190db4
d48101f
1190db4
 
d48101f
1190db4
 
 
 
 
 
 
 
d48101f
 
 
1190db4
d48101f
 
 
 
 
 
 
 
 
 
 
 
1190db4
d48101f
 
 
1190db4
 
 
d48101f
1190db4
 
 
 
d48101f
 
1190db4
d48101f
 
1190db4
 
 
d48101f
1190db4
 
 
 
d48101f
 
 
 
1190db4
d48101f
 
 
 
 
 
 
1190db4
 
d48101f
 
1190db4
d48101f
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import re
import math

# Maximum raw script length that parse_dialogue_script accepts before raising ValueError.
MAX_SCRIPT_LENGTH = 10000  # characters
# Per-character rate in USD. calculate_cost applies it to tts-1, tts-1-hd and to
# any model name it does not recognize.
# NOTE(review): the same rate is used for tts-1 and tts-1-hd; verify against current pricing.
TTS_1_HD_COST_PER_CHAR = 0.00003  # $30 / 1M chars for tts-1-hd and tts-1
# Per-second rate in USD for gpt-4o-mini-tts, derived from the per-minute price.
GPT_4O_MINI_TTS_COST_PER_SECOND = 0.015 / 60  # $0.015 / minute for gpt-4o-mini-tts
# Speaking-rate assumption calculate_cost uses to convert a character count into seconds.
CHARS_PER_SECOND_ESTIMATE = 12  # Average characters spoken per second, for estimation

def parse_dialogue_script(script_text, *, max_length=None):
    """
    Parses a dialogue script into structured lines and counts the characters to synthesize.

    Input format: "[Speaker] Utterance" per line.
      - Lines that do not start with a "[...]" tag are assigned to the "Narrator" speaker.
      - An empty tag such as "[] Text" is assigned to "UnknownSpeaker".
      - Blank lines and tagged lines without any utterance are skipped.

    Args:
        script_text: The raw script text.
        max_length: Optional maximum allowed length of script_text, in characters.
            When None (the default), MAX_SCRIPT_LENGTH is used.

    Returns:
        A tuple (parsed_lines, total_chars):
          - parsed_lines: list of dicts with keys "id", "speaker" and "text". "id" is the
            zero-based index of the line within the stripped script, so ids have gaps
            wherever a line was skipped.
          - total_chars: sum of the lengths of all "text" values.

    Raises:
        ValueError: If script_text is longer than the allowed maximum.
    """
    limit = MAX_SCRIPT_LENGTH if max_length is None else max_length

    # Reject oversized input up front, before spending any work on splitting it.
    if len(script_text) > limit:
        raise ValueError(f"Script is too long. Maximum {limit} characters allowed. Your script has {len(script_text)} characters.")

    # Compiled once: group 1 is the speaker tag content, group 2 the rest of the line.
    tag_pattern = re.compile(r'\[(.*?)\]\s*(.*)')
    parsed_lines = []
    total_chars = 0

    for i, line_content in enumerate(script_text.strip().split('\n')):
        line_content = line_content.strip()
        if not line_content:
            continue  # Skip empty lines

        match = tag_pattern.match(line_content)
        if match:
            speaker, utterance = (part.strip() for part in match.groups())
            if not speaker:  # Speaker tag is empty, like "[] Text"
                speaker = "UnknownSpeaker"
        else:
            # No speaker tag: the whole (already stripped) line is spoken by "Narrator"
            speaker = "Narrator"
            utterance = line_content

        if not utterance:  # e.g. "[Speaker]" with no text: nothing to synthesize
            continue

        parsed_lines.append({"id": i, "speaker": speaker, "text": utterance})
        total_chars += len(utterance)

    return parsed_lines, total_chars

def calculate_cost(total_chars, num_lines, model_name="tts-1-hd"):
    """
    Estimates the cost (in USD) of synthesizing `total_chars` characters.

    Args:
        total_chars: Number of characters that will be sent to the TTS model.
        num_lines: Number of parsed lines. Accepted for interface compatibility;
            it does not influence the result.
        model_name: TTS model identifier. "gpt-4o-mini-tts" is priced by estimated
            audio duration; "tts-1", "tts-1-hd" and any other name are priced
            per character.

    Returns:
        The estimated cost as a number.
    """
    if model_name != "gpt-4o-mini-tts":
        # tts-1 and tts-1-hd share the per-character rate, and unrecognized model
        # names deliberately fall back to it instead of raising.
        # NOTE(review): confirm that both models really have the same price.
        return total_chars * TTS_1_HD_COST_PER_CHAR

    # gpt-4o-mini-tts is billed by time here, so the duration is approximated from
    # the character count. This is a rough estimate. A non-positive speaking rate
    # would break the division, so a fixed 10 chars/sec is used in that case.
    chars_per_second = 10.0 if CHARS_PER_SECOND_ESTIMATE <= 0 else CHARS_PER_SECOND_ESTIMATE
    estimated_seconds = total_chars / chars_per_second
    return estimated_seconds * GPT_4O_MINI_TTS_COST_PER_SECOND

if __name__ == '__main__':
    # Demo 1: tagged lines, an untagged line, an empty tag and a tag without text.
    mixed_script = """
    [Alice] Hello Bob, how are you?
    [Bob] I'm fine, Alice. And you?
    This is a line without a speaker tag.
    [Charlie] Just listening in.
    [] This line has an empty speaker tag.
    [EmptySpeakerText]
    """
    print("--- Test Case 1: Mixed Script ---")
    mixed_lines, mixed_chars = parse_dialogue_script(mixed_script)
    print("Parsed Lines:")
    for entry in mixed_lines:
        print(entry)
    print(f"\nTotal Characters for TTS: {mixed_chars}")

    line_count = len(mixed_lines)

    hd_estimate = calculate_cost(mixed_chars, line_count, "tts-1-hd")
    print(f"Estimated cost for tts-1-hd: ${hd_estimate:.6f}")

    standard_estimate = calculate_cost(mixed_chars, line_count, "tts-1")
    print(f"Estimated cost for tts-1: ${standard_estimate:.6f}")

    # gpt-4o-mini-tts goes through the duration-based (per-second) formula
    mini_estimate = calculate_cost(mixed_chars, line_count, "gpt-4o-mini-tts")
    print(f"Estimated cost for gpt-4o-mini-tts (at {CHARS_PER_SECOND_ESTIMATE} chars/sec): ${mini_estimate:.6f}")


    # Demo 2: a script of exactly MAX_SCRIPT_LENGTH characters must be accepted,
    # one character more must be rejected.
    print("\n--- Test Case 2: Long Script (Boundary Check) ---")
    speaker_tag = "[SpeakerA] "
    boundary_script = speaker_tag + "a" * (MAX_SCRIPT_LENGTH - len(speaker_tag))
    _, boundary_chars = parse_dialogue_script(boundary_script)
    print(f"Long script (length {len(boundary_script)} chars) parsed successfully. TTS Chars: {boundary_chars}")

    oversized_script = "a" * (MAX_SCRIPT_LENGTH + 1)
    try:
        parse_dialogue_script(oversized_script)
    except ValueError as err:
        print(f"Correctly caught error for too long script: {err}")

    # Demo 3: input that yields no speakable lines at all.
    print("\n--- Test Case 3: Empty and Invalid Scripts ---")
    empty_lines, empty_chars = parse_dialogue_script("")
    print(f"Empty script: Parsed lines: {len(empty_lines)}, Chars: {empty_chars}")
    blank_lines, blank_chars = parse_dialogue_script("\n\n[Speaker]\n\n")
    print(f"Script with blank/invalid lines: Parsed lines: {len(blank_lines)}, Chars: {blank_chars} (Result: {blank_lines})")