Commit: ver 2

Files changed:
- .gitignore +1 -1
- README.md +65 -6
- app.py +141 -131
- utils/merge_audio.py +112 -69
- utils/openai_tts.py +139 -83
- utils/script_parser.py +58 -26
.gitignore
CHANGED
@@ -1,4 +1,4 @@
-
+__pycache__/
 *.pyc
 *.pyo
 *.pyd
README.md
CHANGED
@@ -1,12 +1,71 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: Dialogue TTS
+emoji: 🗣️🎙️
+colorFrom: blue
+colorTo: green
 sdk: gradio
-sdk_version: 5.29.0
 app_file: app.py
 pinned: false
 ---
-
+
+# Dialogue Script to Speech Synthesis
+
+This Hugging Face Space converts dialogue scripts into speech using OpenAI's TTS models (`tts-1`, `tts-1-hd`, `gpt-4o-mini-tts`).
+
+## Features
+
+* **Input Script**: Provide a dialogue script with lines in the format `[Speaker] Utterance`.
+* **TTS Models**: Choose from `tts-1`, `tts-1-hd`, or `gpt-4o-mini-tts`.
+* **Voice Configuration**:
+  * **Single Global Voice**: Use one voice for all speakers.
+  * **Random per Speaker**: Assigns a random voice to each speaker, kept consistent within a run.
+  * **A/B Round Robin**: Cycles through the available voices across the unique speakers.
+  * **Detailed Per-Speaker UI**: Configure voice, speed (for `tts-1`/`tts-1-hd`), and emotional vibe or custom instructions (for `gpt-4o-mini-tts`) for each speaker individually.
+* **Output**:
+  * A ZIP file containing individual MP3s for each line.
+  * A single merged MP3 of the entire dialogue with custom pauses.
+* **Cost Estimation**: Displays an estimated cost before generating audio.
+* **NSFW Check**: Optional content safety check using an external API (if `NSFW_API_URL_TEMPLATE` is configured).
+
+## How to Use
+
+1. **Enter your dialogue script** in the text area.
+   Example:
+   ```
+   [Alice] Hello Bob, how are you today?
+   [Bob] I'm doing great, Alice! Thanks for asking.
+   [Narrator] And so their conversation began.
+   ```
+2. **Select the TTS Model**.
+3. **Set the pause duration** (in milliseconds) between lines for the merged audio.
+4. **Choose a Speaker Configuration Method**:
+   * If "Single Voice (Global)", select the voice.
+   * If "Detailed Configuration...", click "Load/Refresh Per-Speaker Settings UI" and adjust settings for each speaker.
+   * Other methods assign voices automatically.
+5. (Optional) Adjust **Global Speed** or **Global Instructions** if applicable to your chosen model and configuration.
+6. Click **"Calculate Cost"** to see an estimate.
+7. Click **"Generate Audio"**.
+8. Download the ZIP file or listen to/download the merged MP3.
+
+## Secrets
+
+This Space requires the following secrets to be set in the Hugging Face Space settings:
+
+* `OPENAI_API_KEY`: Your OpenAI API key.
+* `NSFW_API_URL_TEMPLATE` (Optional): URL template for NSFW checking, e.g., `https://api.example.com/check?text={text}`. The `{text}` placeholder will be URL-encoded.
+* `MODEL_DEFAULT` (Optional): Default TTS model (e.g., `tts-1-hd`).
+
+## Smoke Test Script
+
+Use the following script to test basic functionality:
+
+   ```
+   [Gandalf] You shall not pass!
+   [Frodo] I will take the Ring to Mordor.
+   [Gandalf] So be it.
+   ```
+
+Choose your desired model and settings (e.g., "Random per Speaker"), then generate.
+
+## Deployment
+
+This application is designed to be deployed as a Hugging Face Space.
+Ensure `ffmpeg` is available (handled by `container.yaml` for Classic Spaces).
+Set the necessary secrets in your Space settings on Hugging Face Hub.
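The `[Speaker] Utterance` parsing itself lives in `utils/script_parser.py`, which this commit also touches (+58 -26) but which is not rendered in this view. As a rough sketch of what a parser for the format above could look like: the regex and return shape here are assumptions for illustration, inferred from how `app.py` calls `parse_dialogue_script`, not the Space's actual code:

```python
import re

# Hypothetical parser for the "[Speaker] Utterance" format described above.
# The real utils/script_parser.py may differ (validation, length limits, etc.).
LINE_RE = re.compile(r"^\[(?P<speaker>[^\]]+)\]\s*(?P<text>.+)$")

def parse_dialogue_script(script_text: str):
    """Return (parsed_lines, total_chars); each line is a dict with id/speaker/text."""
    parsed, total_chars = [], 0
    for raw in script_text.strip().splitlines():
        m = LINE_RE.match(raw.strip())
        if not m:
            continue  # the real parser may instead raise ValueError, as app.py's except clauses suggest
        text = m.group("text").strip()
        parsed.append({"id": len(parsed), "speaker": m.group("speaker").strip(), "text": text})
        total_chars += len(text)
    return parsed, total_chars
```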
app.py
CHANGED
@@ -46,13 +46,13 @@ SPEAKER_CONFIG_METHODS = [
     "Single Voice (Global)",
     "Random per Speaker",
     "A/B Round Robin",
+    "Detailed Configuration (Per Speaker UI)"
 ]
 DEFAULT_SPEAKER_CONFIG_METHOD = "Random per Speaker"
-APP_AVAILABLE_VOICES = ALL_TTS_VOICES.copy()
+APP_AVAILABLE_VOICES = ALL_TTS_VOICES.copy()  # Uses the extended list from openai_tts.py

 PREDEFINED_VIBES = {
+    "None": "",
     "Calm": "Speak in a calm, composed, and relaxed manner.",
     "Excited": "Speak with an energetic, enthusiastic, and lively tone.",
     "Happy": "Speak with a cheerful, bright, and joyful voice.",
@@ -63,7 +63,7 @@ PREDEFINED_VIBES = {
     "Formal": "Speak in a clear, precise, and professional tone, suitable for a formal address.",
     "Authoritative": "Speak with a commanding, confident, and firm voice.",
     "Friendly": "Speak in a warm, approachable, and amiable manner.",
+    "Custom...": "CUSTOM"
 }
 VIBE_CHOICES = list(PREDEFINED_VIBES.keys())
 DEFAULT_VIBE = "None"
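`"Custom..."` maps to the sentinel value `"CUSTOM"` rather than to an instruction string; `handle_script_processing` further down resolves it against the speaker's custom text. A condensed restatement of that precedence, with a helper name of my own that does not exist in `app.py`:

```python
def resolve_instructions(vibe: str, custom_instr: str, global_instructions: str) -> str | None:
    # Condensed restatement of the precedence in handle_script_processing
    # below (only relevant for gpt-4o-mini-tts): custom text wins when the
    # "Custom..." sentinel is selected, then the vibe preset, then globals.
    instructions = None
    if vibe == "Custom..." and custom_instr.strip():
        instructions = custom_instr.strip()
    elif vibe not in ("None", "Custom..."):
        instructions = PREDEFINED_VIBES.get(vibe, "")
    if not instructions:
        instructions = global_instructions.strip() or None
    return instructions
```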
@@ -72,45 +72,32 @@ def get_speakers_from_script(script_text):
     if not script_text.strip(): return []
     try:
         parsed_lines, _ = parse_dialogue_script(script_text)
+        # Return unique speakers in order of appearance (though order doesn't strictly matter for this use)
+        seen_speakers = set()
+        ordered_unique_speakers = []
+        for p in parsed_lines:
+            if p["speaker"] not in seen_speakers:
+                ordered_unique_speakers.append(p["speaker"])
+                seen_speakers.add(p["speaker"])
+        return ordered_unique_speakers
     except ValueError: return []


 def handle_dynamic_input_change(new_value, current_configs_state_dict, speaker_name, config_key, tts_model):
-    """
-    Updates the gr.State dictionary when a dynamic UI element changes.
-    current_configs_state_dict is the raw dictionary from gr.State.
-    """
     if speaker_name not in current_configs_state_dict:
         current_configs_state_dict[speaker_name] = {}

     current_configs_state_dict[speaker_name][config_key] = new_value
-
-    # Special handling for Vibe -> Custom Instructions visibility (Simpler: custom textbox always visible)
-    # For this iteration, custom textbox is always visible. Backend decides to use it.
-
-    # Determine visibility/interactivity of speed slider for this specific speaker's UI (if we were to update it directly)
-    # This is complex to do from a generic handler. Better to set initial visibility in load_refresh_per_speaker_ui.
-    # Global tts_model_dropdown change will refresh the whole dynamic UI if needed for speed/instr applicability.
-
     return current_configs_state_dict


 def load_refresh_per_speaker_ui(script_text, current_configs_state_dict, tts_model):
-    """
-    Generates the dynamic UI components (accordions) for each speaker.
-    Returns a list of Gradio components and the updated state.
-    """
     unique_speakers = get_speakers_from_script(script_text)
     new_ui_components = []

-    # Ensure state dict is not None (Gradio might pass None initially for gr.State)
     if current_configs_state_dict is None:
         current_configs_state_dict = {}

-    # Update state for any new speakers or remove speakers no longer in script
-    # (Optional: more complex logic could be to remove speakers from state if not in script)
-    # For now, just add new ones with defaults if not present.
     for speaker_name in unique_speakers:
         if speaker_name not in current_configs_state_dict:
             current_configs_state_dict[speaker_name] = {
@@ -119,7 +106,6 @@ def load_refresh_per_speaker_ui(script_text, current_configs_state_dict, tts_model):
                 "vibe": DEFAULT_VIBE,
                 "custom_instructions": ""
             }
-        # Ensure all keys exist for existing speakers (e.g., if new fields added)
         current_configs_state_dict[speaker_name].setdefault("voice", APP_AVAILABLE_VOICES[0])
         current_configs_state_dict[speaker_name].setdefault("speed", 1.0)
         current_configs_state_dict[speaker_name].setdefault("vibe", DEFAULT_VIBE)
@@ -128,42 +114,37 @@ def load_refresh_per_speaker_ui(script_text, current_configs_state_dict, tts_model):

     if not unique_speakers:
         new_ui_components.append(gr.Markdown("No speakers detected in the script, or script is empty. Type a script and click 'Load/Refresh' again."))
-        # Return current (possibly empty) state and the markdown message
         return new_ui_components, current_configs_state_dict

     for speaker_name in unique_speakers:
+        speaker_cfg = current_configs_state_dict[speaker_name]

-        # Determine if speed/instructions are applicable for the current global TTS model
         speed_interactive = tts_model in ["tts-1", "tts-1-hd"]
+        instructions_relevant = tts_model == "gpt-4o-mini-tts"

         with gr.Accordion(label=f"Settings for: {speaker_name}", open=False) as speaker_accordion:
-            # Voice Dropdown
             voice_dd = gr.Dropdown(
                 label="Voice", choices=APP_AVAILABLE_VOICES, value=speaker_cfg["voice"], interactive=True
             )
             voice_dd.change(
                 fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="voice", tts_model=tts_model),
+                inputs=[voice_dd, speaker_configs_state],
                 outputs=[speaker_configs_state]
             )

-            # Speed Slider
             speed_slider_label = "Speech Speed" + (" (Active for tts-1/hd)" if speed_interactive else " (N/A for this model)")
             speed_slider = gr.Slider(
                 label=speed_slider_label, minimum=0.25, maximum=4.0, value=speaker_cfg["speed"],
                 step=0.05, interactive=speed_interactive
             )
+            if speed_interactive:
+                speed_slider.release(
                     fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="speed", tts_model=tts_model),
                     inputs=[speed_slider, speaker_configs_state],
                     outputs=[speaker_configs_state]
                 )

-            # Vibe Dropdown
             vibe_label = "Vibe/Emotion Preset" + (" (For gpt-4o-mini-tts)" if instructions_relevant else " (Less impact on other models)")
             vibe_dd = gr.Dropdown(
                 label=vibe_label, choices=VIBE_CHOICES, value=speaker_cfg["vibe"], interactive=True
@@ -174,16 +155,15 @@ def load_refresh_per_speaker_ui(script_text, current_configs_state_dict, tts_model):
                 outputs=[speaker_configs_state]
             )

-            # Custom Instructions Textbox
             custom_instr_label = "Custom Instructions"
             custom_instr_placeholder = "Only used if Vibe is 'Custom...'. Overrides Vibe."
             custom_instr_tb = gr.Textbox(
                 label=custom_instr_label,
                 value=speaker_cfg["custom_instructions"],
                 placeholder=custom_instr_placeholder,
+                lines=2, interactive=True
             )
+            custom_instr_tb.input(
                 fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="custom_instructions", tts_model=tts_model),
                 inputs=[custom_instr_tb, speaker_configs_state],
                 outputs=[speaker_configs_state]
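The accordion block above leans on one generic callback, with `functools.partial` baking the speaker name and config key into each event, and `gr.State` carrying the shared dict between events. A stripped-down, self-contained version of the pattern (component names and values here are illustrative, not the Space's):

```python
import gradio as gr
from functools import partial

def set_config(new_value, configs, speaker, key):
    # Generic handler: write one field of one speaker's config, return the dict
    configs.setdefault(speaker, {})[key] = new_value
    return configs

with gr.Blocks() as sketch:
    configs_state = gr.State({})
    for speaker in ["Alice", "Bob"]:
        with gr.Accordion(f"Settings for: {speaker}", open=False):
            voice = gr.Dropdown(["alloy", "nova"], label="Voice")
            voice.change(
                fn=partial(set_config, speaker=speaker, key="voice"),
                inputs=[voice, configs_state],
                outputs=[configs_state],
            )
```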
@@ -196,7 +176,6 @@ def load_refresh_per_speaker_ui(script_text, current_configs_state_dict, tts_model):
 async def handle_script_processing(
         dialogue_script: str, tts_model: str, pause_ms: int,
         speaker_config_method: str, global_voice_selection: str,
-        # No more df_value, instead we use speaker_configs_state_dict from gr.State
         speaker_configs_state_dict: dict,
         global_speed: float,
         global_instructions: str, progress=gr.Progress(track_tqdm=True)):
@@ -204,65 +183,65 @@ async def handle_script_processing(
     if not OPENAI_API_KEY or not async_openai_client: return None, None, "Error: OPENAI_API_KEY missing."
     if not dialogue_script.strip(): return None, None, "Error: Script empty."

+    # Create a job-specific temporary directory and ensure it's clean
+    job_audio_path_prefix = os.path.join(tempfile.gettempdir(), f"dialogue_tts_job_{random.randint(10000, 99999)}")
     if os.path.exists(job_audio_path_prefix): shutil.rmtree(job_audio_path_prefix)
     os.makedirs(job_audio_path_prefix, exist_ok=True)

     try:
         parsed_lines, _ = parse_dialogue_script(dialogue_script)
+        if not parsed_lines:
+            shutil.rmtree(job_audio_path_prefix)
+            return None, None, "Error: No valid lines found in script."
+    except ValueError as e:
+        shutil.rmtree(job_audio_path_prefix)
+        return None, None, f"Script parsing error: {str(e)}"

-    # Ensure state dict is usable
     if speaker_configs_state_dict is None: speaker_configs_state_dict = {}

+    # --- Voice assignment map for Random and A/B per Speaker ---
+    speaker_voice_map = {}
+    if speaker_config_method in ["Random per Speaker", "A/B Round Robin"]:
+        unique_script_speakers_for_map = get_speakers_from_script(dialogue_script)
+        if speaker_config_method == "Random per Speaker":
+            for spk_name in unique_script_speakers_for_map:
+                speaker_voice_map[spk_name] = random.choice(APP_AVAILABLE_VOICES)
+        elif speaker_config_method == "A/B Round Robin":
+            for i, spk_name in enumerate(unique_script_speakers_for_map):
+                # Ensure APP_AVAILABLE_VOICES is not empty to prevent modulo by zero
+                if APP_AVAILABLE_VOICES:
+                    speaker_voice_map[spk_name] = APP_AVAILABLE_VOICES[i % len(APP_AVAILABLE_VOICES)]
+                else:  # Fallback if voice list is somehow empty
+                    speaker_voice_map[spk_name] = "alloy"  # Default OpenAI voice
+    # --- End voice assignment map ---

     tasks, line_audio_files = [], [None] * len(parsed_lines)
     for i, line_data in enumerate(parsed_lines):
         speaker_name = line_data["speaker"]

+        line_voice = global_voice_selection  # Default for "Single Voice (Global)" or fallback
         line_speed = global_speed
         line_instructions = global_instructions if global_instructions and global_instructions.strip() else None

         if speaker_config_method == "Detailed Configuration (Per Speaker UI)":
             spk_cfg = speaker_configs_state_dict.get(speaker_name, {})
+            line_voice = spk_cfg.get("voice", global_voice_selection)
-            # Speed: per-speaker if tts-1/hd and set, else global if tts-1/hd, else API default
             if tts_model in ["tts-1", "tts-1-hd"]:
                 line_speed = spk_cfg.get("speed", global_speed)
-            # Instructions: primarily for gpt-4o-mini-tts
             if tts_model == "gpt-4o-mini-tts":
                 vibe = spk_cfg.get("vibe", DEFAULT_VIBE)
                 custom_instr = spk_cfg.get("custom_instructions", "").strip()
+                if vibe == "Custom..." and custom_instr: line_instructions = custom_instr
+                elif vibe != "None" and vibe != "Custom...": line_instructions = PREDEFINED_VIBES.get(vibe, "")
+                if not line_instructions and global_instructions and global_instructions.strip(): line_instructions = global_instructions
+                elif not line_instructions: line_instructions = None
-        elif speaker_config_method == "Random per Speaker":
-            # Simplified: assign random now, could be cached as before for consistency within run
-            line_voice = random.choice(APP_AVAILABLE_VOICES)
-        elif speaker_config_method == "A/B Round Robin":
-            # Simplified: assign A/B now
-            unique_script_speakers = get_speakers_from_script(dialogue_script)  # Re-get for this logic
-            speaker_idx = unique_script_speakers.index(speaker_name) if speaker_name in unique_script_speakers else 0
-            line_voice = APP_AVAILABLE_VOICES[speaker_idx % len(APP_AVAILABLE_VOICES)]
+        elif speaker_config_method == "Random per Speaker" or speaker_config_method == "A/B Round Robin":
+            line_voice = speaker_voice_map.get(speaker_name, global_voice_selection)  # Use mapped voice

-        if tts_model not in ["tts-1", "tts-1-hd"]:
-            line_speed = 1.0  # API default, won't be sent
+        if tts_model not in ["tts-1", "tts-1-hd"]: line_speed = 1.0

         out_fn = os.path.join(job_audio_path_prefix, f"line_{line_data['id']}.mp3")
+        progress(i / len(parsed_lines), desc=f"Synthesizing: Line {i+1}/{len(parsed_lines)} ({speaker_name})")
         tasks.append(synthesize_speech_line(
             client=async_openai_client, text=line_data["text"], voice=line_voice,
             output_path=out_fn, model=tts_model, speed=line_speed,
@@ -271,102 +250,125 @@ async def handle_script_processing(

     results = await asyncio.gather(*tasks, return_exceptions=True)
     for idx, res in enumerate(results):
+        if isinstance(res, Exception): print(f"Error synthesizing line {parsed_lines[idx]['id']}: {res}")
+        elif res is None: print(f"Skipped or failed synthesizing line {parsed_lines[idx]['id']}")
+        else: line_audio_files[parsed_lines[idx]['id']] = res  # Store by original line ID if non-sequential

+    # Filter for valid, existing files, using the original parsed_lines order for merge
+    files_for_merge = []
+    for p_line in parsed_lines:
+        file_path = line_audio_files[p_line['id']]
+        if file_path and os.path.exists(file_path) and os.path.getsize(file_path) > 0:
+            files_for_merge.append(file_path)
+        else:
+            files_for_merge.append(None)  # Keep placeholder for correct ordering if a line failed

+    valid_files_for_zip = [f for f in files_for_merge if f]
+
+    if not valid_files_for_zip:
+        shutil.rmtree(job_audio_path_prefix); return None, None, "Error: No audio was successfully synthesized."

     zip_fn = os.path.join(job_audio_path_prefix, "dialogue_lines.zip")
+    with zipfile.ZipFile(zip_fn, 'w') as zf:
+        for f_path in valid_files_for_zip:
+            zf.write(f_path, os.path.basename(f_path))

     merged_fn = os.path.join(job_audio_path_prefix, "merged_dialogue.mp3")
+    # Pass only existing files to merge_mp3_files, maintaining order
+    ordered_files_to_merge = [f for f in files_for_merge if f]
+    merged_path = merge_mp3_files(ordered_files_to_merge, merged_fn, pause_ms)

+    status = f"Successfully processed {len(valid_files_for_zip)} out of {len(parsed_lines)} lines. "
+    if len(valid_files_for_zip) < len(parsed_lines): status += "Some lines may have failed. "
+    if not merged_path and len(valid_files_for_zip) > 0: status += "Merging audio failed. "
+    elif not merged_path: status = "No audio to merge."  # Overrides previous status if all failed before merge
+    else: status += "Merged audio generated."

+    # Note: job_audio_path_prefix (temp dir) is not explicitly deleted here.
+    # Gradio File/Audio components copy the file, so the temp dir can be cleaned
+    # by the OS or a cleanup routine if this Space were long-running.
+    # For HF Spaces, /tmp is ephemeral anyway. For robustness, could add shutil.rmtree(job_audio_path_prefix)
+    # after files are served, but need to ensure Gradio has finished with them.
+    # For now, rely on new unique dir per run and ephemeral /tmp.

     return (zip_fn if os.path.exists(zip_fn) else None,
             merged_path if merged_path and os.path.exists(merged_path) else None,
             status)
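Note the behavioral fix in this hunk: voices for "Random per Speaker" and "A/B Round Robin" are now drawn once per speaker before the synthesis loop, so a speaker keeps the same voice for every line of a run, whereas the removed code re-rolled on each line. The round-robin arithmetic, for illustration:

```python
voices = ["alloy", "echo", "fable"]               # stand-in for APP_AVAILABLE_VOICES
speakers = ["Alice", "Bob", "Narrator", "Guard"]  # order of first appearance
mapping = {s: voices[i % len(voices)] for i, s in enumerate(speakers)}
# {'Alice': 'alloy', 'Bob': 'echo', 'Narrator': 'fable', 'Guard': 'alloy'}
```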

 def handle_calculate_cost(dialogue_script: str, tts_model: str):
+    if not dialogue_script.strip(): return "Cost: $0.00 (Script is empty)"
     try:
         parsed, chars = parse_dialogue_script(dialogue_script)
+        if not parsed: return "Cost: $0.00 (No valid lines in script)"
         cost = calculate_cost(chars, len(parsed), tts_model)
+        # Using .6f for precision, especially for char-based cost
+        return f"Estimated Cost for {len(parsed)} lines ({chars} chars): ${cost:.6f}"
+    except ValueError as e:  # Catch script length error from parser
+        return f"Cost calculation error: {str(e)}"
+    except Exception as e:
+        return f"An unexpected error occurred during cost calculation: {str(e)}"
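`calculate_cost` comes from a utility module that is not part of this diff. For orientation only, a per-character implementation sketch; the rate table below is an assumption (OpenAI has published roughly $0.015 per 1K characters for `tts-1` and $0.030 per 1K for `tts-1-hd`; `gpt-4o-mini-tts` pricing is token-based and the figure here is only a placeholder):

```python
# Hypothetical sketch; the Space's real calculate_cost may differ.
ASSUMED_PRICE_PER_1K_CHARS = {
    "tts-1": 0.015,            # USD, assumed published rate
    "tts-1-hd": 0.030,         # USD, assumed published rate
    "gpt-4o-mini-tts": 0.015,  # placeholder: check current pricing
}

def calculate_cost(total_chars: int, num_lines: int, model: str) -> float:
    # num_lines is unused in this sketch but kept to match the call site above
    rate = ASSUMED_PRICE_PER_1K_CHARS.get(model, 0.030)
    return (total_chars / 1000.0) * rate
```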

 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# Dialogue Script to Speech (OpenAI TTS)")
     if not OPENAI_API_KEY or not async_openai_client:
+        gr.Markdown("<h3 style='color:red;'>⚠️ Warning: OPENAI_API_KEY secret is not set or invalid. Audio generation will fail. Please configure it in your Space settings.</h3>")

-    # State to hold detailed speaker configurations
     speaker_configs_state = gr.State({})

     with gr.Row():
         with gr.Column(scale=2):
+            script_input = gr.TextArea(label="Dialogue Script", placeholder="[Speaker1] Hello world!\n[Speaker2] How are you today?", lines=10)
         with gr.Column(scale=1):
             tts_model_dropdown = gr.Dropdown(TTS_MODELS_AVAILABLE, label="TTS Model", value=MODEL_DEFAULT)
+            pause_input = gr.Number(label="Pause Between Lines (ms)", value=500, minimum=0, maximum=5000, step=50)
+            global_speed_input = gr.Slider(minimum=0.25, maximum=4.0, value=1.0, step=0.05, label="Global Speed (for tts-1/hd)", visible=(MODEL_DEFAULT in ["tts-1", "tts-1-hd"]), interactive=True)
+            global_instructions_input = gr.Textbox(label="Global Instructions (for gpt-4o-mini-tts)", placeholder="e.g., Speak with a calm tone.", visible=(MODEL_DEFAULT == "gpt-4o-mini-tts"), interactive=True, lines=2)

+    gr.Markdown("### Speaker Voice & Style Configuration")
     speaker_config_method_dropdown = gr.Dropdown(
+        SPEAKER_CONFIG_METHODS, label="Configuration Method", value=DEFAULT_SPEAKER_CONFIG_METHOD
     )

-    # UI for "Single Voice (Global)"
     with gr.Group(visible=(DEFAULT_SPEAKER_CONFIG_METHOD == "Single Voice (Global)")) as single_voice_group:
         global_voice_dropdown = gr.Dropdown(
+            APP_AVAILABLE_VOICES, label="Global Voice", value=APP_AVAILABLE_VOICES[0] if APP_AVAILABLE_VOICES else "alloy", interactive=True
         )

-    # UI for "Detailed Configuration (Per Speaker UI)"
     with gr.Column(visible=(DEFAULT_SPEAKER_CONFIG_METHOD == "Detailed Configuration (Per Speaker UI)")) as detailed_per_speaker_ui_group:
         load_per_speaker_ui_button = gr.Button("Load/Refresh Per-Speaker Settings UI (from Script Above)")
+        gr.Markdown("<small>Click button above to populate settings for each speaker found in the script. Settings are applied per-speaker. If script changes, click again to refresh.</small>")
-        # This column will be populated by the output of load_per_speaker_ui_button
         dynamic_speaker_ui_area = gr.Column(elem_id="dynamic_ui_area_for_speakers")


     with gr.Row():
+        calculate_cost_button = gr.Button("Calculate Estimated Cost")
         generate_button = gr.Button("Generate Audio", variant="primary")

     cost_output = gr.Textbox(label="Estimated Cost", interactive=False)
     with gr.Row():
+        individual_lines_zip_output = gr.File(label="Download Individual Lines (ZIP)")
+        merged_dialogue_mp3_output = gr.Audio(label="Play/Download Merged Dialogue (MP3)", type="filepath")
+    status_output = gr.Textbox(label="Status", interactive=False, lines=2, max_lines=5)

-    # --- Event Handlers ---
     def update_model_controls_visibility(selected_model, script_text_for_refresh, current_speaker_configs_for_refresh):
+        new_dynamic_ui_components, updated_state = load_refresh_per_speaker_ui(script_text_for_refresh, current_speaker_configs_for_refresh, selected_model)
+
+        is_tts1_family = selected_model in ["tts-1", "tts-1-hd"]
+        is_gpt_mini_tts = selected_model == "gpt-4o-mini-tts"

+        # It's crucial that dynamic_speaker_ui_area receives the *list* of components.
+        # If it's wrapped in a gr.update, it might not render correctly unless gr.update(children=...)
+        # Direct assignment seems to be what Gradio expects when outputting to a Column/Row that acts as a container.
         return {
+            global_speed_input: gr.update(visible=is_tts1_family, interactive=is_tts1_family),
+            global_instructions_input: gr.update(visible=is_gpt_mini_tts, interactive=is_gpt_mini_tts),
+            dynamic_speaker_ui_area: new_dynamic_ui_components,
             speaker_configs_state: updated_state
         }
+
     tts_model_dropdown.change(
         fn=update_model_controls_visibility,
         inputs=[tts_model_dropdown, script_input, speaker_configs_state],
@@ -376,7 +378,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     def update_speaker_config_method_visibility(method):
         is_single = (method == "Single Voice (Global)")
         is_detailed_per_speaker = (method == "Detailed Configuration (Per Speaker UI)")
-        # Add more if other methods exist...
         return {
             single_voice_group: gr.update(visible=is_single),
             detailed_per_speaker_ui_group: gr.update(visible=is_detailed_per_speaker),
@@ -390,40 +391,49 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     load_per_speaker_ui_button.click(
         fn=load_refresh_per_speaker_ui,
         inputs=[script_input, speaker_configs_state, tts_model_dropdown],
-        # Output the list of components to the column, and the updated state to the state component
         outputs=[dynamic_speaker_ui_area, speaker_configs_state]
     )

     calculate_cost_button.click(fn=handle_calculate_cost, inputs=[script_input, tts_model_dropdown], outputs=[cost_output])

-    # Generate button now takes speaker_configs_state as input
     generate_button.click(
         fn=handle_script_processing,
         inputs=[
             script_input, tts_model_dropdown, pause_input,
             speaker_config_method_dropdown, global_voice_dropdown,
+            speaker_configs_state,
             global_speed_input, global_instructions_input
         ],
         outputs=[individual_lines_zip_output, merged_dialogue_mp3_output, status_output])

+    gr.Markdown("## Example Scripts")
+    example_script_1 = "[Alice] Hello Bob, this is a test using the detailed configuration method.\n[Bob] Hi Alice! I'm Bob, and I'll have my own voice settings.\n[Alice] Let's see how this sounds."
+    example_script_2 = "[Narrator] This is a short story.\n[CharacterA] Once upon a time...\n[Narrator] ...there was a Gradio app.\n[CharacterB] And it could talk!"
+
     gr.Examples(
         examples=[
+            [example_script_1, "tts-1-hd", 300, "Detailed Configuration (Per Speaker UI)", APP_AVAILABLE_VOICES[0] if APP_AVAILABLE_VOICES else "alloy", {}, 1.0, ""],
+            [example_script_2, "gpt-4o-mini-tts", 200, "Random per Speaker", APP_AVAILABLE_VOICES[0] if APP_AVAILABLE_VOICES else "alloy", {}, 1.0, "Speak with a gentle, storytelling voice for the narrator."],
+            ["[Solo] Just one line, using global voice and speed.", "tts-1", 0, "Single Voice (Global)", "fable", {}, 1.2, ""],
         ],
+        # speaker_configs_state is passed as an empty dict {} for examples.
+        # For "Detailed Configuration", the user should click "Load/Refresh Per-Speaker UI" after an example loads to populate the UI.
         inputs=[
             script_input, tts_model_dropdown, pause_input,
             speaker_config_method_dropdown, global_voice_dropdown,
             speaker_configs_state,
             global_speed_input, global_instructions_input
         ],
+        # Outputs for examples are not strictly necessary to pre-compute if cache_examples=False
+        # but defining them can help Gradio understand the flow.
+        # We can make the example click run the full processing.
         outputs=[individual_lines_zip_output, merged_dialogue_mp3_output, status_output],
+        fn=handle_script_processing,
+        cache_examples=False  # Set to True if pre-computation is desired and feasible
+    )

 if __name__ == "__main__":
+    # Required for Windows if using asyncio with ProactorEventLoop which can be default
+    if os.name == 'nt':
+        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
+    demo.launch(debug=True)  # Debug=True for development, remove for production/HF Space
utils/merge_audio.py
CHANGED
@@ -4,92 +4,135 @@ import os
 def merge_mp3_files(file_paths, output_filename, pause_ms=500):
     """
     Merges multiple MP3 files into a single MP3 file with a specified pause
-    between each segment.
+    between each segment. Skips missing or empty files.
+    Args:
+        file_paths (list): A list of paths to MP3 files to merge.
+            Can contain None entries for files that failed synthesis; these will be skipped.
+        output_filename (str): The path to save the merged MP3 file.
+        pause_ms (int): Duration of silence in milliseconds to add between segments.
+    Returns:
+        str: The path to the merged MP3 file if successful, None otherwise.
     """
     if not file_paths:
+        print("Warning: No file paths provided for merging.")
         return None

+    valid_segments = []
+    for file_path in file_paths:
+        if file_path and os.path.exists(file_path) and os.path.getsize(file_path) > 0:
+            try:
+                segment = AudioSegment.from_mp3(file_path)
+                valid_segments.append(segment)
+            except Exception as e:
+                print(f"Error loading audio segment from {file_path}: {e}. Skipping this file.")
+        elif file_path:  # File path provided but file is missing or empty
+            print(f"Warning: File {file_path} is missing or empty. Skipping.")
+        # If file_path is None, it's silently skipped (already handled upstream)

+    if not valid_segments:
         print("No valid audio segments found to merge.")
         return None

+    # Start with the first valid segment
+    combined_audio = valid_segments[0]
+
+    # Add subsequent segments with pauses
+    if len(valid_segments) > 1:
+        pause_segment = AudioSegment.silent(duration=max(0, pause_ms))  # Ensure pause_ms is not negative
+        for segment in valid_segments[1:]:
+            combined_audio += pause_segment
+            combined_audio += segment
+
     try:
+        # Export the combined audio to MP3 format
+        # May require ffmpeg/libav to be installed and accessible in PATH
+        combined_audio.export(output_filename, format="mp3")
+        if os.path.exists(output_filename) and os.path.getsize(output_filename) > 0:
+            return output_filename
+        else:
+            print(f"Error: Merged file {output_filename} was not created or is empty after export.")
+            return None
     except Exception as e:
         print(f"Error exporting merged MP3 to {output_filename}: {e}")
         return None

+# Helper function to create dummy MP3 files for testing (requires pydub and ffmpeg)
+def _create_dummy_mp3(filename, duration_ms=1000, text_for_log="dummy"):
+    try:
+        # Create a silent audio segment
+        silence = AudioSegment.silent(duration=duration_ms)
+        # Export it as an MP3 file
+        silence.export(filename, format="mp3")
+        print(f"Successfully created dummy MP3: {filename} (duration: {duration_ms}ms) for '{text_for_log}'")
+        return True
+    except Exception as e:
+        print(f"Could not create dummy MP3 '{filename}'. Ensure ffmpeg is installed and accessible. Error: {e}")
+        return False
+
 if __name__ == '__main__':
+    print("--- Testing merge_mp3_files ---")

+    test_output_dir = "test_audio_merge_output"
+    os.makedirs(test_output_dir, exist_ok=True)
+
+    dummy_files = []
+    # Create some dummy MP3 files for the test
+    if _create_dummy_mp3(os.path.join(test_output_dir, "dummy1.mp3"), 1000, "Segment 1"):
+        dummy_files.append(os.path.join(test_output_dir, "dummy1.mp3"))
+    if _create_dummy_mp3(os.path.join(test_output_dir, "dummy2.mp3"), 1500, "Segment 2"):
+        dummy_files.append(os.path.join(test_output_dir, "dummy2.mp3"))
+
+    # Test case 1: Merge existing files
+    if len(dummy_files) == 2:
+        output_merged_1 = os.path.join(test_output_dir, "merged_test1.mp3")
+        print(f"\nAttempting to merge: {dummy_files} with 300ms pause into {output_merged_1}")
+        result_path_1 = merge_mp3_files(dummy_files, output_merged_1, pause_ms=300)
+        if result_path_1 and os.path.exists(result_path_1):
+            print(f"SUCCESS: Merged audio created at: {result_path_1} (Size: {os.path.getsize(result_path_1)} bytes)")
+        else:
+            print(f"FAILURE: Merging test case 1 failed.")
+    else:
+        print("\nSkipping merge test case 1 due to failure in creating dummy files.")

+    # Test case 2: Include a non-existent file and a None entry
+    files_with_issues = [
+        dummy_files[0] if dummy_files else None,
+        os.path.join(test_output_dir, "non_existent_file.mp3"),
+        None,  # Representing a failed synthesis
+        dummy_files[1] if len(dummy_files) > 1 else None
+    ]
+    # Filter out None from the list if dummy files weren't created
+    files_with_issues_filtered = [f for f in files_with_issues if f is not None or isinstance(f, str)]

+    if any(f for f in files_with_issues_filtered if f and os.path.exists(f)):  # Proceed if at least one valid dummy file exists
+        output_merged_2 = os.path.join(test_output_dir, "merged_test2_with_issues.mp3")
+        print(f"\nAttempting to merge: {files_with_issues_filtered} with 500ms pause into {output_merged_2}")
+        result_path_2 = merge_mp3_files(files_with_issues_filtered, output_merged_2, pause_ms=500)
+        if result_path_2 and os.path.exists(result_path_2):
+            print(f"SUCCESS (with skips): Merged audio created at: {result_path_2} (Size: {os.path.getsize(result_path_2)} bytes)")
         else:
+            print(f"NOTE: Merging test case 2 might result in fewer segments or failure if no valid files remained.")
+    else:
+        print("\nSkipping merge test case 2 as no valid dummy files were available.")
+
+    # Test case 3: Empty list of files
+    output_merged_3 = os.path.join(test_output_dir, "merged_test3_empty.mp3")
+    print(f"\nAttempting to merge an empty list of files into {output_merged_3}")
+    result_path_3 = merge_mp3_files([], output_merged_3, pause_ms=100)
+    if result_path_3 is None:
+        print("SUCCESS: Correctly handled empty file list (returned None).")
+    else:
+        print(f"FAILURE: Expected None for empty file list, got {result_path_3}")
+
+    # Test case 4: List with only None or invalid paths
+    output_merged_4 = os.path.join(test_output_dir, "merged_test4_all_invalid.mp3")
+    print(f"\nAttempting to merge list with only invalid/None files into {output_merged_4}")
+    result_path_4 = merge_mp3_files([None, "non_existent.mp3"], output_merged_4, pause_ms=100)
+    if result_path_4 is None:
+        print("SUCCESS: Correctly handled list with only invalid/None files (returned None).")
+    else:
+        print(f"FAILURE: Expected None for all-invalid list, got {result_path_4}")

+    print(f"\nTest finished. Check ./{test_output_dir}/ for any generated files.")
+    # You might want to add shutil.rmtree(test_output_dir) here for cleanup after visual inspection.
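For context on the merge loop above: pydub overloads `+` for concatenation, so inserting silence between clips is just addition. A minimal illustration (assumes `ffmpeg` is on `PATH`; the file names are hypothetical):

```python
from pydub import AudioSegment

pause = AudioSegment.silent(duration=1000)  # one second of silence, in ms
a = AudioSegment.from_mp3("a.mp3")          # hypothetical input files
b = AudioSegment.from_mp3("b.mp3")
merged = a + pause + b                      # '+' concatenates segments
merged.export("merged.mp3", format="mp3")
```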
utils/openai_tts.py
CHANGED
@@ -3,49 +3,64 @@ import os
|
|
3 |
import time
|
4 |
from openai import AsyncOpenAI, OpenAIError, RateLimitError
import httpx  # For NSFW check

- # Concurrency limiter
MAX_CONCURRENT_REQUESTS = 2
semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)

- # Retry mechanism
MAX_RETRIES = 3
- INITIAL_BACKOFF_SECONDS = 1

async def is_content_safe(text: str, api_url_template: str | None) -> bool:
    """
    Checks if the content is safe using an external NSFW API.
-     Returns True if safe
    """
    if not api_url_template:
-         return True

    if "{text}" not in api_url_template:
-         print("Warning: NSFW_API_URL_TEMPLATE does not contain {text} placeholder. Skipping NSFW check.")
-         return True

    try:
-         encoded_text =
-         url = api_url_template.
    except httpx.RequestError as e:
-         print(f"NSFW Check: API request error: {e}")
-         return
    except Exception as e:
        print(f"NSFW Check: An unexpected error occurred: {e}")
-         return

async def synthesize_speech_line(
    client: AsyncOpenAI,
@@ -53,116 +68,157 @@ async def synthesize_speech_line(
    voice: str,
    output_path: str,
    model: str = "tts-1-hd",
-     speed: float = 1.0,
-     instructions: str | None = None,
    nsfw_api_url_template: str | None = None,
-     line_index: int = -1
) -> str | None:
    """
    Synthesizes a single line of text to speech using OpenAI TTS.
-     Retries on RateLimitError with exponential backoff.
    Returns the output_path if successful, None otherwise.
    """
    if nsfw_api_url_template:
        if not await is_content_safe(text, nsfw_api_url_template):
-             print(f"Line {line_index if line_index != -1 else '
-             return None

    current_retry = 0
    backoff_seconds = INITIAL_BACKOFF_SECONDS

    async with semaphore:
-         while current_retry
            try:
                request_params = {
                    "model": model,
-                     "voice": voice,
                    "input": text,
                }

-                 # Add speed if model
                if model in ["tts-1", "tts-1-hd"]:

-                 # Add instructions if model
-                 if model
-                     request_params["instructions"] = instructions

                response = await client.audio.speech.create(**request_params)
                await response.astream_to_file(output_path)
            except RateLimitError as e:
                current_retry += 1
-                 if current_retry
-                     print(f"Line {line_index if line_index != -1 else ''}: Max retries reached
                    return None
                await asyncio.sleep(backoff_seconds)
-                 backoff_seconds
            return None

if __name__ == '__main__':
    async def main_test():
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
-             print("OPENAI_API_KEY not set. Skipping test.")
            return

        client = AsyncOpenAI(api_key=api_key)

-         {"id": 0, "
-         {"id": 1, "
-         {"id": 2, "
-         {"id": 3, "
        ]

-         os.makedirs(

-         for
-         # For Alice (speed): tts-1-hd. For Bob (instructions): gpt-4o-mini-tts
-         current_model = "tts-1-hd"
-         if "instructions" in line_data:
-             current_model = "gpt-4o-mini-tts"  # Example, ensure this model is available for your key
-         voice = OPENAI_VOICES[i % len(OPENAI_VOICES)]
-         output_file = os.path.join(temp_dir, f"line_{line_data['id']}_{current_model}.mp3")

                synthesize_speech_line(
-                     client,
-                     line_data["text"],
-                     voice,
-                     model=
-                     speed=line_data.get("speed", 1.0),
                    instructions=line_data.get("instructions"),
                    line_index=line_data['id']
                )
            )

-         results = await asyncio.gather(*
-         print(f"
-         for f_path in successful_files:
-             print(f" - {f_path}")

    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    asyncio.run(main_test())
import time
from openai import AsyncOpenAI, OpenAIError, RateLimitError
import httpx  # For NSFW check
+ import urllib.parse  # For URL-encoding text in the NSFW check

+ # Voices available for the OpenAI TTS models (tts-1, tts-1-hd, gpt-4o-mini-tts).
+ # As of May 2024 these are the primary voices; Ash, Ballad, Coral, Sage, and Verse were mentioned for GPT-4o's voice capabilities.
+ OPENAI_VOICES = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
+ # If gpt-4o-mini-tts explicitly supports more or different voices, this list may need adjustment,
+ # or the app could query available voices if an API endpoint for that exists. For now, assume these are common.

+ # Concurrency limiter for OpenAI API calls
MAX_CONCURRENT_REQUESTS = 2
semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)

+ # Retry mechanism parameters
MAX_RETRIES = 3
+ INITIAL_BACKOFF_SECONDS = 1.0  # Start with 1 second
+ MAX_BACKOFF_SECONDS = 16.0  # Cap backoff to avoid excessively long waits

async def is_content_safe(text: str, api_url_template: str | None) -> bool:
    """
    Checks if the content is safe using an external NSFW API.
+     Returns True if the content is safe, no API URL is provided, or the check fails open.
+     Returns False if the content is flagged as unsafe by the API.
    """
    if not api_url_template:
+         return True  # No NSFW check configured, assume safe

    if "{text}" not in api_url_template:
+         print(f"Warning: NSFW_API_URL_TEMPLATE ('{api_url_template}') does not contain a {{text}} placeholder. Skipping NSFW check.")
+         return True  # Configuration error, fail open (assume safe)

    try:
+         encoded_text = urllib.parse.quote(text)  # Ensure the text is URL-safe
+         url = api_url_template.replace("{text}", encoded_text)  # Plain replace is sufficient here

+         # Use a timeout for the external API call
+         async with httpx.AsyncClient(timeout=10.0) as client:
+             response = await client.get(url)

+         response.raise_for_status()  # Raises an exception for 4xx/5xx responses
+
+         # Assuming the API returns a specific response to indicate safety.
+         # This part needs to be adapted to the actual API's response format.
+         # For example, if it returns JSON: data = response.json()
+         # If it returns 200 for safe and non-200 for unsafe, raise_for_status handles it.
+         # For this placeholder, we assume 200 means safe.
+         return True  # Content is safe based on the API response
+
+     except httpx.HTTPStatusError as e:
+         # Log specific HTTP errors from the NSFW API
+         print(f"NSFW Check: API request failed. Status: {e.response.status_code}. URL: {e.request.url}. Response: {e.response.text[:200]}")
+         # Depending on policy, you might "fail closed" (treat as unsafe) or "fail open"
+         return False  # Content flagged as unsafe, or an API error
    except httpx.RequestError as e:
+         print(f"NSFW Check: API request error: {e}. URL: {e.request.url if e.request else 'N/A'}")
+         return True  # Fail open (assume safe) on network/request errors so TTS is not blocked
    except Exception as e:
        print(f"NSFW Check: An unexpected error occurred: {e}")
+         return True  # Fail open (assume safe) on other unexpected errors
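The placeholder above treats any 2xx response as safe. If the configured endpoint instead answers with a JSON verdict, the marked section could be adapted along these lines. This is a minimal sketch: the helper name and the `{"nsfw": true}` payload shape are assumptions, not a real API.

```python
import urllib.parse
import httpx

async def is_content_safe_json(text: str, api_url_template: str) -> bool:
    """Hypothetical variant for an NSFW endpoint that returns a JSON verdict."""
    url = api_url_template.replace("{text}", urllib.parse.quote(text))
    async with httpx.AsyncClient(timeout=10.0) as client:
        response = await client.get(url)
    response.raise_for_status()
    data = response.json()              # assumed shape: {"nsfw": true, "score": 0.93}
    return not data.get("nsfw", False)  # fail closed only on an explicit flag
```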

async def synthesize_speech_line(
    client: AsyncOpenAI,
    text: str,
    voice: str,
    output_path: str,
    model: str = "tts-1-hd",
+     speed: float = 1.0,  # Speed parameter (0.25 to 4.0); default 1.0
+     instructions: str | None = None,  # For models like gpt-4o-mini-tts
    nsfw_api_url_template: str | None = None,
+     line_index: int = -1  # For logging purposes
) -> str | None:
    """
    Synthesizes a single line of text to speech using OpenAI TTS.
+     Handles rate limiting with exponential backoff and optional NSFW checks.
    Returns the output_path if successful, None otherwise.
    """
+     if not text.strip():
+         print(f"Line {line_index if line_index != -1 else '(unknown)'}: Input text is empty. Skipping synthesis.")
+         return None
+
    if nsfw_api_url_template:
        if not await is_content_safe(text, nsfw_api_url_template):
+             print(f"Line {line_index if line_index != -1 else '(unknown)'}: Content flagged as potentially unsafe. Skipping synthesis.")
+             return None  # Skip synthesis for flagged content

    current_retry = 0
    backoff_seconds = INITIAL_BACKOFF_SECONDS

+     # Acquire the semaphore before entering the retry loop
    async with semaphore:
+         while current_retry <= MAX_RETRIES:
            try:
                request_params = {
                    "model": model,
                    "input": text,
+                     "voice": voice,
+                     "response_format": "mp3"  # Explicitly request mp3
                }

+                 # Add speed if the model is tts-1 or tts-1-hd and it differs from the default 1.0
                if model in ["tts-1", "tts-1-hd"]:
+                     # The OpenAI API accepts speeds from 0.25 to 4.0.
+                     # Clamp to be safe, although the UI should also enforce this.
+                     clamped_speed = max(0.25, min(float(speed), 4.0))
+                     if clamped_speed != 1.0:  # Only send if not default
+                         request_params["speed"] = clamped_speed

+                 # Add instructions if provided and the model is gpt-4o-mini-tts (or another future model supporting them);
+                 # tts-1 and tts-1-hd do not support an 'instructions' parameter.
+                 if model == "gpt-4o-mini-tts" and instructions and instructions.strip():
+                     request_params["instructions"] = instructions.strip()
+
+                 # Log the request params being sent (excluding sensitive parts such as the full text)
+                 # print(f"Line {line_index}: Sending request to OpenAI TTS with params: {{'model': '{model}', 'voice': '{voice}', 'speed': {request_params.get('speed', 1.0)}, 'has_instructions': {bool(request_params.get('instructions'))}}}")

                response = await client.audio.speech.create(**request_params)
+
+                 # Stream the response to a file
                await response.astream_to_file(output_path)
+
+                 # Verify the file was created and has content
+                 if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
+                     return output_path
+                 else:
+                     print(f"Line {line_index if line_index != -1 else ''}: Synthesis appeared to succeed but the output file is missing or empty: {output_path}")
+                     return None  # File not created or empty
+
            except RateLimitError as e:
                current_retry += 1
+                 if current_retry > MAX_RETRIES:
+                     print(f"Line {line_index if line_index != -1 else ''}: Max retries reached due to RateLimitError. Error: {e}")
                    return None
+
+                 # Exponential backoff; jitter could be added, but simple doubling is used for now
+                 print(f"Line {line_index if line_index != -1 else ''}: Rate limit hit (Attempt {current_retry}/{MAX_RETRIES}). Retrying in {backoff_seconds:.2f}s...")
                await asyncio.sleep(backoff_seconds)
+                 backoff_seconds = min(backoff_seconds * 2, MAX_BACKOFF_SECONDS)  # Increase the backoff, capped at the max
+
+             except OpenAIError as e:  # Catch other specific OpenAI errors
+                 print(f"Line {line_index if line_index != -1 else ''}: OpenAI API error during synthesis: {type(e).__name__} - {e}")
                return None
+
+             except Exception as e:  # Catch any other unexpected errors
+                 print(f"Line {line_index if line_index != -1 else ''}: An unexpected error occurred during synthesis: {type(e).__name__} - {e}")
+                 # current_retry += 1  # Could also retry on generic errors if deemed transient
+                 # if current_retry > MAX_RETRIES: return None
+                 # await asyncio.sleep(backoff_seconds)
+                 # backoff_seconds = min(backoff_seconds * 2, MAX_BACKOFF_SECONDS)
+                 return None  # For most unexpected errors it is safer not to retry indefinitely
+
+     # Reached only if the loop exhausts its retries without returning output_path
+     print(f"Line {line_index if line_index != -1 else ''}: Failed to synthesize after all retries or due to a non-retryable error.")
+     return None
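With the constants above, a line that keeps hitting RateLimitError is attempted four times in total before giving up, and the sleeps between attempts double from the initial backoff. A worked trace of that schedule (no new behavior, just the arithmetic):

```python
# MAX_RETRIES = 3, INITIAL_BACKOFF_SECONDS = 1.0, MAX_BACKOFF_SECONDS = 16.0
waits, backoff = [], 1.0
for _ in range(3):          # sleeps happen after failures 1, 2, and 3
    waits.append(backoff)
    backoff = min(backoff * 2, 16.0)
print(waits)                # [1.0, 2.0, 4.0] -- the 4th failure returns None
```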

if __name__ == '__main__':
    async def main_test():
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
+             print("OPENAI_API_KEY environment variable not set. Skipping test.")
            return

+         # Test with a mock NSFW API template.
+         # Replace with a real one if you have one, or set to None to disable.
+         mock_nsfw_template = "https://api.example.com/nsfw_check?text={text}"  # This will likely fail open
+
        client = AsyncOpenAI(api_key=api_key)

+         test_lines_data = [
+             {"id": 0, "text": "Hello from Alloy, this is a test of standard tts-1-hd.", "voice": "alloy", "model": "tts-1-hd", "speed": 1.0},
+             {"id": 1, "text": "Echo here, speaking a bit faster.", "voice": "echo", "model": "tts-1-hd", "speed": 1.3},
+             {"id": 2, "text": "Fable, narrating slowly and calmly.", "voice": "fable", "model": "tts-1", "speed": 0.8},
+             {"id": 3, "text": "This is Onyx with instructions for gpt-4o-mini-tts: speak with a deep, commanding voice.", "voice": "onyx", "model": "gpt-4o-mini-tts", "instructions": "Speak with a very deep, commanding and slightly robotic voice."},
+             {"id": 4, "text": "Nova, testing default speed with tts-1.", "voice": "nova", "model": "tts-1"},
+             {"id": 5, "text": "Shimmer testing gpt-4o-mini-tts without specific instructions.", "voice": "shimmer", "model": "gpt-4o-mini-tts"},
+             {"id": 6, "text": "This line contains potentially naughty words that might be flagged.", "voice": "alloy", "model": "tts-1-hd", "nsfw_check": True},  # Test NSFW
+             {"id": 7, "text": "", "voice": "echo", "model": "tts-1"},  # Test empty text
        ]

+         temp_output_dir = "test_audio_output_openai_tts"
+         os.makedirs(temp_output_dir, exist_ok=True)
+         print(f"Test audio will be saved in ./{temp_output_dir}/")

+         synthesis_tasks = []
+         for line_data in test_lines_data:
+             output_file_path = os.path.join(temp_output_dir, f"line_{line_data['id']}_{line_data['voice']}_{line_data['model']}.mp3")

+             nsfw_url = mock_nsfw_template if line_data.get("nsfw_check") else None
+
+             synthesis_tasks.append(
                synthesize_speech_line(
+                     client=client,
+                     text=line_data["text"],
+                     voice=line_data["voice"],
+                     output_path=output_file_path,
+                     model=line_data["model"],
+                     speed=line_data.get("speed", 1.0),  # Default speed if not specified
                    instructions=line_data.get("instructions"),
+                     nsfw_api_url_template=nsfw_url,
                    line_index=line_data['id']
                )
            )

+         results = await asyncio.gather(*synthesis_tasks)
+
+         successful_files_count = 0
+         print("\n--- Test Synthesis Results ---")
+         for i, result_path in enumerate(results):
+             if result_path and os.path.exists(result_path):
+                 print(f"SUCCESS: Line {test_lines_data[i]['id']} -> {result_path} (Size: {os.path.getsize(result_path)} bytes)")
+                 successful_files_count += 1
+             else:
+                 print(f"FAILURE or SKIP: Line {test_lines_data[i]['id']} (Text: '{test_lines_data[i]['text'][:30]}...')")

+         print(f"\nSuccessfully synthesized {successful_files_count} out of {len(test_lines_data)} lines.")
+         print(f"Please check the ./{temp_output_dir}/ directory for output files.")

+     # Run the async main function
+     if os.name == 'nt':  # Required for Windows asyncio selector policy
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    asyncio.run(main_test())
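For a quick standalone check of this module outside the Space, the function can also be driven directly. This is a minimal sketch, assuming `OPENAI_API_KEY` is exported and the repo root is on the import path so the module resolves as `utils.openai_tts`:

```python
import asyncio
import os

from openai import AsyncOpenAI
from utils.openai_tts import synthesize_speech_line

async def smoke_test() -> None:
    client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
    path = await synthesize_speech_line(
        client=client,
        text="Hello from a one-line smoke test.",
        voice="nova",
        output_path="smoke_test.mp3",  # arbitrary output path
        model="tts-1",
    )
    print("written:" if path else "failed:", path)

if __name__ == "__main__":
    asyncio.run(smoke_test())
```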
utils/script_parser.py
CHANGED
@@ -2,15 +2,15 @@ import re
import math

MAX_SCRIPT_LENGTH = 10000  # characters
- TTS_1_HD_COST_PER_CHAR = 0.00003  # $30 / 1M chars
- GPT_4O_MINI_TTS_COST_PER_SECOND = 0.015 / 60  # $0.015 / minute
- CHARS_PER_SECOND_ESTIMATE =

def parse_dialogue_script(script_text):
    """
-     Parses a dialogue script into a list of
    Input format: "[Speaker] Utterance" per line.
-     Lines not matching the format are
    """
    lines = script_text.strip().split('\n')
    parsed_lines = []
@@ -22,22 +22,24 @@ def parse_dialogue_script(script_text):
    for i, line_content in enumerate(lines):
        line_content = line_content.strip()
        if not line_content:
-             continue

        match = re.match(r'\[(.*?)\]\s*(.*)', line_content)
        if match:
            speaker, utterance = match.groups()
            utterance = utterance.strip()
        else:
-             # If no speaker tag, assign
-             utterance = line_content.strip()

-         if not utterance:  # Skip if utterance is empty after parsing
            continue

-         parsed_lines.append({"id": i, "speaker": speaker
        total_chars += len(utterance)

    return parsed_lines, total_chars
@@ -46,38 +48,68 @@ def calculate_cost(total_chars, num_lines, model_name="tts-1-hd"):
    """
    Calculates the estimated cost for TTS processing.
    """
    elif model_name == "gpt-4o-mini-tts":
-         # Estimate duration: total_chars / X chars per second
        cost = estimated_seconds * GPT_4O_MINI_TTS_COST_PER_SECOND
-     else:
    return cost

if __name__ == '__main__':
[Alice] Hello Bob, how are you?
[Bob] I'm fine, Alice. And you?
This is a line without a speaker tag.
[Charlie] Just listening in.
"""
    print("Parsed Lines:")
    for p_line in parsed:
        print(p_line)
-     print(f"\nTotal Characters: {chars}")

    cost_hd = calculate_cost(chars, len(parsed), "tts-1-hd")
    print(f"Estimated cost for tts-1-hd: ${cost_hd:.6f}")

    cost_gpt_mini = calculate_cost(chars, len(parsed), "gpt-4o-mini-tts")
-     print(f"Estimated cost for gpt-4o-mini-tts: ${cost_gpt_mini:.6f}")

-     long_script = "a" * (MAX_SCRIPT_LENGTH + 1)
    try:
    except ValueError as e:
-         print(f"
import math

MAX_SCRIPT_LENGTH = 10000  # characters
+ TTS_1_HD_COST_PER_CHAR = 0.00003  # $30 / 1M chars (tts-1-hd rate; also applied to tts-1 as an upper bound)
+ GPT_4O_MINI_TTS_COST_PER_SECOND = 0.015 / 60  # $0.015 / minute for gpt-4o-mini-tts
+ CHARS_PER_SECOND_ESTIMATE = 12  # Average characters spoken per second, used for duration estimates

def parse_dialogue_script(script_text):
    """
+     Parses a dialogue script into a list of dictionaries, one per line.
    Input format: "[Speaker] Utterance" per line.
+     Lines not matching the format are assigned to a "Narrator" speaker.
    """
    lines = script_text.strip().split('\n')
    parsed_lines = []

    for i, line_content in enumerate(lines):
        line_content = line_content.strip()
        if not line_content:
+             continue  # Skip empty lines

        match = re.match(r'\[(.*?)\]\s*(.*)', line_content)
        if match:
            speaker, utterance = match.groups()
+             speaker = speaker.strip()
            utterance = utterance.strip()
+             if not speaker:  # Empty speaker tag, e.g. "[] Text"
+                 speaker = "UnknownSpeaker"
        else:
+             # No speaker tag: treat the whole line as an utterance by "Narrator"
+             speaker = "Narrator"
+             utterance = line_content  # Already stripped

+         if not utterance:  # Skip if the utterance is empty after parsing (e.g. "[Speaker]" with no text)
            continue

+         parsed_lines.append({"id": i, "speaker": speaker, "text": utterance})
        total_chars += len(utterance)

    return parsed_lines, total_chars

def calculate_cost(total_chars, num_lines, model_name="tts-1-hd"):
    """
    Calculates the estimated cost for TTS processing.
    """
+     cost = 0.0
+     if model_name in ["tts-1", "tts-1-hd"]:  # tts-1 is billed lower ($15 / 1M chars); the tts-1-hd rate gives a conservative estimate for both
+         cost = total_chars * TTS_1_HD_COST_PER_CHAR
    elif model_name == "gpt-4o-mini-tts":
+         # Estimate duration as total_chars / CHARS_PER_SECOND_ESTIMATE; a rough heuristic.
+         # Note: OpenAI normally bills TTS per character, which conflicts with the
+         # per-second requirement specified for this app ("seconds x $0.015/minute
+         # for gpt-4o-mini-tts"). The per-second formula is used here as specified.
+         if CHARS_PER_SECOND_ESTIMATE <= 0:  # Avoid division by zero
+             estimated_seconds = total_chars / 10.0  # Fallback chars/sec
+         else:
+             estimated_seconds = total_chars / CHARS_PER_SECOND_ESTIMATE
        cost = estimated_seconds * GPT_4O_MINI_TTS_COST_PER_SECOND
+     else:  # Fall back to character-based costing for any other tts-1-like model
+         cost = total_chars * TTS_1_HD_COST_PER_CHAR
+         # Alternatively: raise ValueError(f"Unknown model for cost calculation: {model_name}")
    return cost
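A quick sanity check of the two pricing paths, using only the constants defined at the top of the file (pure arithmetic, shown for a 1,000-character script):

```python
# tts-1 / tts-1-hd: 1000 chars * $0.00003/char                          = $0.030
# gpt-4o-mini-tts:  (1000 / 12) s * ($0.015 / 60)/s ~= 83.3 * $0.00025 ~= $0.0208
print(calculate_cost(1000, num_lines=10, model_name="tts-1-hd"))         # 0.03
print(calculate_cost(1000, num_lines=10, model_name="gpt-4o-mini-tts"))  # ~0.020833
```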

if __name__ == '__main__':
+     sample_script_1 = """
[Alice] Hello Bob, how are you?
[Bob] I'm fine, Alice. And you?
This is a line without a speaker tag.
[Charlie] Just listening in.
+ [] This line has an empty speaker tag.
+ [EmptySpeakerText]
"""
+     print(f"--- Test Case 1: Mixed Script ---")
+     parsed, chars = parse_dialogue_script(sample_script_1)
    print("Parsed Lines:")
    for p_line in parsed:
        print(p_line)
+     print(f"\nTotal Characters for TTS: {chars}")

    cost_hd = calculate_cost(chars, len(parsed), "tts-1-hd")
    print(f"Estimated cost for tts-1-hd: ${cost_hd:.6f}")

+     cost_tts1 = calculate_cost(chars, len(parsed), "tts-1")
+     print(f"Estimated cost for tts-1: ${cost_tts1:.6f}")
+
+     # Test cost for gpt-4o-mini-tts using the per-second formula
    cost_gpt_mini = calculate_cost(chars, len(parsed), "gpt-4o-mini-tts")
+     print(f"Estimated cost for gpt-4o-mini-tts (at {CHARS_PER_SECOND_ESTIMATE} chars/sec): ${cost_gpt_mini:.6f}")
+
+     print(f"\n--- Test Case 2: Long Script (Boundary Check) ---")
+     long_script_text = "[SpeakerA] " + "a" * (MAX_SCRIPT_LENGTH - 11)  # "[SpeakerA] " itself is 11 chars
+     parsed_long, chars_long = parse_dialogue_script(long_script_text)
+     print(f"Long script (length {len(long_script_text)} chars) parsed successfully. TTS Chars: {chars_long}")

    try:
+         too_long_script = "a" * (MAX_SCRIPT_LENGTH + 1)
+         parse_dialogue_script(too_long_script)
    except ValueError as e:
+         print(f"Correctly caught error for too long script: {e}")
+
+     print(f"\n--- Test Case 3: Empty and Invalid Scripts ---")
+     parsed_empty, chars_empty = parse_dialogue_script("")
+     print(f"Empty script: Parsed lines: {len(parsed_empty)}, Chars: {chars_empty}")
+     parsed_blank_lines, chars_blank_lines = parse_dialogue_script("\n\n[Speaker]\n\n")
+     print(f"Script with blank/invalid lines: Parsed lines: {len(parsed_blank_lines)}, Chars: {chars_blank_lines} (Result: {parsed_blank_lines})")
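Taken together with the TTS module above, a typical caller parses first, estimates, and only then synthesizes. A minimal end-to-end sketch of the parsing and costing side (it assumes the repo root is on the import path so the module resolves as `utils.script_parser`):

```python
from utils.script_parser import parse_dialogue_script, calculate_cost

script = """[Alice] Hello Bob, how are you today?
[Bob] I'm doing great, Alice! Thanks for asking.
And so their conversation began."""

lines, total_chars = parse_dialogue_script(script)
for line in lines:
    print(f"{line['speaker']:>8}: {line['text']}")  # the untagged line falls back to "Narrator"

print(f"tts-1-hd estimate: ${calculate_cost(total_chars, len(lines), 'tts-1-hd'):.4f}")
```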