Spaces:

abocha
/

esl-dialogue-tts

Running

App Files Files Community

esl-dialogue-tts / utils / script_parser.py

abocha

fix

b26f1ff 4 months ago

raw

history blame

3.16 kB

	import re
	import math

	# Upper bound on the accepted script size, in characters.
	MAX_SCRIPT_LENGTH = 10_000
	# Per-character rate for tts-1-hd: $30 per 1M characters.
	TTS_1_HD_COST_PER_CHAR = 30 / 1_000_000
	# Per-second rate for gpt-4o-mini-tts: $0.015 per minute.
	GPT_4O_MINI_TTS_COST_PER_SECOND = 0.015 / 60
	# Rough characters-per-second figure used to estimate TTS duration.
	CHARS_PER_SECOND_ESTIMATE = 10

	def parse_dialogue_script(script_text):
	    """
	    Parse a dialogue script into per-line records and a character count.

	    Input format is one "[Speaker] Utterance" per line. Blank lines are
	    ignored. A line that does not start with a "[...]" tag is kept whole and
	    attributed to the speaker "Narrator". Lines whose utterance is empty
	    after parsing are skipped.

	    Args:
	        script_text: The raw script text.

	    Returns:
	        A (parsed_lines, total_chars) tuple. parsed_lines is a list of dicts
	        with keys "id", "speaker" and "text"; "id" is the zero-based position
	        of the line in the stripped script (blank and skipped lines still
	        consume an index, so ids can have gaps). total_chars is the combined
	        length of every "text" value.

	    Raises:
	        ValueError: If script_text has more than MAX_SCRIPT_LENGTH characters.
	    """
	    # Reject oversized input before doing any splitting work.
	    if len(script_text) > MAX_SCRIPT_LENGTH:
	        raise ValueError(f"Script is too long. Maximum {MAX_SCRIPT_LENGTH} characters allowed. Your script has {len(script_text)} characters.")

	    # Speaker names may be any length (non-greedy, up to the first "]"), and
	    # the whitespace between the tag and the utterance is optional.
	    tagged_line = re.compile(r'\[(.*?)\]\s*(.*)')

	    parsed_lines = []
	    total_chars = 0

	    for i, line_content in enumerate(script_text.strip().split('\n')):
	        line_content = line_content.strip()
	        if not line_content:
	            continue

	        match = tagged_line.match(line_content)
	        if match:
	            speaker, utterance = match.groups()
	            utterance = utterance.strip()
	        else:
	            # No speaker tag: the whole line is spoken by the default speaker.
	            speaker = "Narrator"
	            utterance = line_content

	        if not utterance:  # e.g. "[Alice]" with nothing after the tag
	            continue

	        parsed_lines.append({"id": i, "speaker": speaker.strip(), "text": utterance})
	        total_chars += len(utterance)

	    return parsed_lines, total_chars

	def calculate_cost(total_chars, num_lines, model_name="tts-1-hd"):
	    """
	    Estimate the cost of TTS processing for a script.

	    "tts-1-hd" is priced with the per-character rate. "gpt-4o-mini-tts" is
	    priced with the per-second rate, applied to a duration approximated as
	    total_chars / CHARS_PER_SECOND_ESTIMATE. num_lines is accepted but does
	    not take part in the calculation.

	    Raises:
	        ValueError: If model_name is not one of the two models above.
	    """
	    if model_name == "tts-1-hd":
	        return total_chars * TTS_1_HD_COST_PER_CHAR

	    if model_name == "gpt-4o-mini-tts":
	        # Very rough duration guess; the real audio length is decided by
	        # the model, not by this characters-per-second figure.
	        approx_seconds = total_chars / CHARS_PER_SECOND_ESTIMATE
	        return approx_seconds * GPT_4O_MINI_TTS_COST_PER_SECOND

	    raise ValueError(f"Unknown model for cost calculation: {model_name}")

	if __name__ == '__main__':
	    # Demo run: parse a small script, then print the records and estimates.
	    demo_script = """
	[Alice] Hello Bob, how are you?
	[Bob] I'm fine, Alice. And you?
	This is a line without a speaker tag.
	[Charlie] Just listening in.
	"""
	    records, chars = parse_dialogue_script(demo_script)
	    line_count = len(records)

	    print("Parsed Lines:")
	    for record in records:
	        print(record)
	    print(f"\nTotal Characters: {chars}")

	    # Both estimates are computed from the same character count.
	    cost_hd = calculate_cost(chars, line_count, "tts-1-hd")
	    cost_gpt_mini = calculate_cost(chars, line_count, "gpt-4o-mini-tts")
	    print(f"Estimated cost for tts-1-hd: ${cost_hd:.6f}")
	    print(f"Estimated cost for gpt-4o-mini-tts: ${cost_gpt_mini:.6f}")

	    # One character over the limit must be rejected with a ValueError.
	    oversized_script = "a" * (MAX_SCRIPT_LENGTH + 1)
	    try:
	        parse_dialogue_script(oversized_script)
	    except ValueError as e:
	        print(f"Error for long script: {e}")