import asyncio
import os

from openai import AsyncOpenAI, OpenAIError, RateLimitError
import httpx  # For NSFW check
import urllib.parse  # For URL encoding text in NSFW check

OPENAI_VOICES = ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "sage", "nova", "shimmer", "verse"]

# Concurrency limiter for OpenAI API calls
MAX_CONCURRENT_REQUESTS = 2
semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)

# Retry mechanism parameters
MAX_RETRIES = 3
INITIAL_BACKOFF_SECONDS = 1.0  # Start with 1 second
MAX_BACKOFF_SECONDS = 16.0  # Cap backoff to avoid excessively long waits
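# With the defaults above, a rate-limited line sleeps 1s, 2s, then 4s (about 7s of
# backoff in total) before synthesize_speech_line gives up on it.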


async def is_content_safe(text: str, api_url_template: str | None) -> bool:
    """
    Checks whether the content is safe using an external NSFW API.
    Returns True if the content is safe, if no API URL is configured, or if the check fails open.
    Returns False if the content is flagged as unsafe by the API.
    """
    if not api_url_template:
        return True  # No NSFW check configured, assume safe

    if "{text}" not in api_url_template:
        print(f"Warning: NSFW_API_URL_TEMPLATE ('{api_url_template}') does not contain a {{text}} placeholder. Skipping NSFW check.")
        return True  # Configuration error, fail open (assume safe)

    try:
        encoded_text = urllib.parse.quote(text)  # Ensure text is URL-safe
        url = api_url_template.replace("{text}", encoded_text)  # Use replace for simplicity

        # Use a timeout for the external API call
        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.get(url)
            response.raise_for_status()  # Raises an exception for 4xx/5xx responses

            # Assuming the API returns a specific response to indicate safety.
            # This part needs to be adapted to the actual API's response format.
            # For example, if it returns JSON: `data = response.json()`.
            # If it returns 200 for safe and non-200 for unsafe, raise_for_status handles it.
            # For this placeholder, a 200 response is treated as safe.
            return True  # Content is safe based on API response
    except httpx.HTTPStatusError as e:
        # Log specific HTTP errors from the NSFW API
        print(f"NSFW Check: API request failed. Status: {e.response.status_code}. URL: {e.request.url}. Response: {e.response.text[:200]}")
        # Depending on policy, you might "fail closed" (treat as unsafe) or "fail open"
        return False  # Content flagged as unsafe or API error
    except httpx.RequestError as e:
        # Note: accessing e.request can itself raise if no request is attached, so log the URL we built instead
        print(f"NSFW Check: API request error: {type(e).__name__} - {e}. URL: {url}")
        return True  # Fail open (assume safe) on network/request errors so TTS is not blocked
    except Exception as e:
        print(f"NSFW Check: An unexpected error occurred: {e}")
        return True  # Fail open (assume safe) on other unexpected errors
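

# Illustrative sketch (assumption, not wired into the check above): if the NSFW API
# returned a JSON verdict such as {"nsfw": true} instead of signalling purely via HTTP
# status codes, the success branch of is_content_safe could interpret it with a helper
# like this. The payload shape and the "nsfw" field name are assumptions for illustration.
def _is_safe_from_json_verdict(payload: dict) -> bool:
    """Interpret a hypothetical {"nsfw": bool} payload; a missing field fails open (safe)."""
    return not bool(payload.get("nsfw", False))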


async def synthesize_speech_line(
    client: AsyncOpenAI,
    text: str,
    voice: str,
    output_path: str,
    model: str = "tts-1-hd",
    speed: float = 1.0,  # Speed parameter (0.25 to 4.0). Default 1.0.
    instructions: str | None = None,  # For models like gpt-4o-mini-tts that support it
    nsfw_api_url_template: str | None = None,
    line_index: int = -1  # For logging purposes
) -> str | None:
    """
    Synthesizes a single line of text to speech using OpenAI TTS.
    Handles rate limiting with exponential backoff and optional NSFW checks.
    Returns the output_path if successful, None otherwise.
    """
    line_label = f"Line {line_index}" if line_index != -1 else "Line (unknown)"

    if not text.strip():
        print(f"{line_label}: Input text is empty. Skipping synthesis.")
        return None

    if nsfw_api_url_template:
        if not await is_content_safe(text, nsfw_api_url_template):
            print(f"{line_label}: Content flagged as potentially unsafe. Skipping synthesis.")
            return None  # Skip synthesis for flagged content

    current_retry = 0
    backoff_seconds = INITIAL_BACKOFF_SECONDS

    # Acquire semaphore before entering the retry loop
    async with semaphore:
        while current_retry <= MAX_RETRIES:
            try:
                request_params = {
                    "model": model,
                    "input": text,
                    "voice": voice,
                    "response_format": "mp3"  # Explicitly request mp3
                }

                # Add speed if the model is tts-1 or tts-1-hd and speed is not the default 1.0
                if model in ["tts-1", "tts-1-hd"]:
                    # OpenAI API speed range is 0.25 to 4.0.
                    # Clamp speed to be safe, although the UI should also enforce this.
                    clamped_speed = max(0.25, min(float(speed), 4.0))
                    if clamped_speed != 1.0:  # Only send if not default
                        request_params["speed"] = clamped_speed

                # Add instructions if provided and the model is gpt-4o-mini-tts (or other future models supporting it).
                # tts-1 and tts-1-hd do not support an 'instructions' parameter.
                if model == "gpt-4o-mini-tts" and instructions and instructions.strip():
                    request_params["instructions"] = instructions.strip()

                # Log the request params being sent (excluding sensitive parts like full text if too long)
                # print(f"{line_label}: Sending request to OpenAI TTS with params: {{'model': '{model}', 'voice': '{voice}', 'speed': {request_params.get('speed', 1.0)}, 'has_instructions': {bool(request_params.get('instructions'))}}}")

                response = await client.audio.speech.create(**request_params)

                # Stream response to file
                await response.astream_to_file(output_path)

                # Verify the file was created and has content
                if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
                    return output_path
                else:
                    print(f"{line_label}: Synthesis appeared to succeed but output file is missing or empty: {output_path}")
                    return None  # File not created or empty

            except RateLimitError as e:
                current_retry += 1
                if current_retry > MAX_RETRIES:
                    print(f"{line_label}: Max retries reached due to RateLimitError. Error: {e}")
                    return None
                # Simple exponential backoff; jitter could be added later
                print(f"{line_label}: Rate limit hit (Attempt {current_retry}/{MAX_RETRIES}). Retrying in {backoff_seconds:.2f}s...")
                await asyncio.sleep(backoff_seconds)
                backoff_seconds = min(backoff_seconds * 2, MAX_BACKOFF_SECONDS)  # Increase backoff, cap at max
            except OpenAIError as e:  # Catch other specific OpenAI errors
                print(f"{line_label}: OpenAI API error during synthesis: {type(e).__name__} - {e}")
                return None
            except Exception as e:  # Catch any other unexpected errors
                print(f"{line_label}: An unexpected error occurred during synthesis: {type(e).__name__} - {e}")
                # Generic errors could also be retried if deemed transient, but for most
                # unexpected errors it is safer not to retry indefinitely.
                return None

    # Defensive fallback: reached only if the retry loop exits without an explicit return
    print(f"{line_label}: Failed to synthesize after all retries or due to a non-retryable error.")
    return None
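

# Usage sketch (assumptions: OPENAI_API_KEY is set in the environment; the example text,
# voice choice, and output filenames below are placeholders rather than values used
# elsewhere in this Space). It shows how the semaphore-limited synthesize_speech_line
# coroutines can be driven concurrently with asyncio.gather.
if __name__ == "__main__":
    async def _demo() -> None:
        client = AsyncOpenAI()  # Reads OPENAI_API_KEY from the environment
        lines = ["Hello there.", "This is a second test line."]
        tasks = [
            synthesize_speech_line(
                client,
                text=line,
                voice=OPENAI_VOICES[0],
                output_path=f"demo_line_{i}.mp3",
                line_index=i,
            )
            for i, line in enumerate(lines)
        ]
        results = await asyncio.gather(*tasks)
        print("Synthesized files:", [path for path in results if path])

    asyncio.run(_demo())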