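"""
Async OpenAI text-to-speech helpers: per-line synthesis with a concurrency
semaphore, exponential-backoff retries on rate limits, and an optional
external NSFW content check.
"""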
import asyncio
import os
import time
from openai import AsyncOpenAI, OpenAIError, RateLimitError
import httpx  # For NSFW check
import urllib.parse  # For URL-encoding text in NSFW check

OPENAI_VOICES = ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "sage", "nova", "shimmer", "verse"]

# Concurrency limiter for OpenAI API calls
MAX_CONCURRENT_REQUESTS = 2
semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)

# Retry mechanism parameters
MAX_RETRIES = 3
INITIAL_BACKOFF_SECONDS = 1.0  # Start with 1 second
MAX_BACKOFF_SECONDS = 16.0  # Cap backoff to avoid excessively long waits


async def is_content_safe(text: str, api_url_template: str | None) -> bool:
    """
    Checks whether the content is safe using an external NSFW API.
    Returns True if the content is safe, if no API URL is provided, or if the check fails open.
    Returns False if the content is flagged as unsafe by the API.
    """
    if not api_url_template:
        return True  # No NSFW check configured, assume safe

    if "{text}" not in api_url_template:
        print(f"Warning: NSFW_API_URL_TEMPLATE ('{api_url_template}') does not contain a {{text}} placeholder. Skipping NSFW check.")
        return True  # Configuration error, fail open (assume safe)

    try:
        encoded_text = urllib.parse.quote(text)  # Ensure text is URL-safe
        url = api_url_template.replace("{text}", encoded_text)  # Use replace for simplicity

        # Use a timeout for the external API call
        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.get(url)
            response.raise_for_status()  # Raises an exception for 4xx/5xx responses

            # Assuming the API returns a specific response to indicate safety.
            # This part needs to be adapted to the actual API's response format.
            # For example, if it returns JSON: `data = response.json()`
            # If it returns 200 for safe and non-200 for unsafe, raise_for_status handles it.
            # For this placeholder, we assume 200 means safe; see the sketch below.
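            # A hypothetical adaptation, assuming the service returned JSON such as
            # {"nsfw": true} (the key name and shape are placeholders, not a known API):
            # data = response.json()
            # if data.get("nsfw") is True:
            #     return False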
            return True  # Content is safe based on API response
    except httpx.HTTPStatusError as e:
        # Log specific HTTP errors from the NSFW API
        print(f"NSFW Check: API request failed. Status: {e.response.status_code}. URL: {e.request.url}. Response: {e.response.text[:200]}")
        # Depending on policy, you might "fail closed" (treat as unsafe) or "fail open"
        return False  # Content flagged as unsafe or API error
    except httpx.RequestError as e:
        print(f"NSFW Check: API request error: {e}. URL: {e.request.url if e.request else 'N/A'}")
        return True  # Fail open (assume safe) on network/request errors so TTS is not blocked
    except Exception as e:
        print(f"NSFW Check: An unexpected error occurred: {e}")
        return True  # Fail open (assume safe) on other unexpected errors


async def synthesize_speech_line(
    client: AsyncOpenAI,
    text: str,
    voice: str,
    output_path: str,
    model: str = "tts-1-hd",
    speed: float = 1.0,  # Speed parameter (0.25 to 4.0). Default 1.0.
    instructions: str | None = None,  # For models that support it, e.g. gpt-4o-mini-tts
    nsfw_api_url_template: str | None = None,
    line_index: int = -1,  # For logging purposes
) -> str | None:
    """
    Synthesizes a single line of text to speech using OpenAI TTS.
    Handles rate limiting with exponential backoff and optional NSFW checks.
    Returns the output_path if successful, None otherwise.
    """
    if not text.strip():
        print(f"Line {line_index if line_index != -1 else '(unknown)'}: Input text is empty. Skipping synthesis.")
        return None

    if nsfw_api_url_template:
        if not await is_content_safe(text, nsfw_api_url_template):
            print(f"Line {line_index if line_index != -1 else '(unknown)'}: Content flagged as potentially unsafe. Skipping synthesis.")
            return None  # Skip synthesis for flagged content

    current_retry = 0
    backoff_seconds = INITIAL_BACKOFF_SECONDS

    # Acquire semaphore before entering the retry loop
    async with semaphore:
        while current_retry <= MAX_RETRIES:
            try:
                request_params = {
                    "model": model,
                    "input": text,
                    "voice": voice,
                    "response_format": "mp3",  # Explicitly request mp3
                }

                # Add speed if the model is tts-1 or tts-1-hd and speed is not the default 1.0
                if model in ["tts-1", "tts-1-hd"]:
                    # The OpenAI API speed range is 0.25 to 4.0.
                    # Clamp speed to be safe, although the UI should also enforce this.
                    clamped_speed = max(0.25, min(float(speed), 4.0))
                    if clamped_speed != 1.0:  # Only send if not default
                        request_params["speed"] = clamped_speed

                # Add instructions if provided and the model is gpt-4o-mini-tts
                # (or another future model that supports them).
                # tts-1 and tts-1-hd do not support an 'instructions' parameter.
                if model == "gpt-4o-mini-tts" and instructions and instructions.strip():
                    request_params["instructions"] = instructions.strip()

                # Log the request params being sent (excluding sensitive parts like full text if too long)
                # print(f"Line {line_index}: Sending request to OpenAI TTS with params: {{'model': '{model}', 'voice': '{voice}', 'speed': {request_params.get('speed', 1.0)}, 'has_instructions': {bool(request_params.get('instructions'))}}}")

                response = await client.audio.speech.create(**request_params)

                # Stream response to file
                await response.astream_to_file(output_path)
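                # NOTE: astream_to_file() is deprecated in newer openai-python releases;
                # client.audio.speech.with_streaming_response.create(...) is the suggested
                # replacement if a DeprecationWarning appears here.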
                # Verify file was created and has content
                if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
                    return output_path
                else:
                    print(f"Line {line_index if line_index != -1 else ''}: Synthesis appeared to succeed but output file is missing or empty: {output_path}")
                    return None  # File not created or empty

            except RateLimitError as e:
                current_retry += 1
                if current_retry > MAX_RETRIES:
                    print(f"Line {line_index if line_index != -1 else ''}: Max retries reached due to RateLimitError. Error: {e}")
                    return None
                # Exponential backoff with jitter could be added, but simple exponential for now
                print(f"Line {line_index if line_index != -1 else ''}: Rate limit hit (Attempt {current_retry}/{MAX_RETRIES}). Retrying in {backoff_seconds:.2f}s...")
                await asyncio.sleep(backoff_seconds)
                backoff_seconds = min(backoff_seconds * 2, MAX_BACKOFF_SECONDS)  # Increase backoff, cap at max
            except OpenAIError as e:  # Catch other specific OpenAI errors
                print(f"Line {line_index if line_index != -1 else ''}: OpenAI API error during synthesis: {type(e).__name__} - {e}")
                return None
            except Exception as e:  # Catch any other unexpected errors
                print(f"Line {line_index if line_index != -1 else ''}: An unexpected error occurred during synthesis: {type(e).__name__} - {e}")
                # current_retry += 1  # Could also retry on generic errors if deemed transient
                # if current_retry > MAX_RETRIES: return None
                # await asyncio.sleep(backoff_seconds)
                # backoff_seconds = min(backoff_seconds * 2, MAX_BACKOFF_SECONDS)
                return None  # For most unexpected errors, safer not to retry indefinitely

    # If loop finishes due to max retries without returning output_path
    print(f"Line {line_index if line_index != -1 else ''}: Failed to synthesize after all retries or due to non-retryable error.")
    return None
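

# --- Example usage (illustrative sketch) ---
# Assumes OPENAI_API_KEY is set in the environment; the sample lines, voice
# choice, and line_<n>.mp3 output paths below are placeholders for demonstration.
if __name__ == "__main__":
    async def _demo() -> None:
        client = AsyncOpenAI()  # Picks up OPENAI_API_KEY from the environment

        script = [
            "Hello there, and welcome to the show.",
            "Today we are testing asynchronous text-to-speech.",
        ]
        tasks = [
            synthesize_speech_line(
                client,
                line,
                voice=OPENAI_VOICES[0],
                output_path=f"line_{i}.mp3",
                model="tts-1-hd",
                line_index=i,
            )
            for i, line in enumerate(script)
        ]

        # The module-level semaphore keeps at most MAX_CONCURRENT_REQUESTS calls in flight.
        results = await asyncio.gather(*tasks)
        print(f"Synthesized files: {[r for r in results if r]}")

    asyncio.run(_demo())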