import asyncio
import os
import time
from openai import AsyncOpenAI, OpenAIError, RateLimitError
import httpx  # For NSFW check
import urllib.parse  # For URL encoding text in NSFW check

OPENAI_VOICES = ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "sage", "nova", "shimmer", "verse"]

# Concurrency limiter for OpenAI API calls
MAX_CONCURRENT_REQUESTS = 2
semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)

# Retry mechanism parameters
MAX_RETRIES = 3
INITIAL_BACKOFF_SECONDS = 1.0 # Start with 1 second
MAX_BACKOFF_SECONDS = 16.0 # Cap backoff to avoid excessively long waits
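# With these defaults, successive rate-limit retries wait 1s, 2s, then 4s (doubling each
# attempt and capped at MAX_BACKOFF_SECONDS) before synthesis gives up after MAX_RETRIES.
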
async def is_content_safe(text: str, api_url_template: str | None) -> bool:
"""
Checks if the content is safe using an external NSFW API.
Returns True if safe, API URL is not provided, or check fails open.
Returns False if content is flagged as unsafe by the API.
"""
if not api_url_template:
return True # No NSFW check configured, assume safe
if "{text}" not in api_url_template:
print(f"Warning: NSFW_API_URL_TEMPLATE ('{api_url_template}') does not contain {{text}} placeholder. Skipping NSFW check.")
return True # Configuration error, fail open (assume safe)
try:
encoded_text = urllib.parse.quote(text) # Ensure text is URL-safe
url = api_url_template.replace("{text}", encoded_text) # Use replace for simplicity
# Using a timeout for the external API call
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(url)
response.raise_for_status() # Will raise an exception for 4xx/5xx responses
            # Placeholder: adapt this to the real API's response format. If the API returns
            # JSON, parse it here (e.g. `data = response.json()`); if it simply returns 200
            # for safe and an error status for unsafe, raise_for_status() above already
            # covers the unsafe case, so reaching this point means the content is safe.
            return True  # Content considered safe based on the API response
except httpx.HTTPStatusError as e:
# Log specific HTTP errors from the NSFW API
print(f"NSFW Check: API request failed. Status: {e.response.status_code}. URL: {e.request.url}. Response: {e.response.text[:200]}")
# Depending on policy, you might "fail closed" (treat as unsafe) or "fail open"
return False # Content flagged as unsafe or API error
except httpx.RequestError as e:
print(f"NSFW Check: API request error: {e}. URL: {e.request.url if e.request else 'N/A'}")
return True # Fail open (assume safe) on network/request errors to not block TTS
except Exception as e:
print(f"NSFW Check: An unexpected error occurred: {e}")
return True # Fail open (assume safe) on other unexpected errors
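
# Illustrative usage of is_content_safe (hypothetical endpoint for demonstration): the
# template must contain a literal "{text}" placeholder, which gets replaced with the
# URL-encoded line before the GET request, e.g.:
#   template = "https://nsfw-check.example.com/classify?text={text}"
#   safe = await is_content_safe("Hello there", template)
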
async def synthesize_speech_line(
client: AsyncOpenAI,
text: str,
voice: str,
output_path: str,
model: str = "tts-1-hd",
speed: float = 1.0, # Speed parameter (0.25 to 4.0). Default 1.0.
    instructions: str | None = None,  # Only used by models that support it (e.g. gpt-4o-mini-tts)
nsfw_api_url_template: str | None = None,
line_index: int = -1 # For logging purposes
) -> str | None:
"""
Synthesizes a single line of text to speech using OpenAI TTS.
Handles rate limiting with exponential backoff and NSFW checks.
Returns the output_path if successful, None otherwise.
"""
if not text.strip():
print(f"Line {line_index if line_index != -1 else '(unknown)'}: Input text is empty. Skipping synthesis.")
return None
if nsfw_api_url_template:
if not await is_content_safe(text, nsfw_api_url_template):
print(f"Line {line_index if line_index != -1 else '(unknown)'}: Content flagged as potentially unsafe. Skipping synthesis.")
return None # Skip synthesis for flagged content
current_retry = 0
backoff_seconds = INITIAL_BACKOFF_SECONDS
# Acquire semaphore before entering retry loop
async with semaphore:
while current_retry <= MAX_RETRIES:
try:
request_params = {
"model": model,
"input": text,
"voice": voice,
"response_format": "mp3" # Explicitly request mp3
}
# Add speed if model is tts-1 or tts-1-hd and speed is not default 1.0
if model in ["tts-1", "tts-1-hd"]:
# OpenAI API speed range is 0.25 to 4.0.
# Clamp speed to be safe, although UI should also enforce this.
clamped_speed = max(0.25, min(float(speed), 4.0))
if clamped_speed != 1.0: # Only send if not default
request_params["speed"] = clamped_speed
# Add instructions if provided and model is gpt-4o-mini-tts (or other future models supporting it)
# tts-1 and tts-1-hd do not support an 'instructions' parameter.
if model == "gpt-4o-mini-tts" and instructions and instructions.strip():
request_params["instructions"] = instructions.strip()
# Log the request params being sent (excluding sensitive parts like full text if too long)
# print(f"Line {line_index}: Sending request to OpenAI TTS with params: {{'model': '{model}', 'voice': '{voice}', 'speed': {request_params.get('speed', 1.0)}, 'has_instructions': {bool(request_params.get('instructions'))}}}")
response = await client.audio.speech.create(**request_params)
# Stream response to file
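                # Note: astream_to_file is deprecated in recent openai-python releases in favor of
                # the client's with_streaming_response variant of this call, but it still works here.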
await response.astream_to_file(output_path)
# Verify file was created and has content
if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
return output_path
else:
print(f"Line {line_index if line_index != -1 else ''}: Synthesis appeared to succeed but output file is missing or empty: {output_path}")
return None # File not created or empty
except RateLimitError as e:
current_retry += 1
if current_retry > MAX_RETRIES:
print(f"Line {line_index if line_index != -1 else ''}: Max retries reached due to RateLimitError. Error: {e}")
return None
# Exponential backoff with jitter could be added, but simple exponential for now
print(f"Line {line_index if line_index != -1 else ''}: Rate limit hit (Attempt {current_retry}/{MAX_RETRIES}). Retrying in {backoff_seconds:.2f}s...")
await asyncio.sleep(backoff_seconds)
backoff_seconds = min(backoff_seconds * 2, MAX_BACKOFF_SECONDS) # Increase backoff, cap at max
except OpenAIError as e: # Catch other specific OpenAI errors
print(f"Line {line_index if line_index != -1 else ''}: OpenAI API error during synthesis: {type(e).__name__} - {e}")
return None
            except Exception as e:  # Catch any other unexpected errors
                print(f"Line {line_index if line_index != -1 else ''}: An unexpected error occurred during synthesis: {type(e).__name__} - {e}")
                # Generic errors are treated as non-retryable here; they could be retried with
                # the same backoff as RateLimitError if they are known to be transient.
                return None  # Safer not to retry indefinitely on unexpected errors
# If loop finishes due to max retries without returning output_path
print(f"Line {line_index if line_index != -1 else ''}: Failed to synthesize after all retries or due to non-retryable error.")
return None
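
# Minimal driver sketch (not part of the module's original surface): shows how several lines
# can be synthesized concurrently while the module-level semaphore keeps at most
# MAX_CONCURRENT_REQUESTS requests in flight. Assumes OPENAI_API_KEY is set in the
# environment; the output directory and sample lines below are hypothetical.
async def _demo() -> None:
    client = AsyncOpenAI()  # Picks up OPENAI_API_KEY from the environment
    os.makedirs("tts_output", exist_ok=True)
    sample_lines = ["Hello and welcome.", "This is a concurrency demo."]
    tasks = [
        synthesize_speech_line(
            client,
            text=line,
            voice="alloy",
            output_path=os.path.join("tts_output", f"line_{i}.mp3"),
            line_index=i,
        )
        for i, line in enumerate(sample_lines)
    ]
    results = await asyncio.gather(*tasks)
    print(f"Synthesized {sum(r is not None for r in results)}/{len(sample_lines)} lines.")


if __name__ == "__main__":
    asyncio.run(_demo())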