# NOTE: The header originally found here was web-page residue from a Hugging
# Face Spaces file view: the Space status ("Running"), "File size: 5,206 Bytes",
# a column of alternating commit hashes (6dde081 / dc46bfc), and a 1-129
# line-number gutter. None of it is part of the program; it is summarized in
# this comment so that the module parses as Python.
import io
import logging
import time

from fastapi import FastAPI, HTTPException, Body, Response
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import StreamingResponse
from gtts import gTTS, gTTSError
from pydantic import BaseModel, Field # Field for adding validation/defaults
# --- Configuration ---
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Root logging is configured at INFO level.
logging.basicConfig(level=logging.INFO)
# --- Pydantic Model for Request Body ---
class TTSRequest(BaseModel):
    # JSON body accepted by the text-to-speech endpoint.
    # NOTE: documented with comments on purpose; a class docstring would be
    # picked up as the model's schema description and alter the generated docs.

    # Text to synthesize; required, at least one character long.
    text: str = Field(
        default=...,
        min_length=1,
        description="The text to be converted to speech.",
    )

    # Language code handed to gTTS; "en" when the client omits it.
    lang: str = Field(
        default="en",
        description=(
            "Language code for the speech (e.g., 'en', 'es', 'fr'). "
            "See gTTS documentation for supported languages."
        ),
    )

    # Optional: Add tld if you need specific accents tied to Google domains
    # tld: str = Field("com", description="Top-level domain for Google TTS endpoint (e.g., 'com', 'co.uk', 'com.au')")
# --- Initialize FastAPI App ---
# Application object that the route decorators below attach to; it carries the
# service's title, description and version metadata.
app = FastAPI(
    version="1.0.0",
    title="gTTS API Service",
    description=(
        "A simple API service that uses gTTS (Google Text-to-Speech) "
        "to convert text into speech (MP3 audio)."
    ),
)
# --- API Endpoint for Text-to-Speech ---
@app.post(
    "/api/tts",
    tags=["TTS"],
    summary="Generate Speech using gTTS",
    description="""Send a JSON object with 'text' and optionally 'lang' fields.
Returns the generated speech as an MP3 audio stream.""",
    responses={
        200: {
            "content": {"audio/mpeg": {}},  # MP3 content type
            "description": "Successful response returning the MP3 audio stream.",
        },
        400: {"description": "Bad Request (e.g., empty text, invalid language)"},
        500: {"description": "Internal Server Error (e.g., gTTS failed)"},
    },
)
async def generate_speech_gtts_api(
    tts_request: TTSRequest = Body(...)
):
    """Convert the posted text to speech with gTTS and return it as MP3.

    Args:
        tts_request: Validated request body carrying ``text`` and ``lang``.

    Returns:
        A ``StreamingResponse`` with media type ``audio/mpeg`` whose body is
        the generated MP3, read from an in-memory buffer, with a
        ``Content-Disposition`` header suggesting the name ``speech.mp3``.

    Raises:
        HTTPException: 400 when the text is blank, when gTTS rejects the
            language, or when gTTS finds nothing to speak; 500 when gTTS
            fails for another reason or an unexpected error occurs.
    """
    text = tts_request.text
    lang = tts_request.lang
    # tld = tts_request.tld  # Uncomment if using tld

    # The model's min_length=1 only rejects the empty string; whitespace-only
    # input still reaches this point, so it has to be rejected here.
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="Input text cannot be empty.")

    logger.info("Received gTTS request: lang='%s', text='%s...'", lang, text[:50])
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments.
    start_synth_time = time.perf_counter()

    def _synthesize() -> io.BytesIO:
        """Run the blocking gTTS synthesis and return a rewound MP3 buffer."""
        tts = gTTS(text=text, lang=lang, slow=False)  # Add tld=tld if using
        # In-memory buffer (avoids temporary files).
        buffer = io.BytesIO()
        tts.write_to_fp(buffer)
        buffer.seek(0)  # Rewind so the response reads from the first byte.
        return buffer

    try:
        # gTTS performs synchronous I/O; running it in the thread pool keeps
        # the event loop free to serve other requests in the meantime.
        mp3_fp = await run_in_threadpool(_synthesize)
    except ValueError as e:
        # gTTS signals an unsupported language with ValueError (not gTTSError),
        # which is a client mistake rather than a server failure.
        logger.exception("gTTS rejected the request: %s", e)
        raise HTTPException(status_code=400, detail=f"Language '{lang}' not supported by gTTS. Error: {e}")
    except AssertionError as e:
        # gTTS asserts when nothing speakable is left after its own text
        # clean-up (for example, input made only of punctuation).
        logger.exception("gTTS found no text to speak: %s", e)
        raise HTTPException(status_code=400, detail=f"No speakable text found in the input. Error: {e}")
    except gTTSError as e:
        logger.exception("gTTS Error: %s", e)
        # Kept for backward compatibility with the previous error mapping.
        if "Language not supported" in str(e):
            raise HTTPException(status_code=400, detail=f"Language '{lang}' not supported by gTTS. Error: {e}")
        raise HTTPException(status_code=500, detail=f"gTTS failed to generate speech. Error: {e}")
    except Exception as e:
        # Last-resort boundary: log with traceback and report a generic 500.
        logger.exception("An unexpected error occurred during speech generation: %s", e)
        raise HTTPException(status_code=500, detail=f"An unexpected error occurred. Error: {str(e)}")

    synthesis_time = time.perf_counter() - start_synth_time
    logger.info("gTTS audio generated in %.2f seconds.", synthesis_time)

    return StreamingResponse(
        mp3_fp,
        media_type="audio/mpeg",  # Standard MIME type for MP3
        headers={'Content-Disposition': 'attachment; filename="speech.mp3"'},  # Suggest filename
    )
# --- Health Check Endpoint (Good Practice) ---
@app.get("/health", summary="Check API Health", tags=["System"])
async def health_check():
    """
    Simple health check endpoint. Returns status ok if the service is running.
    """
    # The docstring wording is left as-is: no explicit description is passed to
    # the route decorator, so this text is what the API docs show.
    #
    # No gTTS call is made here on purpose; a synthesis probe could be added if
    # needed, but it might slow the health check down.
    status_payload = {"status": "ok"}
    return status_payload
# --- Root Endpoint (Optional Information) ---
@app.get("/", summary="API Information", tags=["System"])
async def read_root():
    """
    Provides basic information about the API.
    """
    # The docstring wording is left as-is: no explicit description is passed to
    # the route decorator, so this text is what the API docs show.

    # Shape of the JSON body the TTS endpoint expects.
    request_shape = {"text": "string", "lang": "string (optional, default 'en')"}

    # Keyword order below fixes the key order of the returned JSON object.
    info = dict(
        message="Welcome to the gTTS API Service!",
        tts_engine="gTTS (Google Text-to-Speech)",
        tts_endpoint="/api/tts",
        health_endpoint="/health",
        expected_request_body=request_shape,
        response_content_type="audio/mpeg",
        documentation="/docs",  # Link to FastAPI auto-generated docs
    )
    return info
# --- How to Run Locally (for testing) ---
# if __name__ == "__main__":
#     import uvicorn
#     uvicorn.run("app:app", host="127.0.0.1", port=8000, reload=True)