File size: 5,206 Bytes
6dde081
 
dc46bfc
6dde081
dc46bfc
 
 
 
6dde081
 
 
 
 
 
 
dc46bfc
 
 
 
6dde081
 
 
dc46bfc
 
 
6dde081
 
 
 
 
 
dc46bfc
 
 
6dde081
 
dc46bfc
 
6dde081
dc46bfc
 
6dde081
 
dc46bfc
6dde081
 
 
dc46bfc
 
6dde081
 
dc46bfc
 
 
6dde081
dc46bfc
 
6dde081
 
dc46bfc
6dde081
 
 
dc46bfc
 
 
6dde081
dc46bfc
 
 
 
 
6dde081
 
dc46bfc
6dde081
dc46bfc
 
 
 
 
6dde081
 
dc46bfc
 
 
 
 
 
 
6dde081
dc46bfc
 
6dde081
 
 
 
 
 
dc46bfc
6dde081
dc46bfc
 
 
 
 
 
 
6dde081
 
 
 
 
 
 
 
dc46bfc
 
6dde081
 
dc46bfc
 
6dde081
 
 
 
 
 
dc46bfc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import io
import logging
import time

from fastapi import FastAPI, HTTPException, Body, Response
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field # Field for adding validation/defaults
from gtts import gTTS, gTTSError

# --- Configuration ---
# Configure root logging at import time so INFO-level records and above are emitted.
logging.basicConfig(level=logging.INFO)
# Module-scoped logger; the request handlers in this file log through it.
logger = logging.getLogger(__name__)

# --- Pydantic Model for Request Body ---
# JSON payload accepted by the TTS endpoint. It is documented with comments
# rather than a class docstring so the generated schema stays exactly as it was.
class TTSRequest(BaseModel):
    # Required: the text to synthesize; must contain at least one character.
    text: str = Field(
        ...,
        min_length=1,
        description="The text to be converted to speech.",
    )
    # Optional: language code handed to gTTS; defaults to English.
    lang: str = Field(
        "en",
        description="Language code for the speech (e.g., 'en', 'es', 'fr'). See gTTS documentation for supported languages.",
    )
    # Optional: Add tld if you need specific accents tied to Google domains
    # tld: str = Field("com", description="Top-level domain for Google TTS endpoint (e.g., 'com', 'co.uk', 'com.au')")

# --- Initialize FastAPI App ---
# Application object that the route decorators in this file register against.
# title / description / version are passed to FastAPI as API metadata.
app = FastAPI(
    title="gTTS API Service",
    description="A simple API service that uses gTTS (Google Text-to-Speech) to convert text into speech (MP3 audio).",
    version="1.0.0",
)

# --- Blocking gTTS synthesis helper ---
def _synthesize_mp3(text: str, lang: str) -> io.BytesIO:
    """Synthesize *text* with gTTS and return the MP3 bytes in a rewound buffer.

    This call is synchronous and blocks until gTTS has written all audio, so
    it must be executed off the event loop (see the endpoint below).

    Raises:
        ValueError: raised by the gTTS constructor for an unsupported language.
        gTTSError: raised by gTTS when the audio cannot be fetched.
    """
    tts = gTTS(text=text, lang=lang, slow=False)  # Add tld=tld if using

    # Use an in-memory buffer (avoids temporary files)
    mp3_fp = io.BytesIO()
    tts.write_to_fp(mp3_fp)
    mp3_fp.seek(0)  # Rewind the buffer to the beginning for reading
    return mp3_fp


# --- API Endpoint for Text-to-Speech ---
@app.post(
    "/api/tts",
    tags=["TTS"],
    summary="Generate Speech using gTTS",
    description="""Send a JSON object with 'text' and optionally 'lang' fields.
    Returns the generated speech as an MP3 audio stream.""",
    responses={
        200: {
            "content": {"audio/mpeg": {}}, # MP3 content type
            "description": "Successful response returning the MP3 audio stream.",
        },
        400: {"description": "Bad Request (e.g., empty text, invalid language)"},
        500: {"description": "Internal Server Error (e.g., gTTS failed)"},
    },
)
async def generate_speech_gtts_api(
    tts_request: TTSRequest = Body(...)
):
    """
    Convert the posted text to speech with gTTS and return it as MP3.

    Args:
        tts_request: Validated request body carrying ``text`` and ``lang``.

    Returns:
        A ``StreamingResponse`` with media type ``audio/mpeg`` and a
        ``Content-Disposition`` header suggesting the filename ``speech.mp3``.

    Raises:
        HTTPException: 400 when the text is blank or the language is not
            supported by gTTS; 500 when gTTS fails or any other error occurs.
    """
    # Local import keeps this block self-contained with respect to the file's
    # import header.
    from fastapi.concurrency import run_in_threadpool

    text = tts_request.text
    lang = tts_request.lang
    # tld = tts_request.tld # Uncomment if using tld

    # min_length=1 on the model rejects "", but whitespace-only text still
    # reaches this point, so it is rejected here.
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="Input text cannot be empty.")

    logger.info(f"Received gTTS request: lang='{lang}', text='{text[:50]}...'")
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments.
    start_synth_time = time.perf_counter()

    try:
        # gTTS is synchronous; running it in the thread pool keeps the event
        # loop free to serve other requests while the audio is generated.
        mp3_fp = await run_in_threadpool(_synthesize_mp3, text, lang)
    except (ValueError, gTTSError) as e:
        # gTTS reports an unsupported language as ValueError from its
        # constructor, so it has to be caught here as well as gTTSError for
        # the documented 400 response to be reachable.
        logger.error(f"gTTS Error: {e}", exc_info=True)
        if "Language not supported" in str(e):
            raise HTTPException(status_code=400, detail=f"Language '{lang}' not supported by gTTS. Error: {e}") from e
        raise HTTPException(status_code=500, detail=f"gTTS failed to generate speech. Error: {e}") from e
    except Exception as e:
        logger.error(f"An unexpected error occurred during speech generation: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"An unexpected error occurred. Error: {str(e)}") from e

    synthesis_time = time.perf_counter() - start_synth_time
    logger.info(f"gTTS audio generated in {synthesis_time:.2f} seconds.")

    # --- Return Streaming Response ---
    return StreamingResponse(
        mp3_fp,
        media_type="audio/mpeg", # Standard MIME type for MP3
        headers={'Content-Disposition': 'attachment; filename="speech.mp3"'} # Suggest filename
    )


# --- Health Check Endpoint (Good Practice) ---
@app.get("/health", tags=["System"], summary="Check API Health")
async def health_check():
    """
    Simple health check endpoint. Returns status ok if the service is running.
    """
    # Liveness only: no gTTS call is made here. A real synthesis round-trip
    # could be added as a deeper check, at the cost of a slower health probe.
    liveness = {"status": "ok"}
    return liveness

# --- Root Endpoint (Optional Information) ---
@app.get("/", tags=["System"], summary="API Information")
async def read_root():
    """
    Provides basic information about the API.
    """
    # Shape of the JSON body the TTS endpoint expects.
    request_shape = {"text": "string", "lang": "string (optional, default 'en')"}

    # Static service description; key order matches the original payload.
    service_info = dict(
        message="Welcome to the gTTS API Service!",
        tts_engine="gTTS (Google Text-to-Speech)",
        tts_endpoint="/api/tts",
        health_endpoint="/health",
        expected_request_body=request_shape,
        response_content_type="audio/mpeg",
        documentation="/docs",  # Link to FastAPI auto-generated docs
    )
    return service_info

# --- How to Run Locally (for testing) ---
# if __name__ == "__main__":
#     import uvicorn
#     uvicorn.run("app:app", host="127.0.0.1", port=8000, reload=True)