Spaces:
Sleeping
Sleeping
File size: 6,214 Bytes
1b567fa 5efbc82 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c 5efbc82 9a88d9c 1b567fa 5efbc82 1b567fa 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 5efbc82 9a88d9c 9acb9c3 5efbc82 1b567fa 9acb9c3 1b567fa 9acb9c3 9a88d9c 9acb9c3 1b567fa 9acb9c3 1b567fa 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c 9acb9c3 9a88d9c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
# Import configuration first to setup environment
import app_config
from fastapi import FastAPI, HTTPException, Form
from fastapi.responses import FileResponse
from pydantic import BaseModel
from kokoro import KPipeline
import soundfile as sf
import torch
import os
import tempfile
import uuid
import logging
from typing import Optional
# Module-wide logging: INFO and above goes to the root handler.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The ASGI application object; the route decorators in this module attach to it.
app = FastAPI(
    title="Kokoro TTS API",
    description="Text-to-Speech API using Kokoro",
    version="1.0.0",
)
class TTSRequest(BaseModel):
    """JSON request body accepted by the ``/tts-json`` endpoint."""

    # Text to synthesize (required).
    text: str
    # Name of the voice to use; must be one of the voices the service lists.
    voice: str = "af_heart"
    # Language code handed to the Kokoro pipeline.
    lang_code: str = "a"
class KokoroTTSService:
    """Thin wrapper around a Kokoro ``KPipeline`` that renders text to WAV files.

    One pipeline instance is held at a time; asking for a different
    ``lang_code`` replaces it with a freshly constructed pipeline.
    """

    # Sample rate (Hz) used when writing the generated audio to disk.
    SAMPLE_RATE = 24000

    # Voices this API accepts, kept immutable so callers cannot alter the
    # shared definition through a returned list.
    AVAILABLE_VOICES = (
        "af_heart", "af_bella", "af_nicole", "af_aoede", "af_kore",
        "af_sarah", "af_nova", "af_sky", "af_alloy", "af_jessica", "af_river",
        "am_michael", "am_fenrir", "am_puck", "am_echo", "am_eric",
        "am_liam", "am_onyx", "am_santa", "am_adam",
        "bf_emma", "bf_isabella", "bf_alice", "bf_lily",
        "bm_george", "bm_fable", "bm_lewis", "bm_daniel",
    )

    def __init__(self):
        """Detect the compute device and load the default (``'a'``) pipeline.

        Raises:
            Exception: whatever ``KPipeline`` raises while loading; it is
                logged and propagated unchanged.
        """
        # Recorded for logging and the health endpoint; it is not passed on
        # to KPipeline here.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"Using device: {self.device}")
        if app_config.is_hf_spaces():
            logger.info("Running on Hugging Face Spaces")
        try:
            logger.info("Initializing Kokoro TTS pipeline...")
            self.pipeline = KPipeline(lang_code='a')
            logger.info("Kokoro TTS pipeline loaded successfully")
        except Exception as e:
            logger.error(f"Failed to load Kokoro TTS pipeline: {e}")
            # Bare raise keeps the original traceback intact.
            raise

    def generate_speech(self, text: str, voice: str = "af_heart", lang_code: str = "a") -> str:
        """Synthesize ``text`` and return the path of the resulting WAV file.

        The pipeline yields one audio chunk per text segment; all chunks are
        concatenated in order so long or multi-line input is not truncated.

        Args:
            text: Text to synthesize.
            voice: Kokoro voice name.
            lang_code: Kokoro language code; the pipeline is rebuilt when it
                differs from the currently loaded one.

        Returns:
            Path to a uniquely named WAV file inside
            ``app_config.get_temp_dir()``. The caller owns the file and is
            responsible for deleting it.

        Raises:
            HTTPException: status 500 when synthesis fails or yields no audio.
        """
        output_path = None
        try:
            output_filename = f"kokoro_output_{uuid.uuid4().hex}.wav"
            output_path = os.path.join(app_config.get_temp_dir(), output_filename)

            # Rebuild the pipeline only when a different language is requested.
            if self.pipeline.lang_code != lang_code:
                logger.info(f"Switching language from {self.pipeline.lang_code} to {lang_code}")
                self.pipeline = KPipeline(lang_code=lang_code)

            segments = []
            for i, (gs, ps, audio) in enumerate(self.pipeline(text, voice=voice)):
                logger.info(f"Generated audio segment {i}: gs={gs}, ps={ps}")
                if audio is None:
                    # NOTE(review): a segment without audio is skipped rather
                    # than treated as fatal - confirm this is desired.
                    continue
                # as_tensor accepts both tensors and array-likes without an
                # unnecessary copy; move to CPU so it can be handed to soundfile.
                segments.append(torch.as_tensor(audio).detach().cpu())

            if not segments:
                # Without this the caller would get a path to a file that
                # was never written.
                raise ValueError("Kokoro produced no audio for the given text")

            waveform = segments[0] if len(segments) == 1 else torch.cat(segments)
            sf.write(output_path, waveform.numpy(), self.SAMPLE_RATE)
            return output_path
        except Exception as e:
            logger.error(f"Error generating speech: {e}")
            # Do not leave a partially written file behind.
            if output_path is not None and os.path.exists(output_path):
                try:
                    os.remove(output_path)
                except OSError:
                    pass
            raise HTTPException(status_code=500, detail=f"Failed to generate speech: {str(e)}") from e

    def get_available_voices(self):
        """Return a new list with the names of the voices this API accepts."""
        return list(self.AVAILABLE_VOICES)
# Single shared service instance used by every route below. Constructing it
# loads the Kokoro pipeline, so this happens once when the module is imported.
tts_service = KokoroTTSService()
@app.get("/")
async def root():
    """Return a static banner confirming the API is up."""
    banner = {
        "message": "Kokoro TTS API is running",
        "status": "healthy",
    }
    return banner
@app.get("/health")
async def health_check():
    """Report service status together with the device the service detected."""
    device = tts_service.device
    return {"status": "healthy", "device": device}
@app.get("/voices")
async def get_voices():
    """Return the voice names the service accepts, under the ``voices`` key."""
    voice_names = tts_service.get_available_voices()
    return {"voices": voice_names}
@app.post("/tts")
async def text_to_speech(
    text: str = Form(...),
    voice: str = Form("af_heart"),
    lang_code: str = Form("a"),
):
    """
    Convert text to speech using Kokoro TTS

    - **text**: The text to convert to speech
    - **voice**: Voice to use (default: "af_heart")
    - **lang_code**: Kokoro language code (default: "a", American English)

    Returns the generated audio as a WAV download. Responds with 400 for
    empty text or an unknown voice, and 500 if synthesis fails.
    """
    # Function-scope import keeps this handler self-contained.
    from fastapi import BackgroundTasks

    if not text.strip():
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    # Validate voice
    available_voices = tts_service.get_available_voices()
    if voice not in available_voices:
        raise HTTPException(
            status_code=400,
            detail=f"Voice '{voice}' not available. Available voices: {available_voices}"
        )

    try:
        output_path = tts_service.generate_speech(text, voice, lang_code)

        def _discard_output() -> None:
            # Best-effort removal of the temp file once it has been sent.
            try:
                os.remove(output_path)
            except OSError as exc:
                logger.warning(f"Could not remove temporary file {output_path}: {exc}")

        cleanup = BackgroundTasks()
        cleanup.add_task(_discard_output)

        # No explicit Content-Disposition header: FileResponse builds
        # 'attachment; filename="..."' from `filename`, and a caller-supplied
        # header would take precedence and drop the filename.
        return FileResponse(
            output_path,
            media_type="audio/wav",
            filename=f"kokoro_tts_{voice}_{uuid.uuid4().hex}.wav",
            background=cleanup,
        )
    except HTTPException:
        # Already a well-formed HTTP error from the service; pass it through
        # instead of re-wrapping it.
        raise
    except Exception as e:
        logger.error(f"Error in TTS endpoint: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/tts-json")
async def text_to_speech_json(request: TTSRequest):
    """
    Convert text to speech using JSON request body

    - **request**: TTSRequest containing text, voice, and lang_code

    Returns the generated audio as a WAV download. Responds with 400 for
    empty text or an unknown voice, and 500 if synthesis fails.
    """
    # Function-scope import keeps this handler self-contained.
    from fastapi import BackgroundTasks

    if not request.text.strip():
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    # Validate voice
    available_voices = tts_service.get_available_voices()
    if request.voice not in available_voices:
        raise HTTPException(
            status_code=400,
            detail=f"Voice '{request.voice}' not available. Available voices: {available_voices}"
        )

    try:
        output_path = tts_service.generate_speech(request.text, request.voice, request.lang_code)

        def _discard_output() -> None:
            # Best-effort removal of the temp file once it has been sent.
            try:
                os.remove(output_path)
            except OSError as exc:
                logger.warning(f"Could not remove temporary file {output_path}: {exc}")

        cleanup = BackgroundTasks()
        cleanup.add_task(_discard_output)

        # No explicit Content-Disposition header: FileResponse builds
        # 'attachment; filename="..."' from `filename`, and a caller-supplied
        # header would take precedence and drop the filename.
        return FileResponse(
            output_path,
            media_type="audio/wav",
            filename=f"kokoro_tts_{request.voice}_{uuid.uuid4().hex}.wav",
            background=cleanup,
        )
    except HTTPException:
        # Already a well-formed HTTP error from the service; pass it through
        # instead of re-wrapping it.
        raise
    except Exception as e:
        logger.error(f"Error in TTS JSON endpoint: {e}")
        raise HTTPException(status_code=500, detail=str(e))