"""FastAPI service exposing a single text-to-speech endpoint backed by Kokoro."""

import contextlib
import os
import tempfile
from typing import Optional

# Configure cache directories BEFORE importing the model stack: the Hugging
# Face libraries resolve their cache locations when they are first imported,
# so assigning these after ``import kokoro`` would come too late.
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"
os.environ["HF_HOME"] = "/app/cache"
os.environ["XDG_CACHE_HOME"] = "/app/cache"

import soundfile as sf  # noqa: E402
from fastapi import FastAPI, HTTPException  # noqa: E402
from fastapi.responses import FileResponse  # noqa: E402
from kokoro import KPipeline  # noqa: E402
from starlette.background import BackgroundTask  # noqa: E402

# Sample rate (Hz) used when writing the generated audio to disk.
SAMPLE_RATE = 24000

app = FastAPI(title="Text-to-Speech Converter")

# Initialize the pipeline once at import time so it is shared by all requests.
pipeline = KPipeline(lang_code='a')


def _remove_file(path: str) -> None:
    """Delete *path*, ignoring the case where it is already gone."""
    with contextlib.suppress(FileNotFoundError):
        os.remove(path)


def _synthesize_first_segment(text: str) -> str:
    """Render the first audio segment of *text* to a temporary WAV file.

    Returns the path of the written file; the caller owns it and is
    responsible for deleting it.

    Raises:
        ValueError: if the pipeline yields no segment for *text*.
    """
    generator = pipeline(
        text,
        voice='af_heart',
        speed=1,
        split_pattern=r'\n+',
    )

    # Process first audio segment only (modify as needed).
    first_segment = next(iter(generator), None)
    if first_segment is None:
        raise ValueError("No audio could be generated from the provided text")
    _graphemes, _phonemes, audio = first_segment

    # A standalone temp file (not a TemporaryDirectory) is used on purpose:
    # the file must outlive this function because the response body is
    # streamed only after the endpoint has returned.
    fd, output_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        sf.write(output_path, audio, SAMPLE_RATE)
    except Exception:
        _remove_file(output_path)
        raise
    return output_path


@app.post("/generate_audio/")
async def generate_audio(text: Optional[str] = None):
    """Convert *text* to speech and return it as a WAV download.

    Args:
        text: The text to synthesize. Only the first segment produced by the
            pipeline (segments are split on blank/new lines) is returned.

    Returns:
        A ``FileResponse`` with media type ``audio/wav``. The backing
        temporary file is deleted after the response has been sent.

    Raises:
        HTTPException: 400 if *text* is missing or empty; 500 if synthesis
            or writing the audio file fails.
    """
    if not text:
        raise HTTPException(status_code=400, detail="No text provided")

    # NOTE(review): synthesis runs synchronously inside this coroutine and so
    # blocks the event loop while it works. Offloading to a thread pool would
    # help throughput but requires confirming the pipeline is thread-safe.
    try:
        output_path = _synthesize_first_segment(text)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    return FileResponse(
        output_path,
        media_type='audio/wav',
        filename="generated_audio.wav",
        # Clean up only once the file has actually been streamed to the client.
        background=BackgroundTask(_remove_file, output_path),
    )