File size: 1,515 Bytes
a3044d1
ff704b5
 
 
 
 
 
a3044d1
 
 
 
 
ff704b5
 
a3044d1
ff704b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
import tempfile

import soundfile as sf
from fastapi import BackgroundTasks, FastAPI, HTTPException
from fastapi.responses import FileResponse
from kokoro import KPipeline

# Point the cache-related environment variables at /app/cache before the
# pipeline below is constructed.
# NOTE(review): these are assigned after `kokoro` has already been imported;
# any library that reads them at import time would not see these values -
# confirm the cache really lands in /app/cache.
os.environ.update(
    {
        "TRANSFORMERS_CACHE": "/app/cache",
        "HF_HOME": "/app/cache",
        "XDG_CACHE_HOME": "/app/cache",
    }
)

app = FastAPI(title="Text-to-Speech Converter")

# One module-level pipeline, created at import time and reused by every
# request handler in this file.
# NOTE(review): lang_code='a' is presumably American English - confirm
# against the kokoro documentation.
pipeline = KPipeline(lang_code='a')

def _remove_file(path):
    """Delete *path*, ignoring the case where it is already gone."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


@app.post("/generate_audio/")
async def generate_audio(text: str = None):
    """Synthesize speech for *text* and return it as a WAV download.

    Only the first segment produced by the pipeline is rendered; the
    pipeline splits its input on runs of newlines (``split_pattern``), so
    text after the first split is currently ignored.

    Args:
        text: The text to convert to speech.

    Returns:
        A ``FileResponse`` streaming the generated ``audio/wav`` file under
        the download name ``generated_audio.wav``.

    Raises:
        HTTPException: 400 when *text* is missing/empty or the pipeline
            yields no audio for it; 500 when synthesis or writing the file
            fails.
    """
    if not text:
        raise HTTPException(status_code=400, detail="No text provided")

    output_path = None
    try:
        # NOTE(review): the pipeline is called synchronously inside this
        # coroutine, so the event loop is occupied while it runs.
        generator = pipeline(
            text,
            voice='af_heart',
            speed=1,
            split_pattern=r'\n+'
        )

        # Process first audio segment only (modify as needed). Using a
        # default avoids a bare StopIteration when nothing is produced.
        first_segment = next(iter(generator), None)
        if first_segment is None:
            raise HTTPException(
                status_code=400,
                detail="No audio could be generated from the provided text"
            )
        _graphemes, _phonemes, audio = first_segment

        # The file must outlive this function: FileResponse reads it only
        # when the response is actually sent, so a TemporaryDirectory
        # context would delete it too early.
        fd, output_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        sf.write(output_path, audio, 24000)
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        if output_path is not None:
            _remove_file(output_path)
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Remove the temporary file once the response has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(_remove_file, output_path)

    return FileResponse(
        output_path,
        media_type='audio/wav',
        filename="generated_audio.wav",
        background=cleanup
    )