Spaces:
Sleeping
Sleeping
File size: 1,831 Bytes
e3f5ff0 ff704b5 e3f5ff0 a3044d1 e3f5ff0 ff704b5 e3f5ff0 fce7c66 e3f5ff0 fce7c66 e3f5ff0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import os

import numpy as np
import soundfile as sf
from fastapi import FastAPI, Response
from fastapi.responses import FileResponse
from kokoro import KPipeline

# Application object on which the route decorator below registers its handler.
app = FastAPI()
# Build the TTS pipeline once at import time so every request reuses the same
# instance instead of constructing one per call.
# NOTE(review): lang_code='a' presumably selects American English in Kokoro
# (the handler's default voice is "af_heart") — confirm against Kokoro docs.
pipeline = KPipeline(lang_code='a')
def _pcm16_le_bytes(audio) -> bytes:
    """Convert one float audio segment to raw signed 16-bit little-endian PCM.

    Samples are clipped to [-1, 1], scaled by 32767, truncated to 16-bit
    signed integers and serialized with an explicit little-endian byte order.
    """
    # NOTE(review): Kokoro appears to yield torch tensors; anything exposing
    # ``detach`` is moved to CPU and converted to a NumPy array first — confirm.
    if hasattr(audio, "detach"):
        audio = audio.detach().cpu().numpy()
    samples = np.clip(np.asarray(audio), -1.0, 1.0)
    # "<i2" pins little-endian output regardless of the host's byte order;
    # plain np.int16 + tobytes() would emit native-endian bytes.
    return (samples * 32767).astype("<i2").tobytes()


@app.post("/generate")
async def generate_audio(text: str, voice: str = "af_heart", speed: float = 1.0):
    """Synthesize speech for ``text`` and return the first segment as raw PCM.

    Args:
        text: Text to synthesize; the pipeline is asked to split it on runs
            of newlines.
        voice: Voice identifier forwarded to the pipeline.
        speed: Speed value forwarded to the pipeline.

    Returns:
        A ``Response`` whose body is headerless signed 16-bit little-endian
        PCM, described by the ``X-*`` headers, for the first segment that
        carries audio; or a 400 response with the text "No audio generated"
        when the pipeline yields no usable segment.
    """
    segments = pipeline(
        text,
        voice=voice,
        speed=speed,
        split_pattern=r'\n+',
    )

    # Demo behaviour: only the first segment that carries audio is returned.
    for _gs, _ps, audio in segments:
        if audio is None:
            # Nothing to encode for this segment; try the next one.
            continue
        # NOTE(review): the registered audio/L16 media type is specified as
        # network (big-endian) byte order; the media type is kept unchanged for
        # existing clients, which should rely on X-Endian — confirm with consumers.
        return Response(
            content=_pcm16_le_bytes(audio),
            media_type="audio/l16",  # Linear PCM
            headers={
                "Content-Disposition": 'attachment; filename="output.pcm"',
                "X-Sample-Rate": "24000",
                "X-Bits-Per-Sample": "16",
                "X-Channels": "1",
                "X-Encoding": "signed-integer",
                "X-Endian": "little",
            },
        )

    return Response("No audio generated", status_code=400)