Spaces:

Luigi
/

Streaming-Zipformer

Running

File size: 4,756 Bytes

2318eae
 
 
 
0d6899b
516e9a4
2318eae
 
 
 
 
 
 
 
 
 
 
 
 
1d28d11
2318eae
1d28d11
2318eae
454a10d
 
7c3f2af
 
2318eae
 
7c3f2af
1d28d11
 
454a10d
1d28d11
 
 
454a10d
 
 
 
516e9a4
 
 
 
1d28d11
 
 
 
 
7c3f2af
 
 
0d6899b
1d28d11
 
 
7c3f2af
1d28d11
454a10d
 
 
 
 
 
 
 
 
 
 
 
 
 
7c3f2af
1d28d11
 
 
 
 
 
 
 
 
 
7c3f2af
 
1d28d11
 
 
7c3f2af
1d28d11
 
 
 
7c3f2af
 
1d28d11
7c3f2af
 
1d28d11
454a10d
 
516e9a4

from fastapi import FastAPI, WebSocket
from fastapi.staticfiles import StaticFiles
from fastapi.responses import HTMLResponse
from app.asr_worker import create_recognizer, stream_audio, finalize_stream
import json
from starlette.websockets import WebSocketDisconnect

# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()

# Expose the files under "app/static" at the "/static" URL prefix.
# NOTE(review): the directory is a relative path, so it presumably resolves
# against the process working directory — confirm the launch location.
app.mount("/static", StaticFiles(directory="app/static"), name="static")

@app.get("/")
async def root():
    """Serve the front-end page.

    Reads ``app/static/index.html`` on every request and returns its contents
    wrapped in an ``HTMLResponse``.

    Raises:
        OSError: if the file cannot be opened (e.g. it is missing).
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 instead of relying on the locale's default
    # encoding, which differs between hosts and can garble or reject
    # non-ASCII characters in the page.
    # NOTE(review): assumes index.html is saved as UTF-8 — confirm.
    with open("app/static/index.html", encoding="utf-8") as f:
        return HTMLResponse(f.read())


async def _send_final_transcript(websocket, stream, recognizer):
    """Finalize the stream and make a best-effort attempt to send the result.

    Does nothing when no recognizer/stream has been created yet. The value
    returned by ``finalize_stream`` is sent as ``{"final": ...}``.
    """
    if stream is None or recognizer is None:
        return
    final = finalize_stream(stream, recognizer)
    try:
        await websocket.send_json({"final": final})
    except (WebSocketDisconnect, RuntimeError):
        # The peer is typically already gone at this point; delivering the
        # final transcript is best-effort only.
        pass


async def _handle_audio_chunk(websocket, raw_audio, stream, recognizer, orig_sr):
    """Feed one binary audio frame to the recognizer and send the partial result.

    ``stream_audio`` is expected to return a ``(result, rms)`` pair; the RMS
    value is scaled by 20 and capped at 1.0 before being sent as "volume".
    """
    print(f"[INFO main] (text+bytes) Received audio chunk: {len(raw_audio)} bytes")
    result, rms = stream_audio(raw_audio, stream, recognizer, orig_sr)
    vol_to_send = min(rms * 20.0, 1.0)
    print(f"[INFO main] Sending → partial='{result[:30]}…', volume={vol_to_send:.4f}")
    await websocket.send_json({"partial": result, "volume": vol_to_send})


@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Run one streaming-recognition session over a WebSocket.

    Protocol, as implemented below:

    * A text frame containing a JSON object with ``"type": "config"`` sets the
      client's sample rate (``sampleRate``, required) and selects the model
      (``model``) and precision (``precision``), then creates a recognizer and
      a fresh stream. Other text frames are ignored.
    * Binary frames received after a config frame are passed to
      ``stream_audio``; each one is answered with
      ``{"partial": ..., "volume": ...}``. Binary frames received before any
      config frame are dropped.
    * On client disconnect, or on any unexpected error, the stream is
      finalized and ``{"final": ...}`` is sent on a best-effort basis.
    """
    print("[DEBUG main] ▶ Attempting to accept WebSocket…")
    await websocket.accept()
    print("[DEBUG main] ▶ WebSocket.accept() returned → client is connected!")

    recognizer = None
    stream = None
    orig_sr = 48000  # default fallback

    try:
        while True:
            data = await websocket.receive()
            kind = data.get("type")

            # After accept(), receive() only yields "websocket.receive" and
            # "websocket.disconnect"; anything that is not a data frame is
            # treated as a control frame.
            if kind != "websocket.receive":
                print(f"[DEBUG main] Received control/frame: {data}")
                if kind == "websocket.disconnect":
                    # On client disconnect, flush final transcript if possible
                    if stream is not None and recognizer is not None:
                        print(f"[INFO main] Client disconnected (code={data.get('code')}). Flushing final transcript...")
                    await _send_final_transcript(websocket, stream, recognizer)
                    break
                continue

            # A data frame may carry both "text" and "bytes" keys with one of
            # them set to None, so test the values rather than key presence.
            text = data.get("text")
            if text is not None:
                try:
                    config_msg = json.loads(text)
                except ValueError as e:  # json.JSONDecodeError is a ValueError
                    print(f"[ERROR main] JSON parse failed: {e}")
                    continue
                # Valid JSON that is not an object (e.g. a list) is ignored
                # instead of crashing the session on `.get`.
                if isinstance(config_msg, dict) and config_msg.get("type") == "config":
                    # A missing or non-numeric sampleRate raises here and is
                    # handled by the outer error handler.
                    orig_sr = int(config_msg["sampleRate"])
                    print(f"[INFO main] Set original sample rate to {orig_sr}")

                    # New: dynamic model & precision
                    model_id = config_msg.get("model")
                    precision = config_msg.get("precision")
                    print(f"[INFO main] Selected model: {model_id}, precision: {precision}")

                    recognizer = create_recognizer(model_id, precision)
                    stream = recognizer.create_stream()
                    print("[INFO main] WebSocket connection accepted; created a streaming context.")
                continue

            # Don't process audio until after config, and skip frames that
            # carry no binary payload.
            raw_audio = data.get("bytes")
            if recognizer is None or stream is None or raw_audio is None:
                continue

            await _handle_audio_chunk(websocket, raw_audio, stream, recognizer, orig_sr)
    except Exception as e:
        print(f"[ERROR main] Unexpected exception: {e}")
        try:
            await _send_final_transcript(websocket, stream, recognizer)
        finally:
            # Ensure connection is closed even if finalizing the stream fails.
            try:
                await websocket.close()
            except Exception:
                # The socket may already be closed. Catching Exception rather
                # than using a bare except lets task cancellation and
                # KeyboardInterrupt propagate.
                pass
            print("[INFO main] WebSocket closed, cleanup complete.")