Spaces:

Luigi
/

Streaming-Zipformer

Running

File size: 4,361 Bytes

2318eae
 
 
a1d489d
0d6899b
516e9a4
2318eae
 
 
 
 
 
 
 
 
 
 
 
 
1d28d11
2318eae
1d28d11
2318eae
454a10d
 
7c3f2af
 
2318eae
 
7c3f2af
1d28d11
 
a1d489d
1d28d11
a1d489d
 
1d28d11
7c3f2af
 
 
0d6899b
1d28d11
 
 
ab74fc2
7c3f2af
1d28d11
454a10d
ab74fc2
 
454a10d
 
 
ab74fc2
 
 
 
 
 
 
 
 
 
 
 
454a10d
 
 
 
 
 
 
7c3f2af
1d28d11
 
 
b7b0486
1d28d11
 
b7b0486
1d28d11
 
 
a1d489d
7c3f2af
b7b0486
1d28d11
 
7c3f2af
1d28d11
 
b7b0486
1d28d11
7c3f2af
 
a1d489d
7c3f2af
 
1d28d11
516e9a4

from fastapi import FastAPI, WebSocket
from fastapi.staticfiles import StaticFiles
from fastapi.responses import HTMLResponse
from app.asr_worker import create_recognizer, stream_audio
import json
from starlette.websockets import WebSocketDisconnect

# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()

# Expose the files under app/static at the /static URL prefix.
# The directory is a relative path, so it is resolved against the working
# directory the server process is started from.
app.mount("/static", StaticFiles(directory="app/static"), name="static")

@app.get("/")
async def root():
    """Serve the front-end page.

    Reads ``app/static/index.html`` on each request and returns its contents
    wrapped in an ``HTMLResponse``.

    Raises:
        OSError: if the HTML file cannot be opened (e.g. the server was
            started from a directory where the relative path does not exist).
    """
    # Decode explicitly as UTF-8 so the result does not depend on the host's
    # locale default encoding.
    # NOTE(review): assumes index.html is stored as UTF-8 — confirm.
    with open("app/static/index.html", encoding="utf-8") as f:
        return HTMLResponse(f.read())


@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Stream audio from a browser client through the recognizer.

    Protocol, as implemented below:

    * A text frame containing JSON with ``"type": "config"`` selects the
      sample rate (``sampleRate``), model (``model``), precision
      (``precision``) and optional hotword biasing (``hotwords``,
      ``hotwordsScore``), and (re)creates the recognizer and its stream.
      Other text frames are ignored.
    * A binary frame is treated as an audio chunk and passed to
      ``stream_audio``. Chunks that arrive before a config message are
      dropped. For every processed chunk the client receives
      ``{"partial": <text>, "volume": <0.0-1.0>}``.

    The loop ends when the client disconnects or an unexpected error occurs;
    the socket is closed from the server side only if the client has not
    already gone away.
    """
    print("[DEBUG main] ▶ Attempting to accept WebSocket…")
    await websocket.accept()
    print("[DEBUG main] ▶ WebSocket.accept() returned → client is connected!")

    recognizer = None
    stream = None
    orig_sr = 48000  # default fallback until the client sends a config
    client_disconnected = False

    try:
        while True:
            data = await websocket.receive()
            kind = data.get("type")

            # A disconnect message is terminal: calling receive() again after
            # it raises, so leave the loop instead of treating it as noise.
            if kind == "websocket.disconnect":
                print(f"[INFO main] Client disconnected: {data}")
                client_disconnected = True
                break

            # "websocket.receive" is the only ASGI message type that carries
            # a payload; anything else is logged and skipped.
            if kind != "websocket.receive":
                print(f"[DEBUG main] Received control/frame: {data}")
                continue

            # The message may carry both "text" and "bytes" keys with one of
            # them set to None, so test the value rather than key presence.
            text_payload = data.get("text")
            if text_payload is not None:
                try:
                    config_msg = json.loads(text_payload)
                except Exception as e:
                    print(f"[ERROR main] JSON parse failed: {e}")
                    continue

                # Only JSON objects tagged as config are meaningful here.
                if not isinstance(config_msg, dict):
                    continue
                if config_msg.get("type") != "config":
                    continue

                # Validate numeric fields before touching any state, so that
                # a bad config leaves the previous settings intact and is
                # skipped just like malformed JSON.
                try:
                    new_sr = int(config_msg.get("sampleRate", orig_sr))
                    hotwords_score = float(config_msg.get("hotwordsScore", 0.0))
                except (TypeError, ValueError) as e:
                    print(f"[ERROR main] Invalid config value: {e}")
                    continue

                # 1) sample rate
                orig_sr = new_sr
                print(f"[INFO main] Set original sample rate to {orig_sr}")

                # 2) model & precision
                model_id = config_msg.get("model")
                precision = config_msg.get("precision")
                print(f"[INFO main] Selected model: {model_id}, precision: {precision}")

                # 3) hotwords & boost score
                hotwords = config_msg.get("hotwords", [])
                print(f"[INFO main] Hotwords: {hotwords}, score: {hotwords_score}")

                # 4) create recognizer with biasing
                recognizer = create_recognizer(
                    model_id,
                    precision,
                    hotwords=hotwords,
                    hotwords_score=hotwords_score,
                )
                stream = recognizer.create_stream()
                print("[INFO main] WebSocket connection accepted; created a streaming context.")
                continue

            raw_audio = data.get("bytes")
            if raw_audio is None:
                continue

            # Don't process audio until after config
            if recognizer is None or stream is None:
                continue

            # NOTE(review): stream_audio is called synchronously inside this
            # coroutine; if it is CPU-heavy it will block the event loop —
            # confirm whether it should be moved to a worker thread.
            result, rms = stream_audio(raw_audio, stream, recognizer, orig_sr)

            # Scale the RMS level for the client's volume meter, capped at 1.0.
            await websocket.send_json({
                "partial": result,
                "volume": min(rms * 20.0, 1.0),
            })
    except WebSocketDisconnect as e:
        # Raised when the peer goes away while we are sending a result.
        client_disconnected = True
        print(f"[INFO main] Client disconnected (code={e.code})")
    except Exception as e:
        # Top-level boundary for this connection: log and fall through to
        # cleanup so one failing client cannot crash the server.
        print(f"[ERROR main] Unexpected exception: {e}")
    finally:
        if not client_disconnected:
            # Best-effort close; the connection may already be half-closed.
            try:
                await websocket.close()
            except Exception as close_err:
                print(f"[DEBUG main] WebSocket close failed: {close_err}")
        print("[INFO main] WebSocket closed, cleanup complete.")