Spaces:
Running
Running
from fastapi import FastAPI, WebSocket | |
from fastapi.staticfiles import StaticFiles | |
from fastapi.responses import HTMLResponse | |
from app.asr_worker import create_recognizer, stream_audio, finalize_stream | |
import json | |
from starlette.websockets import WebSocketDisconnect | |
# ASGI application object that the route handlers in this module attach to.
app = FastAPI()

# Serve everything under the app/static directory at the /static URL prefix.
app.mount(
    "/static",
    StaticFiles(directory="app/static"),
    name="static",
)
async def root():
    """Return the front-end page stored at ``app/static/index.html``.

    Returns:
        An ``HTMLResponse`` whose body is the full text of the file.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.

    NOTE(review): no route decorator is visible directly above this
    function in the reviewed text; confirm it is registered on ``app``.
    """
    # Decode explicitly as UTF-8 so the served page does not depend on the
    # host's locale default encoding.
    with open("app/static/index.html", encoding="utf-8") as f:
        return HTMLResponse(f.read())
async def websocket_endpoint(websocket: WebSocket):
    """Run one streaming speech-recognition session over a WebSocket.

    Protocol, as implemented below:

    * A text frame holding a JSON object with ``"type": "config"`` sets the
      input sample rate (``sampleRate``) and (re)creates the recognizer and
      its stream from ``model`` and ``precision``. Any other text frame is
      ignored.
    * Each binary frame received after a config frame is passed to
      ``stream_audio``; the reply is ``{"partial": ..., "volume": ...}``
      where the volume is capped at 1.0.
    * On client disconnect, or on an unexpected error, the final transcript
      is obtained from ``finalize_stream`` and sent as ``{"final": ...}`` on
      a best-effort basis.

    Args:
        websocket: The connection to serve. It is accepted here and a close
            is always attempted before returning.

    NOTE(review): no route decorator is visible directly above this
    function in the reviewed text; confirm it is registered on ``app``.
    """
    print("[DEBUG main] ▶ Attempting to accept WebSocket…")
    await websocket.accept()
    print("[DEBUG main] ▶ WebSocket.accept() returned → client is connected!")

    recognizer = None
    stream = None
    orig_sr = 48000  # default fallback until a config frame arrives

    try:
        while True:
            data = await websocket.receive()
            kind = data.get("type")

            # Anything that is not a data frame is a control message.
            if kind not in ("websocket.receive", "websocket.receive_bytes"):
                print(f"[DEBUG main] Received control/frame: {data}")
                if kind != "websocket.disconnect":
                    continue
                # On client disconnect, flush the final transcript if a
                # session was ever configured, then leave the loop.
                if stream is not None and recognizer is not None:
                    print(f"[INFO main] Client disconnected (code={data.get('code')}). Flushing final transcript...")
                    await _flush_final(websocket, stream, recognizer)
                break

            # Text (config) frame. The payload is checked for None rather
            # than mere key presence: a message may carry both "text" and
            # "bytes" keys with the unused one set to None, and such a
            # binary frame must not be mistaken for a config frame.
            text_payload = data.get("text")
            if kind == "websocket.receive" and text_payload is not None:
                try:
                    config_msg = json.loads(text_payload)
                except (ValueError, RecursionError) as e:
                    print(f"[ERROR main] JSON parse failed: {e}")
                    continue

                # Only JSON objects tagged as "config" are acted upon.
                if isinstance(config_msg, dict) and config_msg.get("type") == "config":
                    try:
                        # Keep the current rate when the client omits it.
                        new_sr = int(config_msg.get("sampleRate", orig_sr))
                    except (TypeError, ValueError) as e:
                        # A malformed config is skipped like malformed JSON
                        # instead of tearing down the whole session.
                        print(f"[ERROR main] Invalid sampleRate in config: {e}")
                        continue
                    orig_sr = new_sr
                    print(f"[INFO main] Set original sample rate to {orig_sr}")

                    # Dynamic model & precision selection.
                    model_id = config_msg.get("model")
                    precision = config_msg.get("precision")
                    print(f"[INFO main] Selected model: {model_id}, precision: {precision}")
                    recognizer = create_recognizer(model_id, precision)
                    stream = recognizer.create_stream()
                    print("[INFO main] WebSocket connection accepted; created a streaming context.")
                continue

            # Don't process audio until after config.
            if recognizer is None or stream is None:
                continue

            raw_audio = data.get("bytes")
            if raw_audio is None:
                # Data frame without a binary payload: nothing to decode.
                continue

            # Both accepted frame kinds share one code path; the log prefix
            # keeps the wording that used to distinguish the two branches.
            log_prefix = "(text+bytes) " if kind == "websocket.receive" else ""
            await _send_partial(
                websocket, raw_audio, stream, recognizer, orig_sr, log_prefix
            )
    except Exception as e:
        print(f"[ERROR main] Unexpected exception: {e}")
        if stream is not None and recognizer is not None:
            await _flush_final(websocket, stream, recognizer)
    finally:
        # Ensure the connection is closed even if flushing itself failed.
        try:
            await websocket.close()
        except Exception:
            # Closing a socket the peer already closed raises; that is the
            # normal case after a disconnect, so it is ignored. Task
            # cancellation is deliberately not swallowed here.
            pass
        print("[INFO main] WebSocket closed, cleanup complete.")


async def _flush_final(websocket, stream, recognizer):
    """Finalize the stream and try to deliver the final transcript."""
    final = finalize_stream(stream, recognizer)
    try:
        await websocket.send_json({"final": final})
    except (WebSocketDisconnect, RuntimeError):
        # Delivery is best-effort: the peer may already be gone.
        pass


async def _send_partial(websocket, raw_audio, stream, recognizer, orig_sr, log_prefix=""):
    """Feed one audio chunk to the recognizer and send the partial result."""
    print(f"[INFO main] {log_prefix}Received audio chunk: {len(raw_audio)} bytes")
    result, rms = stream_audio(raw_audio, stream, recognizer, orig_sr)
    # Scale the RMS value by 20 and cap it at 1.0 before sending it as volume.
    vol_to_send = min(rms * 20.0, 1.0)
    print(f"[INFO main] Sending → partial='{result[:30]}…', volume={vol_to_send:.4f}")
    await websocket.send_json({"partial": result, "volume": vol_to_send})