Spaces:

Luigi
/

Streaming-Zipformer

Running

App Files Community

Luigi committed on Jun 6

Commit

1d28d11

1 Parent(s): 0d6899b

1. improve type handling 2. add debug message

Browse files

Files changed (2) hide show

app/main.py +44 -9
app/static/index.html +23 -2

app/main.py CHANGED Viewed

@@ -18,36 +18,71 @@ async def root():
 @app.websocket("/ws")
 async def websocket_endpoint(websocket: WebSocket):
     await websocket.accept()
-    stream = recognizer.create_stream()
     orig_sr = 48000  # default fallback
-    print("[INFO] WebSocket connection accepted.")
     try:
         while True:
             data = await websocket.receive()
-            if isinstance(data, dict) and data.get("type") == "websocket.receive":
                 raw = data["text"]
-                config_msg = None
                 try:
                     config_msg = json.loads(raw)
                 except Exception as e:
-                    print(f'Error: {e}')
-                if config_msg and config_msg.get("type") == "config":
                     orig_sr = int(config_msg["sampleRate"])
-                    print(f"[INFO] Set original sample rate to {orig_sr}")
                     continue
             elif isinstance(data, dict) and data.get("type") == "websocket.receive_bytes":
                 raw_audio = data["bytes"]
                 result, rms = stream_audio(raw_audio, stream, recognizer, orig_sr)
                 await websocket.send_json({
                     "partial": result,
-                    "volume": min(rms * 5.0, 1.0)
                 })
     except Exception as e:
-        print(f"[ERROR] {e}")
         final = finalize_stream(stream, recognizer)
         await websocket.send_json({"final": final})
         await websocket.close()

 @app.websocket("/ws")
 async def websocket_endpoint(websocket: WebSocket):
+    print("[DEBUG main] ▶ Attempting to accept WebSocket…")
     await websocket.accept()
+    print("[DEBUG main] ▶ WebSocket.accept() returned → client is connected!")
+    # Immediately create a new stream per client
+    stream = recognizer.create_stream()
     orig_sr = 48000  # default fallback
+    print("[INFO main] WebSocket connection accepted; created a streaming context.")
     try:
         while True:
             data = await websocket.receive()
+            kind = data.get("type")
+            # Debug: log any event we don't handle explicitly
+            if kind not in ("websocket.receive", "websocket.receive_bytes"):
+                print(f"[DEBUG main] Received control/frame: {data}")
+                # If client cleanly disconnected, finalize and break
+                if kind == "websocket.disconnect":
+                    print(f"[INFO main] Client disconnected (code={data.get('code')}). Flushing final transcript...")
+                    final = finalize_stream(stream, recognizer)
+                    await websocket.send_json({"final": final})
+                    break
+                continue
+            # Handle text (config) frame
+            if kind == "websocket.receive" and "text" in data:
                 raw = data["text"]
                 try:
                     config_msg = json.loads(raw)
                 except Exception as e:
+                    print(f"[ERROR main] JSON parse failed: {e}")
+                    continue
+                if config_msg.get("type") == "config":
                     orig_sr = int(config_msg["sampleRate"])
+                    print(f"[INFO main] Set original sample rate to {orig_sr}")
                     continue
+            # If it’s a text payload but with bytes (some FastAPI versions put audio under 'text'!)
+            if kind == "websocket.receive" and "bytes" in data:
+                raw_audio = data["bytes"]
+                print(f"[INFO main] (text+bytes) Received audio chunk: {len(raw_audio)} bytes")
+                result, rms = stream_audio(raw_audio, stream, recognizer, orig_sr)
+                vol_to_send = min(rms * 20.0, 1.0)
+                print(f"[INFO main] Sending → partial='{result[:30]}…', volume={vol_to_send:.4f}")
+                await websocket.send_json({"partial": result, "volume": vol_to_send})
+                continue
             elif isinstance(data, dict) and data.get("type") == "websocket.receive_bytes":
                 raw_audio = data["bytes"]
+                print(f"[INFO main] Received audio chunk: {len(raw_audio)} bytes")
+                # This will also print its own debug info (see asr_worker.py)
                 result, rms = stream_audio(raw_audio, stream, recognizer, orig_sr)
+                vol_to_send = min(rms * 20.0, 1.0)
+                print(f"[INFO main] Sending → partial='{result[:30]}…', volume={vol_to_send:.4f}")
                 await websocket.send_json({
                     "partial": result,
+                    "volume": vol_to_send
                 })
     except Exception as e:
+        print(f"[ERROR main] Unexpected exception: {e}")
         final = finalize_stream(stream, recognizer)
         await websocket.send_json({"final": final})
         await websocket.close()
+        print("[INFO main] WebSocket closed, cleanup complete.")

app/static/index.html CHANGED Viewed

@@ -39,6 +39,11 @@
       border-radius: 8px;
       transition: width 0.2s;
     }
     .output {
       width: 90%;
@@ -78,7 +83,8 @@
   <script>
     let orig_sample_rate;
-    const ws = new WebSocket("wss://" + location.host + "/ws");
     const vol = document.getElementById("vol");
     const partial = document.getElementById("partial");
@@ -89,8 +95,16 @@
       orig_sample_rate = context.sampleRate;
       ws.onopen = () => {
         ws.send(JSON.stringify({ type: "config", sampleRate: orig_sample_rate }));
       };
       const source = context.createMediaStreamSource(stream);
       const processor = context.createScriptProcessor(4096, 1, 1);
@@ -99,15 +113,22 @@
       processor.onaudioprocess = e => {
         const input = e.inputBuffer.getChannelData(0);
         ws.send(new Float32Array(input).buffer);
       };
       ws.onmessage = e => {
         const msg = JSON.parse(e.data);
         if (msg.partial) {
           partial.textContent = msg.partial;
-          vol.value = msg.volume;
         } else if (msg.final) {
           finalText.textContent = msg.final;
         }
       };

       border-radius: 8px;
       transition: width 0.2s;
     }
+    #vol::-moz-progress-bar {
+      background-color: #44bd32;
+      border-radius: 8px;
+      transition: width 0.2s;
+    }
     .output {
       width: 90%;
   <script>
     let orig_sample_rate;
+    console.log("[DEBUG client] Attempting to open WebSocket to ws://" + location.host + "/ws");
+    const ws = new WebSocket("ws://" + location.host + "/ws");
     const vol = document.getElementById("vol");
     const partial = document.getElementById("partial");
       orig_sample_rate = context.sampleRate;
       ws.onopen = () => {
+        console.log("[DEBUG client] WebSocket.onopen fired!");
         ws.send(JSON.stringify({ type: "config", sampleRate: orig_sample_rate }));
       };
+      ws.onerror = err => {
+        console.error("[DEBUG client] WebSocket.onerror:", err);
+      };
+      ws.onclose = () => {
+        console.log("[DEBUG client] WebSocket.onclose fired!");
+      };
       const source = context.createMediaStreamSource(stream);
       const processor = context.createScriptProcessor(4096, 1, 1);
       processor.onaudioprocess = e => {
         const input = e.inputBuffer.getChannelData(0);
+        // Log the first few samples so you know it’s not all zeros
+        console.log("[DEBUG client] Sending audio chunk, first5 samples:", input.slice(0,5));
         ws.send(new Float32Array(input).buffer);
       };
       ws.onmessage = e => {
+        console.log("[DEBUG client] Received server message:", e.data);
         const msg = JSON.parse(e.data);
         if (msg.partial) {
           partial.textContent = msg.partial;
+          console.log(`[DEBUG client] Raw volume from server: ${msg.volume.toFixed(5)}`);
+          const amplified = Math.min(msg.volume * 20.0, 1.0);
+          console.log(`[DEBUG client] Amplified (×20): ${amplified.toFixed(3)}`);
+          vol.value = amplified;
         } else if (msg.final) {
+          console.log("[DEBUG client] Final:", msg.final);
           finalText.textContent = msg.final;
         }
       };