Spaces:

Luigi
/

Streaming-Zipformer

Running

App Files Community

Luigi committed on Jun 6

Commit

7c3f2af

1 Parent(s): a6143a5

auto-detect mic sample rate

Browse files

Files changed (3) hide show

app/asr_worker.py +2 -2
app/main.py +25 -28
app/static/index.html +19 -10

app/asr_worker.py CHANGED Viewed

@@ -25,12 +25,12 @@ def create_recognizer():
         decoding_method="greedy_search"
     )
-def stream_audio(raw_pcm_bytes, stream, recognizer):
     audio = np.frombuffer(raw_pcm_bytes, dtype=np.float32)
     if audio.size == 0:
         return "", 0.0
-    resampled = resample_audio(audio, 48000, 16000)
     rms = float(np.sqrt(np.mean(resampled ** 2)))
     stream.accept_waveform(16000, resampled)

         decoding_method="greedy_search"
     )
+def stream_audio(raw_pcm_bytes, stream, recognizer, orig_sr):
     audio = np.frombuffer(raw_pcm_bytes, dtype=np.float32)
     if audio.size == 0:
         return "", 0.0
+    resampled = resample_audio(audio, orig_sr, 16000)
     rms = float(np.sqrt(np.mean(resampled ** 2)))
     stream.accept_waveform(16000, resampled)

app/main.py CHANGED Viewed

@@ -5,27 +5,6 @@ from app.asr_worker import create_recognizer, stream_audio, finalize_stream
 app = FastAPI()
-@app.websocket("/ws")
-async def websocket_endpoint(websocket: WebSocket):
-    await websocket.accept()
-    print("[INFO] WebSocket connection accepted.")
-    stream = recognizer.create_stream()
-    try:
-        while True:
-            data = await websocket.receive_bytes()
-            print(f"[DEBUG] Received {len(data)} bytes")
-            result, rms = stream_audio(data, stream, recognizer)
-            await websocket.send_json({
-                "partial": result,
-                "volume": min(rms * 5.0, 1.0)
-            })
-    except Exception as e:
-        print(f"[ERROR] {e}")
-        final = finalize_stream(stream, recognizer)
-        await websocket.send_json({"final": final})
-        await websocket.close()
 app.mount("/static", StaticFiles(directory="app/static"), name="static")
 recognizer = create_recognizer()
@@ -41,15 +20,33 @@ async def websocket_endpoint(websocket: WebSocket):
     await websocket.accept()
     stream = recognizer.create_stream()
     try:
         while True:
-            data = await websocket.receive_bytes()
-            result, rms = stream_audio(data, stream, recognizer)
-            await websocket.send_json({
-                "partial": result,
-                "volume": min(rms * 5.0, 1.0)
-            })
-    except Exception:
         final = finalize_stream(stream, recognizer)
         await websocket.send_json({"final": final})
         await websocket.close()

 app = FastAPI()
 app.mount("/static", StaticFiles(directory="app/static"), name="static")
 recognizer = create_recognizer()
     await websocket.accept()
     stream = recognizer.create_stream()
+    orig_sr = 48000  # default fallback
+    print("[INFO] WebSocket connection accepted.")
     try:
         while True:
+            data = await websocket.receive()
+            if isinstance(data, dict) and data.get("type") == "websocket.receive":
+                raw = data["text"]
+                config_msg = None
+                try:
+                    config_msg = json.loads(raw)
+                except Exception:
+                    pass
+                if config_msg and config_msg.get("type") == "config":
+                    orig_sr = int(config_msg["sampleRate"])
+                    print(f"[INFO] Set original sample rate to {orig_sr}")
+                    continue
+            elif isinstance(data, dict) and data.get("type") == "websocket.receive_bytes":
+                raw_audio = data["bytes"]
+                result, rms = stream_audio(raw_audio, stream, recognizer, orig_sr)
+                await websocket.send_json({
+                    "partial": result,
+                    "volume": min(rms * 5.0, 1.0)
+                })
+    except Exception as e:
+        print(f"[ERROR] {e}")
         final = finalize_stream(stream, recognizer)
         await websocket.send_json({"final": final})
         await websocket.close()

app/static/index.html CHANGED Viewed

@@ -77,13 +77,21 @@
   </div>
   <script>
     const ws = new WebSocket("wss://" + location.host + "/ws");
     const vol = document.getElementById("vol");
     const partial = document.getElementById("partial");
     const finalText = document.getElementById("final");
     navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
       const context = new AudioContext();
       const source = context.createMediaStreamSource(stream);
       const processor = context.createScriptProcessor(4096, 1, 1);
       source.connect(processor);
@@ -93,17 +101,18 @@
         const input = e.inputBuffer.getChannelData(0);
         ws.send(new Float32Array(input).buffer);
       };
-    });
-    ws.onmessage = e => {
-      const msg = JSON.parse(e.data);
-      if (msg.partial) {
-        partial.textContent = msg.partial;
-        vol.value = msg.volume;
-      } else if (msg.final) {
-        finalText.textContent = msg.final;
-      }
-    };
   </script>
 </body>
 </html>

   </div>
   <script>
+    let orig_sample_rate;
     const ws = new WebSocket("wss://" + location.host + "/ws");
     const vol = document.getElementById("vol");
     const partial = document.getElementById("partial");
     const finalText = document.getElementById("final");
     navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
       const context = new AudioContext();
+      orig_sample_rate = context.sampleRate;
+      ws.onopen = () => {
+        ws.send(JSON.stringify({ type: "config", sampleRate: orig_sample_rate }));
+      };
       const source = context.createMediaStreamSource(stream);
       const processor = context.createScriptProcessor(4096, 1, 1);
       source.connect(processor);
         const input = e.inputBuffer.getChannelData(0);
         ws.send(new Float32Array(input).buffer);
       };
+      ws.onmessage = e => {
+        const msg = JSON.parse(e.data);
+        if (msg.partial) {
+          partial.textContent = msg.partial;
+          vol.value = msg.volume;
+        } else if (msg.final) {
+          finalText.textContent = msg.final;
+        }
+      };
+    });
   </script>
 </body>
 </html>