Luigi committed on
Commit
7c3f2af
·
1 Parent(s): a6143a5

auto-detect mic sample rate

Browse files
Files changed (3) hide show
  1. app/asr_worker.py +2 -2
  2. app/main.py +25 -28
  3. app/static/index.html +19 -10
app/asr_worker.py CHANGED
@@ -25,12 +25,12 @@ def create_recognizer():
25
  decoding_method="greedy_search"
26
  )
27
 
28
- def stream_audio(raw_pcm_bytes, stream, recognizer):
29
  audio = np.frombuffer(raw_pcm_bytes, dtype=np.float32)
30
  if audio.size == 0:
31
  return "", 0.0
32
 
33
- resampled = resample_audio(audio, 48000, 16000)
34
  rms = float(np.sqrt(np.mean(resampled ** 2)))
35
 
36
  stream.accept_waveform(16000, resampled)
 
25
  decoding_method="greedy_search"
26
  )
27
 
28
+ def stream_audio(raw_pcm_bytes, stream, recognizer, orig_sr):
29
  audio = np.frombuffer(raw_pcm_bytes, dtype=np.float32)
30
  if audio.size == 0:
31
  return "", 0.0
32
 
33
+ resampled = resample_audio(audio, orig_sr, 16000)
34
  rms = float(np.sqrt(np.mean(resampled ** 2)))
35
 
36
  stream.accept_waveform(16000, resampled)
app/main.py CHANGED
@@ -5,27 +5,6 @@ from app.asr_worker import create_recognizer, stream_audio, finalize_stream
5
 
6
  app = FastAPI()
7
 
8
- @app.websocket("/ws")
9
- async def websocket_endpoint(websocket: WebSocket):
10
- await websocket.accept()
11
- print("[INFO] WebSocket connection accepted.")
12
- stream = recognizer.create_stream()
13
-
14
- try:
15
- while True:
16
- data = await websocket.receive_bytes()
17
- print(f"[DEBUG] Received {len(data)} bytes")
18
- result, rms = stream_audio(data, stream, recognizer)
19
- await websocket.send_json({
20
- "partial": result,
21
- "volume": min(rms * 5.0, 1.0)
22
- })
23
- except Exception as e:
24
- print(f"[ERROR] {e}")
25
- final = finalize_stream(stream, recognizer)
26
- await websocket.send_json({"final": final})
27
- await websocket.close()
28
-
29
  app.mount("/static", StaticFiles(directory="app/static"), name="static")
30
 
31
  recognizer = create_recognizer()
@@ -41,15 +20,33 @@ async def websocket_endpoint(websocket: WebSocket):
41
  await websocket.accept()
42
  stream = recognizer.create_stream()
43
 
 
 
 
44
  try:
45
  while True:
46
- data = await websocket.receive_bytes()
47
- result, rms = stream_audio(data, stream, recognizer)
48
- await websocket.send_json({
49
- "partial": result,
50
- "volume": min(rms * 5.0, 1.0)
51
- })
52
- except Exception:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  final = finalize_stream(stream, recognizer)
54
  await websocket.send_json({"final": final})
55
  await websocket.close()
 
5
 
6
  app = FastAPI()
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  app.mount("/static", StaticFiles(directory="app/static"), name="static")
9
 
10
  recognizer = create_recognizer()
 
20
  await websocket.accept()
21
  stream = recognizer.create_stream()
22
 
23
+ orig_sr = 48000 # default fallback
24
+ print("[INFO] WebSocket connection accepted.")
25
+
26
  try:
27
  while True:
28
+ data = await websocket.receive()
29
+ if isinstance(data, dict) and data.get("type") == "websocket.receive":
30
+ raw = data["text"]
31
+ config_msg = None
32
+ try:
33
+ config_msg = json.loads(raw)
34
+ except Exception:
35
+ pass
36
+ if config_msg and config_msg.get("type") == "config":
37
+ orig_sr = int(config_msg["sampleRate"])
38
+ print(f"[INFO] Set original sample rate to {orig_sr}")
39
+ continue
40
+
41
+ elif isinstance(data, dict) and data.get("type") == "websocket.receive_bytes":
42
+ raw_audio = data["bytes"]
43
+ result, rms = stream_audio(raw_audio, stream, recognizer, orig_sr)
44
+ await websocket.send_json({
45
+ "partial": result,
46
+ "volume": min(rms * 5.0, 1.0)
47
+ })
48
+ except Exception as e:
49
+ print(f"[ERROR] {e}")
50
  final = finalize_stream(stream, recognizer)
51
  await websocket.send_json({"final": final})
52
  await websocket.close()
app/static/index.html CHANGED
@@ -77,13 +77,21 @@
77
  </div>
78
 
79
  <script>
 
80
  const ws = new WebSocket("wss://" + location.host + "/ws");
 
81
  const vol = document.getElementById("vol");
82
  const partial = document.getElementById("partial");
83
  const finalText = document.getElementById("final");
84
 
85
  navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
86
  const context = new AudioContext();
 
 
 
 
 
 
87
  const source = context.createMediaStreamSource(stream);
88
  const processor = context.createScriptProcessor(4096, 1, 1);
89
  source.connect(processor);
@@ -93,17 +101,18 @@
93
  const input = e.inputBuffer.getChannelData(0);
94
  ws.send(new Float32Array(input).buffer);
95
  };
96
- });
97
 
98
- ws.onmessage = e => {
99
- const msg = JSON.parse(e.data);
100
- if (msg.partial) {
101
- partial.textContent = msg.partial;
102
- vol.value = msg.volume;
103
- } else if (msg.final) {
104
- finalText.textContent = msg.final;
105
- }
106
- };
 
107
  </script>
 
108
  </body>
109
  </html>
 
77
  </div>
78
 
79
  <script>
80
+ let orig_sample_rate;
81
  const ws = new WebSocket("wss://" + location.host + "/ws");
82
+
83
  const vol = document.getElementById("vol");
84
  const partial = document.getElementById("partial");
85
  const finalText = document.getElementById("final");
86
 
87
  navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
88
  const context = new AudioContext();
89
+ orig_sample_rate = context.sampleRate;
90
+
91
+ ws.onopen = () => {
92
+ ws.send(JSON.stringify({ type: "config", sampleRate: orig_sample_rate }));
93
+ };
94
+
95
  const source = context.createMediaStreamSource(stream);
96
  const processor = context.createScriptProcessor(4096, 1, 1);
97
  source.connect(processor);
 
101
  const input = e.inputBuffer.getChannelData(0);
102
  ws.send(new Float32Array(input).buffer);
103
  };
 
104
 
105
+ ws.onmessage = e => {
106
+ const msg = JSON.parse(e.data);
107
+ if (msg.partial) {
108
+ partial.textContent = msg.partial;
109
+ vol.value = msg.volume;
110
+ } else if (msg.final) {
111
+ finalText.textContent = msg.final;
112
+ }
113
+ };
114
+ });
115
  </script>
116
+
117
  </body>
118
  </html>