Spaces:
Running
Running
auto-detect mic sample rate
Browse files
- app/asr_worker.py +2 -2
- app/main.py +25 -28
- app/static/index.html +19 -10
app/asr_worker.py
CHANGED
@@ -25,12 +25,12 @@ def create_recognizer():
|
|
25 |
decoding_method="greedy_search"
|
26 |
)
|
27 |
|
28 |
-
def stream_audio(raw_pcm_bytes, stream, recognizer):
|
29 |
audio = np.frombuffer(raw_pcm_bytes, dtype=np.float32)
|
30 |
if audio.size == 0:
|
31 |
return "", 0.0
|
32 |
|
33 |
-
resampled = resample_audio(audio,
|
34 |
rms = float(np.sqrt(np.mean(resampled ** 2)))
|
35 |
|
36 |
stream.accept_waveform(16000, resampled)
|
|
|
25 |
decoding_method="greedy_search"
|
26 |
)
|
27 |
|
28 |
+
def stream_audio(raw_pcm_bytes, stream, recognizer, orig_sr):
|
29 |
audio = np.frombuffer(raw_pcm_bytes, dtype=np.float32)
|
30 |
if audio.size == 0:
|
31 |
return "", 0.0
|
32 |
|
33 |
+
resampled = resample_audio(audio, orig_sr, 16000)
|
34 |
rms = float(np.sqrt(np.mean(resampled ** 2)))
|
35 |
|
36 |
stream.accept_waveform(16000, resampled)
|
app/main.py
CHANGED
@@ -5,27 +5,6 @@ from app.asr_worker import create_recognizer, stream_audio, finalize_stream
|
|
5 |
|
6 |
app = FastAPI()
|
7 |
|
8 |
-
@app.websocket("/ws")
|
9 |
-
async def websocket_endpoint(websocket: WebSocket):
|
10 |
-
await websocket.accept()
|
11 |
-
print("[INFO] WebSocket connection accepted.")
|
12 |
-
stream = recognizer.create_stream()
|
13 |
-
|
14 |
-
try:
|
15 |
-
while True:
|
16 |
-
data = await websocket.receive_bytes()
|
17 |
-
print(f"[DEBUG] Received {len(data)} bytes")
|
18 |
-
result, rms = stream_audio(data, stream, recognizer)
|
19 |
-
await websocket.send_json({
|
20 |
-
"partial": result,
|
21 |
-
"volume": min(rms * 5.0, 1.0)
|
22 |
-
})
|
23 |
-
except Exception as e:
|
24 |
-
print(f"[ERROR] {e}")
|
25 |
-
final = finalize_stream(stream, recognizer)
|
26 |
-
await websocket.send_json({"final": final})
|
27 |
-
await websocket.close()
|
28 |
-
|
29 |
app.mount("/static", StaticFiles(directory="app/static"), name="static")
|
30 |
|
31 |
recognizer = create_recognizer()
|
@@ -41,15 +20,33 @@ async def websocket_endpoint(websocket: WebSocket):
|
|
41 |
await websocket.accept()
|
42 |
stream = recognizer.create_stream()
|
43 |
|
|
|
|
|
|
|
44 |
try:
|
45 |
while True:
|
46 |
-
data = await websocket.
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
final = finalize_stream(stream, recognizer)
|
54 |
await websocket.send_json({"final": final})
|
55 |
await websocket.close()
|
|
|
5 |
|
6 |
app = FastAPI()
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
app.mount("/static", StaticFiles(directory="app/static"), name="static")
|
9 |
|
10 |
recognizer = create_recognizer()
|
|
|
20 |
await websocket.accept()
|
21 |
stream = recognizer.create_stream()
|
22 |
|
23 |
+
orig_sr = 48000 # default fallback
|
24 |
+
print("[INFO] WebSocket connection accepted.")
|
25 |
+
|
26 |
try:
|
27 |
while True:
|
28 |
+
data = await websocket.receive()
|
29 |
+
if isinstance(data, dict) and data.get("type") == "websocket.receive":
|
30 |
+
raw = data["text"]
|
31 |
+
config_msg = None
|
32 |
+
try:
|
33 |
+
config_msg = json.loads(raw)
|
34 |
+
except Exception:
|
35 |
+
pass
|
36 |
+
if config_msg and config_msg.get("type") == "config":
|
37 |
+
orig_sr = int(config_msg["sampleRate"])
|
38 |
+
print(f"[INFO] Set original sample rate to {orig_sr}")
|
39 |
+
continue
|
40 |
+
|
41 |
+
elif isinstance(data, dict) and data.get("type") == "websocket.receive_bytes":
|
42 |
+
raw_audio = data["bytes"]
|
43 |
+
result, rms = stream_audio(raw_audio, stream, recognizer, orig_sr)
|
44 |
+
await websocket.send_json({
|
45 |
+
"partial": result,
|
46 |
+
"volume": min(rms * 5.0, 1.0)
|
47 |
+
})
|
48 |
+
except Exception as e:
|
49 |
+
print(f"[ERROR] {e}")
|
50 |
final = finalize_stream(stream, recognizer)
|
51 |
await websocket.send_json({"final": final})
|
52 |
await websocket.close()
|
app/static/index.html
CHANGED
@@ -77,13 +77,21 @@
|
|
77 |
</div>
|
78 |
|
79 |
<script>
|
|
|
80 |
const ws = new WebSocket("wss://" + location.host + "/ws");
|
|
|
81 |
const vol = document.getElementById("vol");
|
82 |
const partial = document.getElementById("partial");
|
83 |
const finalText = document.getElementById("final");
|
84 |
|
85 |
navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
|
86 |
const context = new AudioContext();
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
const source = context.createMediaStreamSource(stream);
|
88 |
const processor = context.createScriptProcessor(4096, 1, 1);
|
89 |
source.connect(processor);
|
@@ -93,17 +101,18 @@
|
|
93 |
const input = e.inputBuffer.getChannelData(0);
|
94 |
ws.send(new Float32Array(input).buffer);
|
95 |
};
|
96 |
-
});
|
97 |
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
|
|
107 |
</script>
|
|
|
108 |
</body>
|
109 |
</html>
|
|
|
77 |
</div>
|
78 |
|
79 |
<script>
|
80 |
+
let orig_sample_rate;
|
81 |
const ws = new WebSocket("wss://" + location.host + "/ws");
|
82 |
+
|
83 |
const vol = document.getElementById("vol");
|
84 |
const partial = document.getElementById("partial");
|
85 |
const finalText = document.getElementById("final");
|
86 |
|
87 |
navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
|
88 |
const context = new AudioContext();
|
89 |
+
orig_sample_rate = context.sampleRate;
|
90 |
+
|
91 |
+
ws.onopen = () => {
|
92 |
+
ws.send(JSON.stringify({ type: "config", sampleRate: orig_sample_rate }));
|
93 |
+
};
|
94 |
+
|
95 |
const source = context.createMediaStreamSource(stream);
|
96 |
const processor = context.createScriptProcessor(4096, 1, 1);
|
97 |
source.connect(processor);
|
|
|
101 |
const input = e.inputBuffer.getChannelData(0);
|
102 |
ws.send(new Float32Array(input).buffer);
|
103 |
};
|
|
|
104 |
|
105 |
+
ws.onmessage = e => {
|
106 |
+
const msg = JSON.parse(e.data);
|
107 |
+
if (msg.partial) {
|
108 |
+
partial.textContent = msg.partial;
|
109 |
+
vol.value = msg.volume;
|
110 |
+
} else if (msg.final) {
|
111 |
+
finalText.textContent = msg.final;
|
112 |
+
}
|
113 |
+
};
|
114 |
+
});
|
115 |
</script>
|
116 |
+
|
117 |
</body>
|
118 |
</html>
|