Spaces:
Running
Running
Remove the final transcript, as this demo performs ASR continuously
Browse files
- app/asr_worker.py +1 -10
- app/main.py +6 -25
- app/static/index.html +1 -11
app/asr_worker.py
CHANGED
@@ -170,13 +170,4 @@ def stream_audio(raw_pcm_bytes, stream, recognizer, orig_sr):
|
|
170 |
if recognizer.is_ready(stream):
|
171 |
recognizer.decode_streams([stream])
|
172 |
result = recognizer.get_result(stream)
|
173 |
-
return converter.convert(result), rms
|
174 |
-
|
175 |
-
def finalize_stream(stream, recognizer):
|
176 |
-
tail = np.zeros(int(0.66 * 16000), dtype=np.float32)
|
177 |
-
stream.accept_waveform(16000, tail)
|
178 |
-
stream.input_finished()
|
179 |
-
while recognizer.is_ready(stream):
|
180 |
-
recognizer.decode_streams([stream])
|
181 |
-
result = recognizer.get_result(stream)
|
182 |
-
return converter.convert(result)
|
|
|
170 |
if recognizer.is_ready(stream):
|
171 |
recognizer.decode_streams([stream])
|
172 |
result = recognizer.get_result(stream)
|
173 |
+
return converter.convert(result), rms
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/main.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from fastapi import FastAPI, WebSocket
|
2 |
from fastapi.staticfiles import StaticFiles
|
3 |
from fastapi.responses import HTMLResponse
|
4 |
-
from app.asr_worker import create_recognizer, stream_audio
|
5 |
import json
|
6 |
from starlette.websockets import WebSocketDisconnect
|
7 |
|
@@ -30,22 +30,10 @@ async def websocket_endpoint(websocket: WebSocket):
|
|
30 |
data = await websocket.receive()
|
31 |
kind = data.get("type")
|
32 |
|
33 |
-
# Handle
|
34 |
if kind not in ("websocket.receive", "websocket.receive_bytes"):
|
35 |
-
|
36 |
-
|
37 |
-
# On client disconnect, flush final transcript if possible
|
38 |
-
if stream and recognizer:
|
39 |
-
print(f"[INFO main] Client disconnected (code={data.get('code')}). Flushing final transcript...")
|
40 |
-
final = finalize_stream(stream, recognizer)
|
41 |
-
try:
|
42 |
-
await websocket.send_json({"final": final})
|
43 |
-
except (WebSocketDisconnect, RuntimeError):
|
44 |
-
pass
|
45 |
-
break
|
46 |
-
continue
|
47 |
-
|
48 |
-
# Handle text (config) frame
|
49 |
if kind == "websocket.receive" and "text" in data:
|
50 |
raw = data["text"]
|
51 |
try:
|
@@ -81,7 +69,7 @@ async def websocket_endpoint(websocket: WebSocket):
|
|
81 |
await websocket.send_json({"partial": result, "volume": vol_to_send})
|
82 |
continue
|
83 |
|
84 |
-
elif
|
85 |
raw_audio = data["bytes"]
|
86 |
print(f"[INFO main] Received audio chunk: {len(raw_audio)} bytes")
|
87 |
|
@@ -93,17 +81,10 @@ async def websocket_endpoint(websocket: WebSocket):
|
|
93 |
|
94 |
await websocket.send_json({
|
95 |
"partial": result,
|
96 |
-
"volume":
|
97 |
})
|
98 |
except Exception as e:
|
99 |
print(f"[ERROR main] Unexpected exception: {e}")
|
100 |
-
if stream and recognizer:
|
101 |
-
final = finalize_stream(stream, recognizer)
|
102 |
-
try:
|
103 |
-
await websocket.send_json({"final": final})
|
104 |
-
except (WebSocketDisconnect, RuntimeError):
|
105 |
-
pass
|
106 |
-
# Ensure connection is closed
|
107 |
try:
|
108 |
await websocket.close()
|
109 |
except:
|
|
|
1 |
from fastapi import FastAPI, WebSocket
|
2 |
from fastapi.staticfiles import StaticFiles
|
3 |
from fastapi.responses import HTMLResponse
|
4 |
+
from app.asr_worker import create_recognizer, stream_audio
|
5 |
import json
|
6 |
from starlette.websockets import WebSocketDisconnect
|
7 |
|
|
|
30 |
data = await websocket.receive()
|
31 |
kind = data.get("type")
|
32 |
|
33 |
+
# Handle config messages
|
34 |
if kind not in ("websocket.receive", "websocket.receive_bytes"):
|
35 |
+
print(f"[DEBUG main] Received control/frame: {data}")
|
36 |
+
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
if kind == "websocket.receive" and "text" in data:
|
38 |
raw = data["text"]
|
39 |
try:
|
|
|
69 |
await websocket.send_json({"partial": result, "volume": vol_to_send})
|
70 |
continue
|
71 |
|
72 |
+
elif kind == "websocket.receive_bytes":
|
73 |
raw_audio = data["bytes"]
|
74 |
print(f"[INFO main] Received audio chunk: {len(raw_audio)} bytes")
|
75 |
|
|
|
81 |
|
82 |
await websocket.send_json({
|
83 |
"partial": result,
|
84 |
+
"volume": min(rms * 20.0, 1.0)
|
85 |
})
|
86 |
except Exception as e:
|
87 |
print(f"[ERROR main] Unexpected exception: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
try:
|
89 |
await websocket.close()
|
90 |
except:
|
app/static/index.html
CHANGED
@@ -66,11 +66,6 @@
|
|
66 |
color: #353b48;
|
67 |
}
|
68 |
|
69 |
-
#final {
|
70 |
-
font-size: 1.4rem;
|
71 |
-
color: #e84118;
|
72 |
-
}
|
73 |
-
|
74 |
.controls {
|
75 |
display: flex;
|
76 |
gap: 1rem;
|
@@ -132,8 +127,7 @@
|
|
132 |
<progress id="vol" max="1" value="0"></progress>
|
133 |
|
134 |
<div class="output">
|
135 |
-
<div><span class="label">
|
136 |
-
<div><span class="label">Final:</span> <b id="final">...</b></div>
|
137 |
</div>
|
138 |
|
139 |
<script>
|
@@ -157,7 +151,6 @@
|
|
157 |
|
158 |
const vol = document.getElementById("vol");
|
159 |
const partial = document.getElementById("partial");
|
160 |
-
const finalText = document.getElementById("final");
|
161 |
const modelSelect = document.getElementById("modelSelect");
|
162 |
const precisionSelect = document.getElementById("precisionSelect");
|
163 |
const modelLangs = document.getElementById("modelLangs");
|
@@ -218,9 +211,6 @@
|
|
218 |
if (msg.partial) {
|
219 |
partial.textContent = msg.partial;
|
220 |
}
|
221 |
-
if (msg.final) {
|
222 |
-
finalText.textContent = msg.final;
|
223 |
-
}
|
224 |
};
|
225 |
});
|
226 |
</script>
|
|
|
66 |
color: #353b48;
|
67 |
}
|
68 |
|
|
|
|
|
|
|
|
|
|
|
69 |
.controls {
|
70 |
display: flex;
|
71 |
gap: 1rem;
|
|
|
127 |
<progress id="vol" max="1" value="0"></progress>
|
128 |
|
129 |
<div class="output">
|
130 |
+
<div><span class="label">Transcript:</span> <span id="partial">...</span></div>
|
|
|
131 |
</div>
|
132 |
|
133 |
<script>
|
|
|
151 |
|
152 |
const vol = document.getElementById("vol");
|
153 |
const partial = document.getElementById("partial");
|
|
|
154 |
const modelSelect = document.getElementById("modelSelect");
|
155 |
const precisionSelect = document.getElementById("precisionSelect");
|
156 |
const modelLangs = document.getElementById("modelLangs");
|
|
|
211 |
if (msg.partial) {
|
212 |
partial.textContent = msg.partial;
|
213 |
}
|
|
|
|
|
|
|
214 |
};
|
215 |
});
|
216 |
</script>
|