Spaces:
Running
Running
1. improve type handling 2. add debug message
Browse files- app/main.py +44 -9
- app/static/index.html +23 -2
app/main.py
CHANGED
@@ -18,36 +18,71 @@ async def root():
|
|
18 |
|
19 |
@app.websocket("/ws")
|
20 |
async def websocket_endpoint(websocket: WebSocket):
|
|
|
21 |
await websocket.accept()
|
22 |
-
|
23 |
|
|
|
|
|
24 |
orig_sr = 48000 # default fallback
|
25 |
-
print("[INFO] WebSocket connection accepted.")
|
26 |
|
27 |
try:
|
28 |
while True:
|
29 |
data = await websocket.receive()
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
raw = data["text"]
|
32 |
-
config_msg = None
|
33 |
try:
|
34 |
config_msg = json.loads(raw)
|
35 |
except Exception as e:
|
36 |
-
print(f
|
37 |
-
|
|
|
38 |
orig_sr = int(config_msg["sampleRate"])
|
39 |
-
print(f"[INFO] Set original sample rate to {orig_sr}")
|
40 |
continue
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
elif isinstance(data, dict) and data.get("type") == "websocket.receive_bytes":
|
43 |
raw_audio = data["bytes"]
|
|
|
|
|
|
|
44 |
result, rms = stream_audio(raw_audio, stream, recognizer, orig_sr)
|
|
|
|
|
|
|
|
|
45 |
await websocket.send_json({
|
46 |
"partial": result,
|
47 |
-
"volume":
|
48 |
})
|
49 |
except Exception as e:
|
50 |
-
print(f"[ERROR] {e}")
|
51 |
final = finalize_stream(stream, recognizer)
|
52 |
await websocket.send_json({"final": final})
|
53 |
await websocket.close()
|
|
|
|
18 |
|
19 |
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Serve one streaming-recognition session over a WebSocket.

    Protocol, as implemented below:

    * A text frame containing JSON ``{"type": "config", "sampleRate": N}``
      sets the sample rate passed to ``stream_audio`` (48000 until then).
    * A binary frame is handed to ``stream_audio``; the reply is
      ``{"partial": <text>, "volume": <0..1>}``.
    * When the session ends, ``finalize_stream`` is called exactly once and
      ``{"final": <text>}`` is sent only if the client is still connected.
    """
    print("[DEBUG main] ▶ Attempting to accept WebSocket…")
    await websocket.accept()
    print("[DEBUG main] ▶ WebSocket.accept() returned → client is connected!")

    # Immediately create a new stream per client
    stream = recognizer.create_stream()
    orig_sr = 48000  # default fallback
    print("[INFO main] WebSocket connection accepted; created a streaming context.")

    # Set once a disconnect frame has been received; nothing may be sent to
    # the client after that point.
    client_gone = False

    try:
        while True:
            data = await websocket.receive()
            kind = data.get("type")

            if kind == "websocket.disconnect":
                print(f"[INFO main] Client disconnected (code={data.get('code')}). Flushing final transcript...")
                client_gone = True
                break

            # Debug: log any event we don't handle explicitly
            if kind != "websocket.receive":
                print(f"[DEBUG main] Received control/frame: {data}")
                continue

            # ASGI delivers both text and binary frames as "websocket.receive";
            # the payload sits under "text" or "bytes", and the unused key may
            # be missing or None.
            text = data.get("text")
            if text is not None:
                # Handle text (config) frame
                try:
                    config_msg = json.loads(text)
                except ValueError as e:
                    print(f"[ERROR main] JSON parse failed: {e}")
                    continue
                if isinstance(config_msg, dict) and config_msg.get("type") == "config":
                    try:
                        orig_sr = int(config_msg["sampleRate"])
                    except (KeyError, TypeError, ValueError) as e:
                        # A malformed config must not end the session; keep
                        # the sample rate that is already in effect.
                        print(f"[ERROR main] Invalid sampleRate in config: {e!r}")
                        continue
                    print(f"[INFO main] Set original sample rate to {orig_sr}")
                continue

            raw_audio = data.get("bytes")
            if raw_audio is None:
                continue

            print(f"[INFO main] Received audio chunk: {len(raw_audio)} bytes")

            # This will also print its own debug info (see asr_worker.py)
            result, rms = stream_audio(raw_audio, stream, recognizer, orig_sr)

            # Scale the RMS up for the client's volume meter and clamp to 1.0.
            vol_to_send = min(rms * 20.0, 1.0)
            print(f"[INFO main] Sending → partial='{result[:30]}…', volume={vol_to_send:.4f}")

            await websocket.send_json({
                "partial": result,
                "volume": vol_to_send,
            })
    except Exception as e:
        print(f"[ERROR main] Unexpected exception: {e}")

    # Flush the recognizer exactly once, however the loop ended.
    final = finalize_stream(stream, recognizer)
    if not client_gone:
        try:
            await websocket.send_json({"final": final})
            await websocket.close()
        except Exception as e:
            # The connection may already be unusable (e.g. it dropped mid-send).
            print(f"[ERROR main] Could not deliver final transcript: {e}")
    print("[INFO main] WebSocket closed, cleanup complete.")
|
app/static/index.html
CHANGED
@@ -39,6 +39,11 @@
|
|
39 |
border-radius: 8px;
|
40 |
transition: width 0.2s;
|
41 |
}
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
.output {
|
44 |
width: 90%;
|
@@ -78,7 +83,8 @@
|
|
78 |
|
79 |
<script>
|
80 |
let orig_sample_rate;
|
81 |
-
|
|
|
82 |
|
83 |
const vol = document.getElementById("vol");
|
84 |
const partial = document.getElementById("partial");
|
@@ -89,8 +95,16 @@
|
|
89 |
orig_sample_rate = context.sampleRate;
|
90 |
|
91 |
ws.onopen = () => {
|
|
|
92 |
ws.send(JSON.stringify({ type: "config", sampleRate: orig_sample_rate }));
|
93 |
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
const source = context.createMediaStreamSource(stream);
|
96 |
const processor = context.createScriptProcessor(4096, 1, 1);
|
@@ -99,15 +113,22 @@
|
|
99 |
|
100 |
processor.onaudioprocess = e => {
|
101 |
const input = e.inputBuffer.getChannelData(0);
|
|
|
|
|
102 |
ws.send(new Float32Array(input).buffer);
|
103 |
};
|
104 |
|
105 |
ws.onmessage = e => {
|
|
|
106 |
const msg = JSON.parse(e.data);
|
107 |
if (msg.partial) {
|
108 |
partial.textContent = msg.partial;
|
109 |
-
|
|
|
|
|
|
|
110 |
} else if (msg.final) {
|
|
|
111 |
finalText.textContent = msg.final;
|
112 |
}
|
113 |
};
|
|
|
39 |
border-radius: 8px;
|
40 |
transition: width 0.2s;
|
41 |
}
|
42 |
+
#vol::-moz-progress-bar {
|
43 |
+
background-color: #44bd32;
|
44 |
+
border-radius: 8px;
|
45 |
+
transition: width 0.2s;
|
46 |
+
}
|
47 |
|
48 |
.output {
|
49 |
width: 90%;
|
|
|
83 |
|
84 |
<script>
|
85 |
let orig_sample_rate;
|
86 |
+
// Use the same security level as the page: a page served over HTTPS is not
// allowed to open an insecure ws:// socket (mixed content), so switch to wss://.
const wsScheme = location.protocol === "https:" ? "wss://" : "ws://";
const wsUrl = wsScheme + location.host + "/ws";
console.log("[DEBUG client] Attempting to open WebSocket to " + wsUrl);
const ws = new WebSocket(wsUrl);
|
88 |
|
89 |
const vol = document.getElementById("vol");
|
90 |
const partial = document.getElementById("partial");
|
|
|
95 |
orig_sample_rate = context.sampleRate;
|
96 |
|
97 |
// Connection lifecycle handlers.

// As soon as the socket opens, send the config frame carrying the capture
// sample rate.
ws.onopen = function () {
  console.log("[DEBUG client] WebSocket.onopen fired!");
  const configFrame = { type: "config", sampleRate: orig_sample_rate };
  ws.send(JSON.stringify(configFrame));
};

// Surface transport errors in the console.
ws.onerror = function (err) {
  console.error("[DEBUG client] WebSocket.onerror:", err);
};

// Trace socket closure.
ws.onclose = function () {
  console.log("[DEBUG client] WebSocket.onclose fired!");
};
|
108 |
|
109 |
const source = context.createMediaStreamSource(stream);
|
110 |
const processor = context.createScriptProcessor(4096, 1, 1);
|
|
|
113 |
|
114 |
// Forward each captured audio buffer to the server as raw Float32 samples.
processor.onaudioprocess = e => {
  // Audio callbacks can fire before the socket has opened or after it has
  // closed; send() throws while the socket is still CONNECTING, so drop
  // those chunks.
  if (ws.readyState !== WebSocket.OPEN) {
    return;
  }
  const input = e.inputBuffer.getChannelData(0);
  // Log the first few samples so you know it’s not all zeros
  console.log("[DEBUG client] Sending audio chunk, first5 samples:", input.slice(0,5));
  // Copy before sending: send the samples from a fresh Float32Array.
  ws.send(new Float32Array(input).buffer);
};
|
120 |
|
121 |
// Handle server messages: {"partial": text, "volume": level} while streaming,
// {"final": text} at the end.
ws.onmessage = e => {
  console.log("[DEBUG client] Received server message:", e.data);

  let msg;
  try {
    msg = JSON.parse(e.data);
  } catch (err) {
    console.error("[DEBUG client] Ignoring non-JSON server message:", err);
    return;
  }
  if (msg === null || typeof msg !== "object") {
    return;
  }

  // Update the meter whenever a numeric volume arrives, even if the partial
  // transcript is still empty (silence / nothing recognised yet).
  // The server already sends min(rms * 20, 1.0), so the value is used as-is
  // and only clamped; scaling it by 20 again would pin the meter at full.
  if (typeof msg.volume === "number" && Number.isFinite(msg.volume)) {
    const level = Math.min(Math.max(msg.volume, 0), 1.0);
    console.log(`[DEBUG client] Volume from server: ${level.toFixed(3)}`);
    vol.value = level;
  }

  if (msg.partial) {
    partial.textContent = msg.partial;
  } else if (msg.final) {
    console.log("[DEBUG client] Final:", msg.final);
    finalText.textContent = msg.final;
  }
};
|