qfuxa committed on
Commit
6a8a8bf
·
1 Parent(s): de779de

format html and change mapping id <-> speakers

Browse files
Files changed (1) hide show
  1. src/web/live_transcription.html +340 -318
src/web/live_transcription.html CHANGED
@@ -1,339 +1,361 @@
1
  <!DOCTYPE html>
2
  <html lang="en">
 
3
  <head>
4
- <meta charset="UTF-8"/>
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
6
- <title>Audio Transcription</title>
7
- <style>
8
- body {
9
- font-family: 'Inter', sans-serif;
10
- margin: 20px;
11
- text-align: center;
12
- }
13
- #recordButton {
14
- width: 80px;
15
- height: 80px;
16
- font-size: 36px;
17
- border: none;
18
- border-radius: 50%;
19
- background-color: white;
20
- cursor: pointer;
21
- box-shadow: 0 0px 10px rgba(0, 0, 0, 0.2);
22
- transition: background-color 0.3s ease, transform 0.2s ease;
23
- }
24
- #recordButton.recording {
25
- background-color: #ff4d4d;
26
- color: white;
27
- }
28
- #recordButton:active {
29
- transform: scale(0.95);
30
- }
31
- #status {
32
- margin-top: 20px;
33
- font-size: 16px;
34
- color: #333;
35
- }
36
- .settings-container {
37
- display: flex;
38
- justify-content: center;
39
- align-items: center;
40
- gap: 15px;
41
- margin-top: 20px;
42
- }
43
- .settings {
44
- display: flex;
45
- flex-direction: column;
46
- align-items: flex-start;
47
- gap: 5px;
48
- }
49
- #chunkSelector,
50
- #websocketInput {
51
- font-size: 16px;
52
- padding: 5px;
53
- border-radius: 5px;
54
- border: 1px solid #ddd;
55
- background-color: #f9f9f9;
56
- }
57
- #websocketInput {
58
- width: 200px;
59
- }
60
- #chunkSelector:focus,
61
- #websocketInput:focus {
62
- outline: none;
63
- border-color: #007bff;
64
- }
65
- label {
66
- font-size: 14px;
67
- }
68
- /* Speaker-labeled transcript area */
69
- #linesTranscript {
70
- margin: 20px auto;
71
- max-width: 600px;
72
- text-align: left;
73
- font-size: 16px;
74
- }
75
- #linesTranscript p {
76
- margin: 5px 0;
77
- }
78
- #linesTranscript strong {
79
- color: #333;
80
- }
81
- #speaker {
82
- background-color: #dcefff;
83
- border-radius: 30px;
84
- padding: 2px 10px;
85
- font-size: 14px;
86
- }
87
- #timeInfo {
88
- color: #666;
89
- margin-left: 10px;
90
- }
91
- .textcontent {
92
- font-size: 16px;
93
- margin-left: 10px;
94
- padding-left: 10px;
95
- border-left: 2px solid #dcefff;
96
- margin-bottom: 10px;
97
- }
98
- .buffer {
99
- color: rgb(180, 180, 180);
100
- font-style: italic;
101
- margin-left: 4px;
102
- }
103
- .spinner {
104
- display: inline-block;
105
- width: 8px;
106
- height: 8px;
107
- border: 2px solid rgba(0, 0, 0, 0.2);
108
- border-top: 2px solid #333;
109
- border-radius: 50%;
110
- animation: spin 0.6s linear infinite;
111
- vertical-align: middle;
112
- margin-bottom: 2px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  }
114
 
115
  @keyframes spin {
116
- to {
117
- transform: rotate(360deg);
 
118
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  }
120
- .silence {
121
- color: #666;
122
- background-color: #f3f3f3;
123
- font-size: 13px;
124
- border-radius: 30px;
125
- padding: 2px 10px;
126
- }
127
- .loading {
128
- color: #666;
129
- background-color: #eff9ff;
130
- font-size: 14px;
131
- border-radius: 30px;
132
- padding: 2px 10px;
133
- }
134
- </style>
135
  </head>
 
136
  <body>
137
 
138
- <div class="settings-container">
139
- <button id="recordButton">🎙️</button>
140
- <div class="settings">
141
- <div>
142
- <label for="chunkSelector">Chunk size (ms):</label>
143
- <select id="chunkSelector">
144
- <option value="500">500 ms</option>
145
- <option value="1000" selected>1000 ms</option>
146
- <option value="2000">2000 ms</option>
147
- <option value="3000">3000 ms</option>
148
- <option value="4000">4000 ms</option>
149
- <option value="5000">5000 ms</option>
150
- </select>
151
- </div>
152
- <div>
153
- <label for="websocketInput">WebSocket URL:</label>
154
- <input id="websocketInput" type="text" value="ws://localhost:8000/asr" />
155
- </div>
 
156
  </div>
157
- </div>
158
-
159
- <p id="status"></p>
160
-
161
- <!-- Speaker-labeled transcript -->
162
- <div id="linesTranscript"></div>
163
-
164
- <script>
165
- let isRecording = false;
166
- let websocket = null;
167
- let recorder = null;
168
- let chunkDuration = 1000;
169
- let websocketUrl = "ws://localhost:8000/asr";
170
- let userClosing = false;
171
-
172
- const statusText = document.getElementById("status");
173
- const recordButton = document.getElementById("recordButton");
174
- const chunkSelector = document.getElementById("chunkSelector");
175
- const websocketInput = document.getElementById("websocketInput");
176
- const linesTranscriptDiv = document.getElementById("linesTranscript");
177
-
178
- chunkSelector.addEventListener("change", () => {
179
- chunkDuration = parseInt(chunkSelector.value);
180
- });
181
-
182
- websocketInput.addEventListener("change", () => {
183
- const urlValue = websocketInput.value.trim();
184
- if (!urlValue.startsWith("ws://") && !urlValue.startsWith("wss://")) {
185
- statusText.textContent = "Invalid WebSocket URL (must start with ws:// or wss://)";
186
- return;
187
- }
188
- websocketUrl = urlValue;
189
- statusText.textContent = "WebSocket URL updated. Ready to connect.";
190
- });
191
-
192
- function setupWebSocket() {
193
- return new Promise((resolve, reject) => {
194
- try {
195
- websocket = new WebSocket(websocketUrl);
196
- } catch (error) {
197
- statusText.textContent = "Invalid WebSocket URL. Please check and try again.";
198
- reject(error);
199
- return;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  }
201
 
202
- websocket.onopen = () => {
203
- statusText.textContent = "Connected to server.";
204
- resolve();
205
- };
206
-
207
- websocket.onclose = () => {
208
- if (userClosing) {
209
- statusText.textContent = "WebSocket closed by user.";
210
- } else {
211
- statusText.textContent =
212
- "Disconnected from the WebSocket server. (Check logs if model is loading.)";
213
- }
214
- userClosing = false;
215
- };
216
-
217
- websocket.onerror = () => {
218
- statusText.textContent = "Error connecting to WebSocket.";
219
- reject(new Error("Error connecting to WebSocket"));
220
- };
221
-
222
- // Handle messages from server
223
- websocket.onmessage = (event) => {
224
- const data = JSON.parse(event.data);
225
- /*
226
- The server might send:
227
- {
228
- "lines": [
229
- {"speaker": 0, "text": "Hello.", "beg": "00:00", "end": "00:01"},
230
- {"speaker": -2, "text": "Hi, no speaker here.", "beg": "00:01", "end": "00:02"},
231
- {"speaker": -1, "text": "...", "beg": "00:02", "end": "00:03" },
232
- ...
233
- ],
234
- "buffer": "..."
235
  }
236
- */
237
- const { lines = [], buffer = "" } = data;
238
- renderLinesWithBuffer( lines, buffer);
239
- };
240
- });
241
- }
242
-
243
- function renderLinesWithBuffer(lines, buffer) {
244
- // Clears if no lines
245
- if (!Array.isArray(lines) || lines.length === 0) {
246
- linesTranscriptDiv.innerHTML = "";
247
- return;
248
- }
249
-
250
-
251
-
252
- const linesHtml = lines.map((item, idx) => {
253
- let timeInfo = "";
254
- if (item.beg !== undefined && item.end !== undefined) {
255
- timeInfo = ` ${item.beg} - ${item.end}`;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  }
257
 
258
- let speakerLabel = "";
259
- if (item.speaker === -2) {
260
- speakerLabel = `<span class="silence">Silence<span id='timeInfo'>${timeInfo}</span></span>`;
261
- } else if (item.speaker == -1) {
262
- speakerLabel = `<span class='loading'> <span class="spinner"></span><span id='timeInfo'>${item.diff} second(s) of audio are undergoing diarization</span></span>`;
263
- } else if (item.speaker == -3) {
264
- speakerLabel = `<span id="speaker"><span id='timeInfo'>${timeInfo}</span>`;
265
- } else if (item.speaker !== -1) {
266
- speakerLabel = `<span id="speaker">Speaker ${item.speaker}<span id='timeInfo'>${timeInfo}</span></span>`;
267
- }
268
-
269
-
270
-
271
- let textContent = item.text;
272
- if (idx === lines.length - 1 && buffer) {
273
- textContent += `<span class="buffer">${buffer}</span>`;
274
  }
275
-
276
- return textContent
277
- ? `<p>${speakerLabel}<br/><div class='textcontent'>${textContent}</div></p>`
278
- : `<p >${speakerLabel}<br/></p>`;
279
- }).join("");
280
-
281
- linesTranscriptDiv.innerHTML = linesHtml;
282
- }
283
-
284
- async function startRecording() {
285
- try {
286
- const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
287
- recorder = new MediaRecorder(stream, { mimeType: "audio/webm" });
288
- recorder.ondataavailable = (e) => {
289
- if (websocket && websocket.readyState === WebSocket.OPEN) {
290
- websocket.send(e.data);
291
- }
292
- };
293
- recorder.start(chunkDuration);
294
- isRecording = true;
295
- updateUI();
296
- } catch (err) {
297
- statusText.textContent = "Error accessing microphone. Please allow microphone access.";
298
- }
299
- }
300
-
301
- function stopRecording() {
302
- userClosing = true;
303
- if (recorder) {
304
- recorder.stop();
305
- recorder = null;
306
- }
307
- isRecording = false;
308
-
309
- if (websocket) {
310
- websocket.close();
311
- websocket = null;
312
- }
313
-
314
- updateUI();
315
- }
316
-
317
- async function toggleRecording() {
318
- if (!isRecording) {
319
- linesTranscriptDiv.innerHTML = "";
320
- try {
321
- await setupWebSocket();
322
- await startRecording();
323
- } catch (err) {
324
- statusText.textContent = "Could not connect to WebSocket or access mic. Aborted.";
325
  }
326
- } else {
327
- stopRecording();
328
- }
329
- }
330
-
331
- function updateUI() {
332
- recordButton.classList.toggle("recording", isRecording);
333
- statusText.textContent = isRecording ? "Recording..." : "Click to start transcription";
334
- }
335
-
336
- recordButton.addEventListener("click", toggleRecording);
337
- </script>
338
  </body>
 
339
  </html>
 
1
  <!DOCTYPE html>
2
  <html lang="en">
3
+
4
  <head>
5
+ <meta charset="UTF-8" />
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
+ <title>Audio Transcription</title>
8
+ <style>
9
+ body {
10
+ font-family: 'Inter', sans-serif;
11
+ margin: 20px;
12
+ text-align: center;
13
+ }
14
+
15
+ #recordButton {
16
+ width: 80px;
17
+ height: 80px;
18
+ font-size: 36px;
19
+ border: none;
20
+ border-radius: 50%;
21
+ background-color: white;
22
+ cursor: pointer;
23
+ box-shadow: 0 0px 10px rgba(0, 0, 0, 0.2);
24
+ transition: background-color 0.3s ease, transform 0.2s ease;
25
+ }
26
+
27
+ #recordButton.recording {
28
+ background-color: #ff4d4d;
29
+ color: white;
30
+ }
31
+
32
+ #recordButton:active {
33
+ transform: scale(0.95);
34
+ }
35
+
36
+ #status {
37
+ margin-top: 20px;
38
+ font-size: 16px;
39
+ color: #333;
40
+ }
41
+
42
+ .settings-container {
43
+ display: flex;
44
+ justify-content: center;
45
+ align-items: center;
46
+ gap: 15px;
47
+ margin-top: 20px;
48
+ }
49
+
50
+ .settings {
51
+ display: flex;
52
+ flex-direction: column;
53
+ align-items: flex-start;
54
+ gap: 5px;
55
+ }
56
+
57
+ #chunkSelector,
58
+ #websocketInput {
59
+ font-size: 16px;
60
+ padding: 5px;
61
+ border-radius: 5px;
62
+ border: 1px solid #ddd;
63
+ background-color: #f9f9f9;
64
+ }
65
+
66
+ #websocketInput {
67
+ width: 200px;
68
+ }
69
+
70
+ #chunkSelector:focus,
71
+ #websocketInput:focus {
72
+ outline: none;
73
+ border-color: #007bff;
74
+ }
75
+
76
+ label {
77
+ font-size: 14px;
78
+ }
79
+
80
+ /* Speaker-labeled transcript area */
81
+ #linesTranscript {
82
+ margin: 20px auto;
83
+ max-width: 600px;
84
+ text-align: left;
85
+ font-size: 16px;
86
+ }
87
+
88
+ #linesTranscript p {
89
+ margin: 5px 0;
90
+ }
91
+
92
+ #linesTranscript strong {
93
+ color: #333;
94
+ }
95
+
96
+ #speaker {
97
+ background-color: #dcefff;
98
+ border-radius: 30px;
99
+ padding: 2px 10px;
100
+ font-size: 14px;
101
+ }
102
+
103
+ #timeInfo {
104
+ color: #666;
105
+ margin-left: 10px;
106
+ }
107
+
108
+ .textcontent {
109
+ font-size: 16px;
110
+ margin-left: 10px;
111
+ padding-left: 10px;
112
+ border-left: 2px solid #dcefff;
113
+ margin-bottom: 10px;
114
+ }
115
+
116
+ .buffer {
117
+ color: rgb(180, 180, 180);
118
+ font-style: italic;
119
+ margin-left: 4px;
120
+ }
121
+
122
+ .spinner {
123
+ display: inline-block;
124
+ width: 8px;
125
+ height: 8px;
126
+ border: 2px solid rgba(0, 0, 0, 0.2);
127
+ border-top: 2px solid #333;
128
+ border-radius: 50%;
129
+ animation: spin 0.6s linear infinite;
130
+ vertical-align: middle;
131
+ margin-bottom: 2px;
132
  }
133
 
134
  @keyframes spin {
135
+ to {
136
+ transform: rotate(360deg);
137
+ }
138
  }
139
+
140
+ .silence {
141
+ color: #666;
142
+ background-color: #f3f3f3;
143
+ font-size: 13px;
144
+ border-radius: 30px;
145
+ padding: 2px 10px;
146
+ }
147
+
148
+ .loading {
149
+ color: #666;
150
+ background-color: #eff9ff;
151
+ font-size: 14px;
152
+ border-radius: 30px;
153
+ padding: 2px 10px;
154
  }
155
+ </style>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  </head>
157
+
158
  <body>
159
 
160
+ <div class="settings-container">
161
+ <button id="recordButton">🎙️</button>
162
+ <div class="settings">
163
+ <div>
164
+ <label for="chunkSelector">Chunk size (ms):</label>
165
+ <select id="chunkSelector">
166
+ <option value="500">500 ms</option>
167
+ <option value="1000" selected>1000 ms</option>
168
+ <option value="2000">2000 ms</option>
169
+ <option value="3000">3000 ms</option>
170
+ <option value="4000">4000 ms</option>
171
+ <option value="5000">5000 ms</option>
172
+ </select>
173
+ </div>
174
+ <div>
175
+ <label for="websocketInput">WebSocket URL:</label>
176
+ <input id="websocketInput" type="text" value="ws://localhost:8000/asr" />
177
+ </div>
178
+ </div>
179
  </div>
180
+
181
+ <p id="status"></p>
182
+
183
+ <!-- Speaker-labeled transcript -->
184
+ <div id="linesTranscript"></div>
185
+
186
+ <script>
187
// ---- Session state ---------------------------------------------------------

// True while audio is being captured and streamed.
let isRecording = false;
// Current WebSocket connection to the transcription server, or null.
let websocket = null;
// Current MediaRecorder, or null when nothing is being recorded.
let recorder = null;
// Timeslice in milliseconds handed to recorder.start(); follows the chunk selector.
let chunkDuration = 1000;
// Endpoint used for the next connection; follows the URL input once it validates.
let websocketUrl = "ws://localhost:8000/asr";
// Raised by stopRecording() so the socket's close handler can tell a
// user-initiated close from an unexpected disconnect.
let userClosing = false;

// ---- DOM handles -----------------------------------------------------------

const statusText = document.getElementById("status");
const recordButton = document.getElementById("recordButton");
const chunkSelector = document.getElementById("chunkSelector");
const websocketInput = document.getElementById("websocketInput");
const linesTranscriptDiv = document.getElementById("linesTranscript");

// ---- Settings listeners ----------------------------------------------------

// Keep chunkDuration in sync with the selector. The radix is explicit so the
// value is always read as decimal, and a non-numeric value is ignored instead
// of letting NaN reach recorder.start().
chunkSelector.addEventListener("change", () => {
  const parsedDuration = Number.parseInt(chunkSelector.value, 10);
  if (Number.isFinite(parsedDuration) && parsedDuration > 0) {
    chunkDuration = parsedDuration;
  }
});

// Accept a new server URL only when it uses a WebSocket scheme; otherwise keep
// the previous URL and tell the user why.
websocketInput.addEventListener("change", () => {
  const urlValue = websocketInput.value.trim();
  if (!urlValue.startsWith("ws://") && !urlValue.startsWith("wss://")) {
    statusText.textContent = "Invalid WebSocket URL (must start with ws:// or wss://)";
    return;
  }
  websocketUrl = urlValue;
  statusText.textContent = "WebSocket URL updated. Ready to connect.";
});
213
+
214
/**
 * Open a WebSocket to `websocketUrl` and attach its event handlers.
 *
 * The socket is stored in the file-level `websocket` variable. Status changes
 * are reported through `statusText`, and every well-formed server message is
 * forwarded to `renderLinesWithBuffer`.
 *
 * @returns {Promise<void>} Resolves once the connection is open. Rejects when
 *   the WebSocket constructor throws (bad URL) or an error event fires before
 *   the connection opens.
 */
function setupWebSocket() {
  return new Promise((resolve, reject) => {
    try {
      websocket = new WebSocket(websocketUrl);
    } catch (error) {
      statusText.textContent = "Invalid WebSocket URL. Please check and try again.";
      reject(error);
      return;
    }

    websocket.onopen = () => {
      statusText.textContent = "Connected to server.";
      resolve();
    };

    websocket.onclose = () => {
      statusText.textContent = userClosing
        ? "WebSocket closed by user."
        : "Disconnected from the WebSocket server. (Check logs if model is loading.)";
      // Reset so a later unexpected disconnect is not reported as user-initiated.
      userClosing = false;
    };

    websocket.onerror = () => {
      statusText.textContent = "Error connecting to WebSocket.";
      // Has no effect when the promise already resolved (error after open).
      reject(new Error("Error connecting to WebSocket"));
    };

    // Handle messages from server
    websocket.onmessage = (event) => {
      /*
        Expected payload:
        {
          "lines": [
            {"speaker": 1,  "text": "Hello.", "beg": "00:00", "end": "00:01"},
            {"speaker": -2, "text": "",       "beg": "00:01", "end": "00:02"},
            {"speaker": 0,  "text": "...",    "diff": 3},
            {"speaker": -1, "text": "...",    "beg": "00:02", "end": "00:03"},
            ...
          ],
          "buffer": "..."
        }
        Speaker ids as rendered by renderLinesWithBuffer:
          -2     -> "Silence" label
           0     -> spinner, "<diff> second(s) of audio are undergoing diarization"
          -1     -> time range only, no speaker name
          other  -> "Speaker <id>"
      */
      let data;
      try {
        data = JSON.parse(event.data);
      } catch (error) {
        // A malformed or non-text frame must not break the message handler.
        console.error("Ignoring malformed message from server:", error);
        return;
      }
      if (data === null || typeof data !== "object") {
        return;
      }

      const { lines = [], buffer = "" } = data;
      renderLinesWithBuffer(lines, buffer);
    };
  });
}
264
 
265
/**
 * Escape a value for safe interpolation into HTML text or attribute content.
 *
 * @param {*} value - Anything; it is converted with String() first.
 * @returns {string} The escaped string.
 */
function escapeHtml(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

/**
 * Rebuild the transcript area from the server's latest snapshot.
 *
 * Each entry of `lines` becomes a label (silence / diarization in progress /
 * time only / "Speaker N") followed by its text. `buffer` is appended, in the
 * "buffer" style, to the last line, or shown alone when there are no lines.
 * All server-provided values are HTML-escaped before being written with
 * innerHTML.
 *
 * NOTE(review): the markup still repeats id="speaker" / id='timeInfo' per line
 * and nests a <div> inside a <p>, because the page's CSS selects on those ids
 * and on `#linesTranscript p`. Switching to classes needs a matching CSS change.
 *
 * @param {Array<Object>} lines - Items with `speaker`, `text`, and optionally
 *   `beg`, `end`, `diff`. A non-array or empty value renders only the buffer.
 * @param {string} buffer - Not-yet-committed text; may be empty.
 * @returns {void}
 */
function renderLinesWithBuffer(lines, buffer) {
  const bufferHtml = buffer ? `<span class="buffer">${escapeHtml(buffer)}</span>` : "";

  if (!Array.isArray(lines) || lines.length === 0) {
    linesTranscriptDiv.innerHTML = bufferHtml;
    return;
  }

  const lastIndex = lines.length - 1;

  const linesHtml = lines.map((item, idx) => {
    let timeInfo = "";
    if (item.beg !== undefined && item.end !== undefined) {
      timeInfo = ` ${escapeHtml(item.beg)} - ${escapeHtml(item.end)}`;
    }

    // Normalise once so numeric strings such as "0" keep matching, as they did
    // with the previous loose comparisons; a missing id never matches a case.
    const speakerId = item.speaker == null ? NaN : Number(item.speaker);

    let speakerLabel;
    switch (speakerId) {
      case -2:
        speakerLabel = `<span class="silence">Silence<span id='timeInfo'>${timeInfo}</span></span>`;
        break;
      case 0:
        speakerLabel = `<span class='loading'><span class="spinner"></span><span id='timeInfo'>${escapeHtml(item.diff)} second(s) of audio are undergoing diarization</span></span>`;
        break;
      case -1:
        // The outer span is now closed; it used to be left open.
        speakerLabel = `<span id="speaker"><span id='timeInfo'>${timeInfo}</span></span>`;
        break;
      default:
        speakerLabel = `<span id="speaker">Speaker ${escapeHtml(item.speaker)}<span id='timeInfo'>${timeInfo}</span></span>`;
    }

    // A missing text renders as empty rather than the literal "undefined".
    let textContent = item.text == null ? "" : escapeHtml(item.text);
    if (idx === lastIndex) {
      textContent += bufferHtml;
    }

    return textContent
      ? `<p>${speakerLabel}<br/><div class='textcontent'>${textContent}</div></p>`
      : `<p>${speakerLabel}<br/></p>`;
  }).join("");

  linesTranscriptDiv.innerHTML = linesHtml;
}
304
+
305
/**
 * Ask for microphone access and start streaming audio chunks to the server.
 *
 * On success the recorder is stored in the file-level `recorder`, `isRecording`
 * becomes true and the UI is refreshed. On any failure (permission denied, no
 * device, unsupported recorder options) the function does not throw: it logs
 * the error, releases whatever was acquired, closes the already-open WebSocket
 * so it is not leaked, and shows an error in the status line.
 *
 * @returns {Promise<void>} Always resolves.
 */
async function startRecording() {
  let stream = null;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    recorder = new MediaRecorder(stream, { mimeType: "audio/webm" });
    recorder.ondataavailable = (e) => {
      // Drop chunks that arrive after the socket was closed or replaced.
      if (websocket && websocket.readyState === WebSocket.OPEN) {
        websocket.send(e.data);
      }
    };
    recorder.start(chunkDuration);
    isRecording = true;
    updateUI();
  } catch (err) {
    console.error("Could not start recording:", err);

    // Release the microphone if it was acquired before the failure.
    if (stream) {
      stream.getTracks().forEach((track) => track.stop());
    }
    recorder = null;

    // Without a recorder the open connection is useless. Close it quietly so
    // its close handler does not overwrite the error message below.
    if (websocket) {
      websocket.onclose = null;
      websocket.close();
      websocket = null;
    }

    statusText.textContent = "Error accessing microphone. Please allow microphone access.";
  }
}
321
+
322
/**
 * Stop capturing audio, release the microphone, close the server connection
 * and refresh the UI.
 *
 * Safe to call when the recorder has already gone inactive or the socket has
 * already been closed by the server.
 *
 * @returns {void}
 */
function stopRecording() {
  if (recorder) {
    // stop() throws InvalidStateError on a recorder that is already inactive.
    if (recorder.state !== "inactive") {
      recorder.stop();
    }
    // Stopping the recorder does not end the capture; the tracks must be
    // stopped explicitly or the microphone stays in use.
    if (recorder.stream) {
      recorder.stream.getTracks().forEach((track) => track.stop());
    }
    recorder = null;
  }
  isRecording = false;

  if (websocket) {
    // Only raise the flag when a close event will actually follow. Otherwise
    // it would linger and mislabel the next unexpected disconnect.
    if (websocket.readyState !== WebSocket.CLOSED) {
      userClosing = true;
      websocket.close();
    }
    websocket = null;
  }

  updateUI();
}
337
 
338
// True while a start attempt (connect + microphone) is still in flight.
let isStartingRecording = false;

/**
 * Click handler for the record button: start a session when idle, stop it
 * when recording.
 *
 * Starting clears the transcript, opens the WebSocket and then starts the
 * recorder. Clicks that arrive while a start attempt is still pending are
 * ignored, so a quick double click cannot open two sockets and two recorders.
 *
 * @returns {Promise<void>} Resolves when the start attempt has finished, or
 *   immediately when stopping or ignoring a click.
 */
async function toggleRecording() {
  if (isRecording) {
    stopRecording();
    return;
  }
  if (isStartingRecording) {
    return;
  }

  isStartingRecording = true;
  linesTranscriptDiv.innerHTML = "";
  try {
    await setupWebSocket();
    await startRecording();
  } catch (err) {
    console.error("Could not start transcription session:", err);
    statusText.textContent = "Could not connect to WebSocket or access mic. Aborted.";
  } finally {
    isStartingRecording = false;
  }
}
351
+
352
/**
 * Make the record button and the status line reflect `isRecording`.
 *
 * Besides the visual "recording" class, the toggle state is exposed through
 * `aria-pressed`, so it is not conveyed by colour alone.
 *
 * @returns {void}
 */
function updateUI() {
  recordButton.classList.toggle("recording", isRecording);
  recordButton.setAttribute("aria-pressed", String(isRecording));
  statusText.textContent = isRecording ? "Recording..." : "Click to start transcription";
}
356
+
357
+ recordButton.addEventListener("click", toggleRecording);
358
+ </script>
 
 
 
 
 
 
 
 
 
359
  </body>
360
+
361
  </html>