qfuxa committed on
Commit
4f4f3a0
·
1 Parent(s): 8c26e48

End of transcription: Properly sends signal back to the endpoint

Browse files
whisperlivekit/web/live_transcription.html CHANGED
@@ -308,6 +308,7 @@
308
  let waveCtx = waveCanvas.getContext("2d");
309
  let animationFrame = null;
310
  let waitingForStop = false;
 
311
  waveCanvas.width = 60 * (window.devicePixelRatio || 1);
312
  waveCanvas.height = 30 * (window.devicePixelRatio || 1);
313
  waveCtx.scale(window.devicePixelRatio || 1, window.devicePixelRatio || 1);
@@ -357,18 +358,31 @@
357
 
358
  websocket.onclose = () => {
359
  if (userClosing) {
360
- if (!statusText.textContent.includes("Recording stopped. Processing final audio")) { // This is a bit of a hack. We should have a better way to handle this. eg. using a status code.
361
- statusText.textContent = "Finished processing audio! Ready to record again.";
 
 
 
 
 
 
 
 
362
  }
363
- waitingForStop = false;
 
364
  } else {
365
- statusText.textContent =
366
- "Disconnected from the WebSocket server. (Check logs if model is loading.)";
367
  if (isRecording) {
368
- stopRecording();
369
  }
370
  }
371
- userClosing = false;
 
 
 
 
 
372
  };
373
 
374
  websocket.onerror = () => {
@@ -382,24 +396,31 @@
382
 
383
  // Check for status messages
384
  if (data.type === "ready_to_stop") {
385
- console.log("Ready to stop, closing WebSocket");
386
-
387
- // signal that we are not waiting for stop anymore
388
  waitingForStop = false;
389
- recordButton.disabled = false; // this should be elsewhere
390
- console.log("Record button enabled");
391
 
392
- //Now we can close the WebSocket
393
- if (websocket) {
394
- websocket.close();
395
- websocket = null;
 
 
 
 
 
396
  }
397
-
398
-
399
 
 
 
 
 
400
  return;
401
  }
402
 
 
 
403
  // Handle normal transcription updates
404
  const {
405
  lines = [],
@@ -414,13 +435,14 @@
414
  buffer_diarization,
415
  buffer_transcription,
416
  remaining_time_diarization,
417
- remaining_time_transcription
 
418
  );
419
  };
420
  });
421
  }
422
 
423
- function renderLinesWithBuffer(lines, buffer_diarization, buffer_transcription, remaining_time_diarization, remaining_time_transcription) {
424
  const linesHtml = lines.map((item, idx) => {
425
  let timeInfo = "";
426
  if (item.beg !== undefined && item.end !== undefined) {
@@ -430,30 +452,46 @@
430
  let speakerLabel = "";
431
  if (item.speaker === -2) {
432
  speakerLabel = `<span class="silence">Silence<span id='timeInfo'>${timeInfo}</span></span>`;
433
- } else if (item.speaker == 0) {
434
  speakerLabel = `<span class='loading'><span class="spinner"></span><span id='timeInfo'>${remaining_time_diarization} second(s) of audio are undergoing diarization</span></span>`;
435
  } else if (item.speaker == -1) {
436
- speakerLabel = `<span id="speaker"><span id='timeInfo'>${timeInfo}</span></span>`;
437
- } else if (item.speaker !== -1) {
438
  speakerLabel = `<span id="speaker">Speaker ${item.speaker}<span id='timeInfo'>${timeInfo}</span></span>`;
439
  }
440
 
441
- let textContent = item.text;
442
- if (idx === lines.length - 1) {
443
- speakerLabel += `<span class="label_transcription"><span class="spinner"></span>Transcription lag <span id='timeInfo'>${remaining_time_transcription}s</span></span>`
444
- }
445
- if (idx === lines.length - 1 && buffer_diarization) {
446
- speakerLabel += `<span class="label_diarization"><span class="spinner"></span>Diarization lag<span id='timeInfo'>${remaining_time_diarization}s</span></span>`
447
- textContent += `<span class="buffer_diarization">${buffer_diarization}</span>`;
448
- }
449
- if (idx === lines.length - 1) {
450
- textContent += `<span class="buffer_transcription">${buffer_transcription}</span>`;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
451
  }
452
 
453
-
454
- return textContent
455
- ? `<p>${speakerLabel}<br/><div class='textcontent'>${textContent}</div></p>`
456
- : `<p>${speakerLabel}<br/></p>`;
457
  }).join("");
458
 
459
  linesTranscriptDiv.innerHTML = linesHtml;
@@ -578,20 +616,6 @@
578
  timerElement.textContent = "00:00";
579
  startTime = null;
580
 
581
- if (websocket && websocket.readyState === WebSocket.OPEN) {
582
- try {
583
- await websocket.send(JSON.stringify({
584
- type: "stop",
585
- message: "User stopped recording"
586
- }));
587
- statusText.textContent = "Recording stopped. Processing final audio...";
588
- } catch (e) {
589
- console.error("Could not send stop message:", e);
590
- statusText.textContent = "Recording stopped. Error during final audio processing.";
591
- websocket.close();
592
- websocket = null;
593
- }
594
- }
595
 
596
  isRecording = false;
597
  updateUI();
@@ -625,19 +649,22 @@
625
 
626
  function updateUI() {
627
  recordButton.classList.toggle("recording", isRecording);
628
-
 
629
  if (waitingForStop) {
630
- statusText.textContent = "Please wait for processing to complete...";
631
- recordButton.disabled = true; // Optionally disable the button while waiting
632
- console.log("Record button disabled");
633
  } else if (isRecording) {
634
  statusText.textContent = "Recording...";
635
- recordButton.disabled = false;
636
- console.log("Record button enabled");
637
  } else {
638
- statusText.textContent = "Click to start transcription";
 
 
 
 
 
639
  recordButton.disabled = false;
640
- console.log("Record button enabled");
641
  }
642
  }
643
 
@@ -645,4 +672,4 @@
645
  </script>
646
  </body>
647
 
648
- </html>
 
308
  let waveCtx = waveCanvas.getContext("2d");
309
  let animationFrame = null;
310
  let waitingForStop = false;
311
+ let lastReceivedData = null;
312
  waveCanvas.width = 60 * (window.devicePixelRatio || 1);
313
  waveCanvas.height = 30 * (window.devicePixelRatio || 1);
314
  waveCtx.scale(window.devicePixelRatio || 1, window.devicePixelRatio || 1);
 
358
 
359
  websocket.onclose = () => {
360
  if (userClosing) {
361
+ if (waitingForStop) {
362
+ statusText.textContent = "Processing finalized or connection closed.";
363
+ if (lastReceivedData) {
364
+ renderLinesWithBuffer(
365
+ lastReceivedData.lines || [],
366
+ lastReceivedData.buffer_diarization || "",
367
+ lastReceivedData.buffer_transcription || "",
368
+ 0, 0, true // isFinalizing = true
369
+ );
370
+ }
371
  }
372
+ // If ready_to_stop was received, statusText is already "Finished processing..."
373
+ // and waitingForStop is false.
374
  } else {
375
+ statusText.textContent = "Disconnected from the WebSocket server. (Check logs if model is loading.)";
 
376
  if (isRecording) {
377
+ stopRecording();
378
  }
379
  }
380
+ isRecording = false;
381
+ waitingForStop = false;
382
+ userClosing = false;
383
+ lastReceivedData = null;
384
+ websocket = null;
385
+ updateUI();
386
  };
387
 
388
  websocket.onerror = () => {
 
396
 
397
  // Check for status messages
398
  if (data.type === "ready_to_stop") {
399
+ console.log("Ready to stop received, finalizing display and closing WebSocket.");
 
 
400
  waitingForStop = false;
 
 
401
 
402
+ if (lastReceivedData) {
403
+ renderLinesWithBuffer(
404
+ lastReceivedData.lines || [],
405
+ lastReceivedData.buffer_diarization || "",
406
+ lastReceivedData.buffer_transcription || "",
407
+ 0, // No more lag
408
+ 0, // No more lag
409
+ true // isFinalizing = true
410
+ );
411
  }
412
+ statusText.textContent = "Finished processing audio! Ready to record again.";
413
+ recordButton.disabled = false;
414
 
415
+ if (websocket) {
416
+ websocket.close(); // will trigger onclose
417
+ // websocket = null; // onclose handle setting websocket to null
418
+ }
419
  return;
420
  }
421
 
422
+ lastReceivedData = data;
423
+
424
  // Handle normal transcription updates
425
  const {
426
  lines = [],
 
435
  buffer_diarization,
436
  buffer_transcription,
437
  remaining_time_diarization,
438
+ remaining_time_transcription,
439
+ false // isFinalizing = false for normal updates
440
  );
441
  };
442
  });
443
  }
444
 
445
+ function renderLinesWithBuffer(lines, buffer_diarization, buffer_transcription, remaining_time_diarization, remaining_time_transcription, isFinalizing = false) {
446
  const linesHtml = lines.map((item, idx) => {
447
  let timeInfo = "";
448
  if (item.beg !== undefined && item.end !== undefined) {
 
452
  let speakerLabel = "";
453
  if (item.speaker === -2) {
454
  speakerLabel = `<span class="silence">Silence<span id='timeInfo'>${timeInfo}</span></span>`;
455
+ } else if (item.speaker == 0 && !isFinalizing) {
456
  speakerLabel = `<span class='loading'><span class="spinner"></span><span id='timeInfo'>${remaining_time_diarization} second(s) of audio are undergoing diarization</span></span>`;
457
  } else if (item.speaker == -1) {
458
+ speakerLabel = `<span id="speaker">Speaker 1<span id='timeInfo'>${timeInfo}</span></span>`;
459
+ } else if (item.speaker !== -1 && item.speaker !== 0) {
460
  speakerLabel = `<span id="speaker">Speaker ${item.speaker}<span id='timeInfo'>${timeInfo}</span></span>`;
461
  }
462
 
463
+
464
+ let currentLineText = item.text || "";
465
+
466
+ if (idx === lines.length - 1) {
467
+ if (!isFinalizing) {
468
+ if (remaining_time_transcription > 0) {
469
+ speakerLabel += `<span class="label_transcription"><span class="spinner"></span>Transcription lag <span id='timeInfo'>${remaining_time_transcription}s</span></span>`;
470
+ }
471
+ if (buffer_diarization && remaining_time_diarization > 0) {
472
+ speakerLabel += `<span class="label_diarization"><span class="spinner"></span>Diarization lag<span id='timeInfo'>${remaining_time_diarization}s</span></span>`;
473
+ }
474
+ }
475
+
476
+ if (buffer_diarization) {
477
+ if (isFinalizing) {
478
+ currentLineText += (currentLineText.length > 0 && buffer_diarization.trim().length > 0 ? " " : "") + buffer_diarization.trim();
479
+ } else {
480
+ currentLineText += `<span class="buffer_diarization">${buffer_diarization}</span>`;
481
+ }
482
+ }
483
+ if (buffer_transcription) {
484
+ if (isFinalizing) {
485
+ currentLineText += (currentLineText.length > 0 && buffer_transcription.trim().length > 0 ? " " : "") + buffer_transcription.trim();
486
+ } else {
487
+ currentLineText += `<span class="buffer_transcription">${buffer_transcription}</span>`;
488
+ }
489
+ }
490
  }
491
 
492
+ return currentLineText.trim().length > 0 || speakerLabel.length > 0
493
+ ? `<p>${speakerLabel}<br/><div class='textcontent'>${currentLineText}</div></p>`
494
+ : `<p>${speakerLabel}<br/></p>`;
 
495
  }).join("");
496
 
497
  linesTranscriptDiv.innerHTML = linesHtml;
 
616
  timerElement.textContent = "00:00";
617
  startTime = null;
618
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
619
 
620
  isRecording = false;
621
  updateUI();
 
649
 
650
  function updateUI() {
651
  recordButton.classList.toggle("recording", isRecording);
652
+ recordButton.disabled = waitingForStop;
653
+
654
  if (waitingForStop) {
655
+ if (statusText.textContent !== "Recording stopped. Processing final audio...") {
656
+ statusText.textContent = "Please wait for processing to complete...";
657
+ }
658
  } else if (isRecording) {
659
  statusText.textContent = "Recording...";
 
 
660
  } else {
661
+ if (statusText.textContent !== "Finished processing audio! Ready to record again." &&
662
+ statusText.textContent !== "Processing finalized or connection closed.") {
663
+ statusText.textContent = "Click to start transcription";
664
+ }
665
+ }
666
+ if (!waitingForStop) {
667
  recordButton.disabled = false;
 
668
  }
669
  }
670
 
 
672
  </script>
673
  </body>
674
 
675
+ </html>