Spaces:

langtech-innovation
/

WhisperLiveKitDiarization

Paused

qfuxa committed on Feb 26

Commit

d98de94

1 Parent(s): 2527697

new buffer format

Files changed (2) hide show

src/whisper_streaming/online_asr.py CHANGED Viewed

@@ -168,7 +168,7 @@ class OnlineASRProcessor:
         """
         Get the unvalidated buffer in string format.
         """
-        return self.concatenate_tokens(self.transcript_buffer.buffer).text
     def process_iter(self) -> Transcript:

         """
         Get the unvalidated buffer in string format.
         """
+        return self.concatenate_tokens(self.transcript_buffer.buffer)
     def process_iter(self) -> Transcript:

whisper_fastapi_online_server.py CHANGED Viewed

@@ -186,7 +186,6 @@ async def websocket_endpoint(websocket: WebSocket):
                 if not chunk:
                     logger.info("FFmpeg stdout closed.")
                     break
                 pcm_buffer.extend(chunk)
                 if len(pcm_buffer) >= BYTES_PER_SEC:
                     if len(pcm_buffer) > MAX_BYTES_PER_SEC:
@@ -207,7 +206,9 @@ async def websocket_endpoint(websocket: WebSocket):
                         new_tokens = online.process_iter()
                         tokens.extend(new_tokens)
                         full_transcription += sep.join([t.text for t in new_tokens])
-                        buffer = online.get_buffer()
                         if buffer in full_transcription: # With VAC, the buffer is not updated until the next chunk is processed
                             buffer = ""
                     else:
@@ -253,6 +254,7 @@ async def websocket_endpoint(websocket: WebSocket):
                             lines[-1]["diff"] = round(token.end - last_end_diarized, 2)
                     response = {"lines": lines, "buffer": buffer}
                     await websocket.send_json(response)
             except Exception as e:

                 if not chunk:
                     logger.info("FFmpeg stdout closed.")
                     break
                 pcm_buffer.extend(chunk)
                 if len(pcm_buffer) >= BYTES_PER_SEC:
                     if len(pcm_buffer) > MAX_BYTES_PER_SEC:
                         new_tokens = online.process_iter()
                         tokens.extend(new_tokens)
                         full_transcription += sep.join([t.text for t in new_tokens])
+                        _buffer = online.get_buffer()
+                        buffer = _buffer.text
+                        end_buffer = _buffer.end if _buffer.end else tokens[-1].end if tokens else 0
                         if buffer in full_transcription: # With VAC, the buffer is not updated until the next chunk is processed
                             buffer = ""
                     else:
                             lines[-1]["diff"] = round(token.end - last_end_diarized, 2)
                     response = {"lines": lines, "buffer": buffer}
+                    # response = {"lines": lines, "buffer": buffer, "time_buffer_transcription": time() + beg_loop - end_buffer, "time_buffer_diarization": time() + beg_loop - end_attributed_speaker}
                     await websocket.send_json(response)
             except Exception as e: