Logs for https://github.com/QuentinFuxa/WhisperLiveKit/issues/110 https://github.com/QuentinFuxa/WhisperLiveKit/issues/106
Browse fileshttps://github.com/QuentinFuxa/WhisperLiveKit/issues/90
https://github.com/QuentinFuxa/WhisperLiveKit/issues/87
https://github.com/QuentinFuxa/WhisperLiveKit/issues/81
https://github.com/QuentinFuxa/WhisperLiveKit/issues/2
- whisperlivekit/audio_processor.py +121 -19
whisperlivekit/audio_processor.py
CHANGED
@@ -6,7 +6,6 @@ import math
|
|
6 |
import logging
|
7 |
import traceback
|
8 |
from datetime import timedelta
|
9 |
-
from typing import List, Dict, Any
|
10 |
from whisperlivekit.timed_objects import ASRToken
|
11 |
from whisperlivekit.whisper_streaming_custom.whisper_online import online_factory
|
12 |
from whisperlivekit.core import WhisperLiveKit
|
@@ -39,7 +38,10 @@ class AudioProcessor:
|
|
39 |
self.bytes_per_sample = 2
|
40 |
self.bytes_per_sec = self.samples_per_sec * self.bytes_per_sample
|
41 |
self.max_bytes_per_sec = 32000 * 5 # 5 seconds of audio at 32 kHz
|
42 |
-
|
|
|
|
|
|
|
43 |
# State management
|
44 |
self.tokens = []
|
45 |
self.buffer_transcription = ""
|
@@ -78,14 +80,50 @@ class AudioProcessor:
|
|
78 |
|
79 |
async def restart_ffmpeg(self):
|
80 |
"""Restart the FFmpeg process after failure."""
|
|
|
|
|
81 |
if self.ffmpeg_process:
|
82 |
try:
|
83 |
-
|
84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
except Exception as e:
|
86 |
-
logger.
|
|
|
|
|
|
|
|
|
|
|
87 |
self.ffmpeg_process = self.start_ffmpeg_decoder()
|
88 |
self.pcm_buffer = bytearray()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
async def update_transcription(self, new_tokens, buffer, end_buffer, full_transcription, sep):
|
91 |
"""Thread-safe update of transcription with new data."""
|
@@ -154,21 +192,33 @@ class AudioProcessor:
|
|
154 |
|
155 |
while True:
|
156 |
try:
|
157 |
-
|
158 |
-
elapsed_time = math.floor((
|
159 |
buffer_size = max(int(32000 * elapsed_time), 4096)
|
160 |
-
beg =
|
161 |
|
162 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
try:
|
164 |
chunk = await asyncio.wait_for(
|
165 |
loop.run_in_executor(None, self.ffmpeg_process.stdout.read, buffer_size),
|
166 |
-
timeout=
|
167 |
)
|
|
|
|
|
|
|
168 |
except asyncio.TimeoutError:
|
169 |
logger.warning("FFmpeg read timeout. Restarting...")
|
170 |
await self.restart_ffmpeg()
|
171 |
beg = time()
|
|
|
172 |
continue
|
173 |
|
174 |
if not chunk:
|
@@ -366,7 +416,7 @@ class AudioProcessor:
|
|
366 |
logger.warning(f"Exception in results_formatter: {e}")
|
367 |
logger.warning(f"Traceback: {traceback.format_exc()}")
|
368 |
await asyncio.sleep(0.5) # Back off on error
|
369 |
-
|
370 |
async def create_tasks(self):
|
371 |
"""Create and start processing tasks."""
|
372 |
|
@@ -378,6 +428,35 @@ class AudioProcessor:
|
|
378 |
tasks.append(asyncio.create_task(self.diarization_processor(self.diarization)))
|
379 |
|
380 |
tasks.append(asyncio.create_task(self.ffmpeg_stdout_reader()))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
381 |
self.tasks = tasks
|
382 |
|
383 |
return self.results_formatter()
|
@@ -399,11 +478,34 @@ class AudioProcessor:
|
|
399 |
|
400 |
async def process_audio(self, message):
|
401 |
"""Process incoming audio data."""
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
self.
|
409 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
import logging
|
7 |
import traceback
|
8 |
from datetime import timedelta
|
|
|
9 |
from whisperlivekit.timed_objects import ASRToken
|
10 |
from whisperlivekit.whisper_streaming_custom.whisper_online import online_factory
|
11 |
from whisperlivekit.core import WhisperLiveKit
|
|
|
38 |
self.bytes_per_sample = 2
|
39 |
self.bytes_per_sec = self.samples_per_sec * self.bytes_per_sample
|
40 |
self.max_bytes_per_sec = 32000 * 5 # 5 seconds of audio at 32 kHz
|
41 |
+
self.last_ffmpeg_activity = time()
|
42 |
+
self.ffmpeg_health_check_interval = 5
|
43 |
+
self.ffmpeg_max_idle_time = 10
|
44 |
+
|
45 |
# State management
|
46 |
self.tokens = []
|
47 |
self.buffer_transcription = ""
|
|
|
80 |
|
81 |
async def restart_ffmpeg(self):
|
82 |
"""Restart the FFmpeg process after failure."""
|
83 |
+
logger.warning("Restarting FFmpeg process...")
|
84 |
+
|
85 |
if self.ffmpeg_process:
|
86 |
try:
|
87 |
+
# we check if process is still running
|
88 |
+
if self.ffmpeg_process.poll() is None:
|
89 |
+
logger.info("Terminating existing FFmpeg process")
|
90 |
+
self.ffmpeg_process.stdin.close()
|
91 |
+
self.ffmpeg_process.terminate()
|
92 |
+
|
93 |
+
# wait for termination with timeout
|
94 |
+
try:
|
95 |
+
await asyncio.wait_for(
|
96 |
+
asyncio.get_event_loop().run_in_executor(None, self.ffmpeg_process.wait),
|
97 |
+
timeout=5.0
|
98 |
+
)
|
99 |
+
except asyncio.TimeoutError:
|
100 |
+
logger.warning("FFmpeg process did not terminate, killing forcefully")
|
101 |
+
self.ffmpeg_process.kill()
|
102 |
+
await asyncio.get_event_loop().run_in_executor(None, self.ffmpeg_process.wait)
|
103 |
except Exception as e:
|
104 |
+
logger.error(f"Error during FFmpeg process termination: {e}")
|
105 |
+
logger.error(traceback.format_exc())
|
106 |
+
|
107 |
+
# we start new process
|
108 |
+
try:
|
109 |
+
logger.info("Starting new FFmpeg process")
|
110 |
self.ffmpeg_process = self.start_ffmpeg_decoder()
|
111 |
self.pcm_buffer = bytearray()
|
112 |
+
self.last_ffmpeg_activity = time()
|
113 |
+
logger.info("FFmpeg process restarted successfully")
|
114 |
+
except Exception as e:
|
115 |
+
logger.error(f"Failed to restart FFmpeg process: {e}")
|
116 |
+
logger.error(traceback.format_exc())
|
117 |
+
# try again after 5s
|
118 |
+
await asyncio.sleep(5)
|
119 |
+
try:
|
120 |
+
self.ffmpeg_process = self.start_ffmpeg_decoder()
|
121 |
+
self.pcm_buffer = bytearray()
|
122 |
+
self.last_ffmpeg_activity = time()
|
123 |
+
logger.info("FFmpeg process restarted successfully on second attempt")
|
124 |
+
except Exception as e2:
|
125 |
+
logger.critical(f"Failed to restart FFmpeg process on second attempt: {e2}")
|
126 |
+
logger.critical(traceback.format_exc())
|
127 |
|
128 |
async def update_transcription(self, new_tokens, buffer, end_buffer, full_transcription, sep):
|
129 |
"""Thread-safe update of transcription with new data."""
|
|
|
192 |
|
193 |
while True:
|
194 |
try:
|
195 |
+
current_time = time()
|
196 |
+
elapsed_time = math.floor((current_time - beg) * 10) / 10
|
197 |
buffer_size = max(int(32000 * elapsed_time), 4096)
|
198 |
+
beg = current_time
|
199 |
|
200 |
+
# Detect idle state much more quickly
|
201 |
+
if current_time - self.last_ffmpeg_activity > self.ffmpeg_max_idle_time:
|
202 |
+
logger.warning(f"FFmpeg process idle for {current_time - self.last_ffmpeg_activity:.2f}s. Restarting...")
|
203 |
+
await self.restart_ffmpeg()
|
204 |
+
beg = time()
|
205 |
+
self.last_ffmpeg_activity = time()
|
206 |
+
continue
|
207 |
+
|
208 |
+
# Reduce timeout for reading from FFmpeg
|
209 |
try:
|
210 |
chunk = await asyncio.wait_for(
|
211 |
loop.run_in_executor(None, self.ffmpeg_process.stdout.read, buffer_size),
|
212 |
+
timeout=5.0 # Shorter timeout (5 seconds instead of 15)
|
213 |
)
|
214 |
+
if chunk:
|
215 |
+
self.last_ffmpeg_activity = time()
|
216 |
+
|
217 |
except asyncio.TimeoutError:
|
218 |
logger.warning("FFmpeg read timeout. Restarting...")
|
219 |
await self.restart_ffmpeg()
|
220 |
beg = time()
|
221 |
+
self.last_ffmpeg_activity = time()
|
222 |
continue
|
223 |
|
224 |
if not chunk:
|
|
|
416 |
logger.warning(f"Exception in results_formatter: {e}")
|
417 |
logger.warning(f"Traceback: {traceback.format_exc()}")
|
418 |
await asyncio.sleep(0.5) # Back off on error
|
419 |
+
|
420 |
async def create_tasks(self):
|
421 |
"""Create and start processing tasks."""
|
422 |
|
|
|
428 |
tasks.append(asyncio.create_task(self.diarization_processor(self.diarization)))
|
429 |
|
430 |
tasks.append(asyncio.create_task(self.ffmpeg_stdout_reader()))
|
431 |
+
|
432 |
+
# Monitor overall system health
|
433 |
+
async def watchdog():
|
434 |
+
while True:
|
435 |
+
try:
|
436 |
+
await asyncio.sleep(10) # Check every 10 seconds instead of 60
|
437 |
+
|
438 |
+
current_time = time()
|
439 |
+
# Check for stalled tasks
|
440 |
+
for i, task in enumerate(tasks):
|
441 |
+
if task.done():
|
442 |
+
exc = task.exception() if task.done() else None
|
443 |
+
task_name = task.get_name() if hasattr(task, 'get_name') else f"Task {i}"
|
444 |
+
logger.error(f"{task_name} unexpectedly completed with exception: {exc}")
|
445 |
+
|
446 |
+
# Check for FFmpeg process health with shorter thresholds
|
447 |
+
ffmpeg_idle_time = current_time - self.last_ffmpeg_activity
|
448 |
+
if ffmpeg_idle_time > 15: # 15 seconds instead of 180
|
449 |
+
logger.warning(f"FFmpeg idle for {ffmpeg_idle_time:.2f}s - may need attention")
|
450 |
+
|
451 |
+
# Force restart after 30 seconds of inactivity (instead of 600)
|
452 |
+
if ffmpeg_idle_time > 30:
|
453 |
+
logger.error("FFmpeg idle for too long, forcing restart")
|
454 |
+
await self.restart_ffmpeg()
|
455 |
+
|
456 |
+
except Exception as e:
|
457 |
+
logger.error(f"Error in watchdog task: {e}")
|
458 |
+
|
459 |
+
tasks.append(asyncio.create_task(watchdog()))
|
460 |
self.tasks = tasks
|
461 |
|
462 |
return self.results_formatter()
|
|
|
478 |
|
479 |
async def process_audio(self, message):
|
480 |
"""Process incoming audio data."""
|
481 |
+
retry_count = 0
|
482 |
+
max_retries = 3
|
483 |
+
|
484 |
+
# Log periodic heartbeats showing ongoing audio proc
|
485 |
+
current_time = time()
|
486 |
+
if not hasattr(self, '_last_heartbeat') or current_time - self._last_heartbeat >= 10:
|
487 |
+
logger.debug(f"Processing audio chunk, last FFmpeg activity: {current_time - self.last_ffmpeg_activity:.2f}s ago")
|
488 |
+
self._last_heartbeat = current_time
|
489 |
+
|
490 |
+
while retry_count < max_retries:
|
491 |
+
try:
|
492 |
+
if not self.ffmpeg_process or not hasattr(self.ffmpeg_process, 'stdin') or self.ffmpeg_process.poll() is not None:
|
493 |
+
logger.warning("FFmpeg process not available, restarting...")
|
494 |
+
await self.restart_ffmpeg()
|
495 |
+
|
496 |
+
self.ffmpeg_process.stdin.write(message)
|
497 |
+
self.ffmpeg_process.stdin.flush()
|
498 |
+
self.last_ffmpeg_activity = time() # Update activity timestamp
|
499 |
+
return
|
500 |
+
|
501 |
+
except (BrokenPipeError, AttributeError, OSError) as e:
|
502 |
+
retry_count += 1
|
503 |
+
logger.warning(f"Error writing to FFmpeg: {e}. Retry {retry_count}/{max_retries}...")
|
504 |
+
|
505 |
+
if retry_count < max_retries:
|
506 |
+
await self.restart_ffmpeg()
|
507 |
+
await asyncio.sleep(0.5) # Shorter pause between retries
|
508 |
+
else:
|
509 |
+
logger.error("Maximum retries reached for FFmpeg process")
|
510 |
+
await self.restart_ffmpeg()
|
511 |
+
return
|