Quentin Fuxa committed on
Commit
022a086
·
1 Parent(s): c64fea5

optimize ffmpeg buffer reading: round duration down to the nearest 0.1s

Browse files
Files changed (1) hide show
  1. whisper_fastapi_online_server.py +10 -13
whisper_fastapi_online_server.py CHANGED
@@ -12,6 +12,9 @@ from fastapi.middleware.cors import CORSMiddleware
12
 
13
  from src.whisper_streaming.whisper_online import backend_factory, online_factory, add_shared_args
14
 
 
 
 
15
 
16
  ##### LOAD ARGS #####
17
 
@@ -125,23 +128,17 @@ async def websocket_endpoint(websocket: WebSocket):
125
 
126
  while True:
127
  try:
128
- elapsed_time = int(time() - beg)
 
129
  beg = time()
130
  chunk = await loop.run_in_executor(
131
- None, ffmpeg_process.stdout.read, 32000 * elapsed_time
132
  )
133
- if (
134
- not chunk
135
- ): # The first chunk will be almost empty, FFmpeg is still starting up
136
- chunk = await loop.run_in_executor(
137
- None, ffmpeg_process.stdout.read, 4096
138
- )
139
- if not chunk: # FFmpeg might have closed
140
- print("FFmpeg stdout closed.")
141
- break
142
 
143
  pcm_buffer.extend(chunk)
144
-
145
  if len(pcm_buffer) >= BYTES_PER_SEC:
146
  # Convert int16 -> float32
147
  pcm_array = (
@@ -186,7 +183,7 @@ async def websocket_endpoint(websocket: WebSocket):
186
  lines.append(
187
  {
188
  "speaker": ch["speaker"][-1],
189
- "text": ch['text'],
190
  }
191
  )
192
  else:
 
12
 
13
  from src.whisper_streaming.whisper_online import backend_factory, online_factory, add_shared_args
14
 
15
+ import subprocess
16
+ import math
17
+
18
 
19
  ##### LOAD ARGS #####
20
 
 
128
 
129
  while True:
130
  try:
131
+ elapsed_time = math.floor((time() - beg) * 10) / 10 # Round to 0.1 sec
132
+ ffmpeg_buffer_from_duration = max(int(32000 * elapsed_time), 4096)
133
  beg = time()
134
  chunk = await loop.run_in_executor(
135
+ None, ffmpeg_process.stdout.read, ffmpeg_buffer_from_duration
136
  )
137
+ if not chunk:
138
+ print("FFmpeg stdout closed.")
139
+ break
 
 
 
 
 
 
140
 
141
  pcm_buffer.extend(chunk)
 
142
  if len(pcm_buffer) >= BYTES_PER_SEC:
143
  # Convert int16 -> float32
144
  pcm_array = (
 
183
  lines.append(
184
  {
185
  "speaker": ch["speaker"][-1],
186
+ "text": ch['text']
187
  }
188
  )
189
  else: