Quentin Fuxa
committed on
Commit
·
022a086
1
Parent(s):
c64fea5
optimize ffmpeg buffer reading: round duration down to the nearest 0.1s
Browse files- whisper_fastapi_online_server.py +10 -13
whisper_fastapi_online_server.py
CHANGED
@@ -12,6 +12,9 @@ from fastapi.middleware.cors import CORSMiddleware
|
|
12 |
|
13 |
from src.whisper_streaming.whisper_online import backend_factory, online_factory, add_shared_args
|
14 |
|
|
|
|
|
|
|
15 |
|
16 |
##### LOAD ARGS #####
|
17 |
|
@@ -125,23 +128,17 @@ async def websocket_endpoint(websocket: WebSocket):
|
|
125 |
|
126 |
while True:
|
127 |
try:
|
128 |
-
elapsed_time =
|
|
|
129 |
beg = time()
|
130 |
chunk = await loop.run_in_executor(
|
131 |
-
None, ffmpeg_process.stdout.read,
|
132 |
)
|
133 |
-
if
|
134 |
-
|
135 |
-
|
136 |
-
chunk = await loop.run_in_executor(
|
137 |
-
None, ffmpeg_process.stdout.read, 4096
|
138 |
-
)
|
139 |
-
if not chunk: # FFmpeg might have closed
|
140 |
-
print("FFmpeg stdout closed.")
|
141 |
-
break
|
142 |
|
143 |
pcm_buffer.extend(chunk)
|
144 |
-
|
145 |
if len(pcm_buffer) >= BYTES_PER_SEC:
|
146 |
# Convert int16 -> float32
|
147 |
pcm_array = (
|
@@ -186,7 +183,7 @@ async def websocket_endpoint(websocket: WebSocket):
|
|
186 |
lines.append(
|
187 |
{
|
188 |
"speaker": ch["speaker"][-1],
|
189 |
-
"text": ch['text']
|
190 |
}
|
191 |
)
|
192 |
else:
|
|
|
12 |
|
13 |
from src.whisper_streaming.whisper_online import backend_factory, online_factory, add_shared_args
|
14 |
|
15 |
+
import subprocess
|
16 |
+
import math
|
17 |
+
|
18 |
|
19 |
##### LOAD ARGS #####
|
20 |
|
|
|
128 |
|
129 |
while True:
|
130 |
try:
|
131 |
+
elapsed_time = math.floor((time() - beg) * 10) / 10 # Round to 0.1 sec
|
132 |
+
ffmpeg_buffer_from_duration = max(int(32000 * elapsed_time), 4096)
|
133 |
beg = time()
|
134 |
chunk = await loop.run_in_executor(
|
135 |
+
None, ffmpeg_process.stdout.read, ffmpeg_buffer_from_duration
|
136 |
)
|
137 |
+
if not chunk:
|
138 |
+
print("FFmpeg stdout closed.")
|
139 |
+
break
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
|
141 |
pcm_buffer.extend(chunk)
|
|
|
142 |
if len(pcm_buffer) >= BYTES_PER_SEC:
|
143 |
# Convert int16 -> float32
|
144 |
pcm_array = (
|
|
|
183 |
lines.append(
|
184 |
{
|
185 |
"speaker": ch["speaker"][-1],
|
186 |
+
"text": ch['text']
|
187 |
}
|
188 |
)
|
189 |
else:
|