AshDavid12 committed on
Commit
edf4250
·
1 Parent(s): 92ce07c

change core params

Browse files
Files changed (2) hide show
  1. client.py +2 -2
  2. infer.py +19 -10
client.py CHANGED
@@ -4,7 +4,7 @@ import requests
4
  import ssl
5
 
6
  # Parameters for reading and sending the audio
7
- AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod_serverless_whisper/main/me-hebrew.wav" # Use WAV file
8
 
9
  async def send_audio(websocket):
10
  buffer_size = 512 * 1024 # Buffer audio chunks up to 512KB before sending
@@ -56,7 +56,7 @@ async def run_client():
56
  ssl_context.check_hostname = False
57
  ssl_context.verify_mode = ssl.CERT_NONE
58
 
59
- async with websockets.connect(uri, ssl=ssl_context, timeout=30) as websocket:
60
  await asyncio.gather(
61
  send_audio(websocket),
62
  receive_transcription(websocket),
 
4
  import ssl
5
 
6
  # Parameters for reading and sending the audio
7
+ AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav" # Use WAV file
8
 
9
  async def send_audio(websocket):
10
  buffer_size = 512 * 1024 # Buffer audio chunks up to 512KB before sending
 
56
  ssl_context.check_hostname = False
57
  ssl_context.verify_mode = ssl.CERT_NONE
58
 
59
+ async with websockets.connect(uri, ssl=ssl_context, timeout=60) as websocket:
60
  await asyncio.gather(
61
  send_audio(websocket),
62
  receive_transcription(websocket),
infer.py CHANGED
@@ -196,7 +196,8 @@ async def websocket_transcribe(websocket: WebSocket):
196
  processed_segments = [] # Keeps track of the segments already transcribed
197
  accumulated_audio_size = 0 # Track how much audio data has been buffered
198
  accumulated_audio_time = 0 # Track the total audio duration accumulated
199
- min_transcription_time = 5.0 # Minimum duration of audio in seconds before transcription starts
 
200
 
201
  # A temporary file to store the growing audio data
202
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
@@ -221,17 +222,25 @@ async def websocket_transcribe(websocket: WebSocket):
221
  logging.info(f"Received and buffered {len(audio_chunk)} bytes, total buffered: {accumulated_audio_size} bytes, total time: {accumulated_audio_time:.2f} seconds")
222
 
223
  # Transcribe when enough time (audio) is accumulated (e.g., at least 5 seconds of audio)
224
- if accumulated_audio_time >= min_transcription_time:
225
- logging.info("Buffered enough audio time, starting transcription.")
226
 
227
- # Call the transcription function with the last processed time
228
- partial_result, processed_segments = transcribe_core_ws(temp_audio_file.name, processed_segments)
229
- accumulated_audio_time = 0 # Reset the accumulated audio time
230
 
231
- # Send the transcription result back to the client
232
- logging.info(f"Sending {len(partial_result['new_segments'])} new segments to the client.")
233
- logging.info(f"partial result {partial_result}")
234
- await websocket.send_json(partial_result)
 
 
 
 
 
 
 
 
 
 
 
235
 
236
  except WebSocketDisconnect:
237
  logging.info("WebSocket connection closed by the client.")
 
196
  processed_segments = [] # Keeps track of the segments already transcribed
197
  accumulated_audio_size = 0 # Track how much audio data has been buffered
198
  accumulated_audio_time = 0 # Track the total audio duration accumulated
199
+ last_transcribed_time = 0.0
200
+ #min_transcription_time = 5.0 # Minimum duration of audio in seconds before transcription starts
201
 
202
  # A temporary file to store the growing audio data
203
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
 
222
  logging.info(f"Received and buffered {len(audio_chunk)} bytes, total buffered: {accumulated_audio_size} bytes, total time: {accumulated_audio_time:.2f} seconds")
223
 
224
  # Transcribe when enough time (audio) is accumulated (e.g., at least 5 seconds of audio)
225
+ #if accumulated_audio_time >= min_transcription_time:
226
+ logging.info("Buffered enough audio time, starting transcription.")
227
 
 
 
 
228
 
229
+ # Call the transcription function with the last processed time
230
+ partial_result, last_transcribed_time = transcribe_core_ws(temp_audio_file.name, processed_segments)
231
+ accumulated_audio_time = 0 # Reset the accumulated audio time
232
+ processed_segments.extend(partial_result['new_segments'])
233
+
234
+ # Reset the accumulated audio size after transcription
235
+ accumulated_audio_size = 0
236
+
237
+ # Send the transcription result back to the client with both new and all processed segments
238
+ response = {
239
+ "new_segments": partial_result['new_segments'],
240
+ "processed_segments": processed_segments
241
+ }
242
+ logging.info(f"Sending {len(partial_result['new_segments'])} new segments to the client.")
243
+ await websocket.send_json(response)
244
 
245
  except WebSocketDisconnect:
246
  logging.info("WebSocket connection closed by the client.")