AshDavid12 commited on
Commit
153f836
·
1 Parent(s): 1cda627

took out last transcribed time arg

Browse files
Files changed (1) hide show
  1. infer.py +19 -34
infer.py CHANGED
@@ -81,7 +81,7 @@ async def read_root():
81
  return {"message": "This is the Ivrit AI Streaming service."}
82
 
83
 
84
- async def transcribe_core_ws(audio_file, last_transcribed_time):
85
  """
86
  Transcribe the audio file and return only the segments that have not been processed yet.
87
 
@@ -89,10 +89,7 @@ async def transcribe_core_ws(audio_file, last_transcribed_time):
89
  :param last_transcribed_time: The last time (in seconds) that was transcribed.
90
  :return: Newly transcribed segments and the updated last transcribed time.
91
  """
92
- logging.info(f"Starting transcription for file: {audio_file} from {last_transcribed_time} seconds.")
93
-
94
- ret = {'new_segments': []}
95
- new_last_transcribed_time = last_transcribed_time
96
 
97
  try:
98
  # Transcribe the entire audio file
@@ -109,24 +106,22 @@ async def transcribe_core_ws(audio_file, last_transcribed_time):
109
  logging.info(f"Processing segment with start time: {s.start} and end time: {s.end}")
110
 
111
  # Only process segments that start after the last transcribed time
112
- if s.start >= last_transcribed_time:
113
- logging.info(f"New segment found starting at {s.start} seconds.")
114
- words = [{'start': w.start, 'end': w.end, 'word': w.word, 'probability': w.probability} for w in s.words]
 
 
 
 
 
 
 
115
 
116
- seg = {
117
- 'id': s.id, 'seek': s.seek, 'start': s.start, 'end': s.end, 'text': s.text,
118
- 'avg_logprob': s.avg_logprob, 'compression_ratio': s.compression_ratio,
119
- 'no_speech_prob': s.no_speech_prob, 'words': words
120
- }
121
- logging.info(f'Adding new transcription segment: {seg}')
122
- ret['new_segments'].append(seg)
123
 
124
- # Update the last transcribed time to the end of the current segment
125
- new_last_transcribed_time = s.end
126
- logging.debug(f"Updated last transcribed time to: {new_last_transcribed_time} seconds")
127
 
128
- #logging.info(f"Returning {len(ret['new_segments'])} new segments and updated last transcribed time.")
129
- return ret, new_last_transcribed_time
130
 
131
 
132
  import tempfile
@@ -166,21 +161,11 @@ async def websocket_transcribe(websocket: WebSocket):
166
  chunk_duration = len(audio_chunk) / (16000 * 2) # Assuming 16kHz mono WAV (2 bytes per sample)
167
  accumulated_audio_time += chunk_duration
168
 
169
- partial_result, last_transcribed_time = await transcribe_core_ws(temp_audio_file.name,
170
- last_transcribed_time)
171
  accumulated_audio_time = 0 # Reset the accumulated audio time
172
- processed_segments.extend(partial_result['new_segments'])
173
-
174
- # Reset the accumulated audio size after transcription
175
- accumulated_audio_size = 0
176
-
177
- # Send the transcription result back to the client with both new and all processed segments
178
- response = {
179
- "new_segments": partial_result['new_segments'],
180
- "processed_segments": processed_segments
181
- }
182
- logging.info(f"Sending {len(partial_result['new_segments'])} new segments to the client.")
183
- await websocket.send_json(response)
184
 
185
  except WebSocketDisconnect:
186
  logging.info("WebSocket connection closed by the client.")
 
81
  return {"message": "This is the Ivrit AI Streaming service."}
82
 
83
 
84
+ async def transcribe_core_ws(audio_file):
85
  """
86
  Transcribe the audio file and return only the segments that have not been processed yet.
87
 
 
89
  :param last_transcribed_time: The last time (in seconds) that was transcribed.
90
  :return: Newly transcribed segments and the updated last transcribed time.
91
  """
92
+ ret = {'segments': []}
 
 
 
93
 
94
  try:
95
  # Transcribe the entire audio file
 
106
  logging.info(f"Processing segment with start time: {s.start} and end time: {s.end}")
107
 
108
  # Only process segments that start after the last transcribed time
109
+ logging.info(f"New segment found starting at {s.start} seconds.")
110
+ words = [{'start': w.start, 'end': w.end, 'word': w.word, 'probability': w.probability} for w in s.words]
111
+
112
+ seg = {
113
+ 'id': s.id, 'seek': s.seek, 'start': s.start, 'end': s.end, 'text': s.text,
114
+ 'avg_logprob': s.avg_logprob, 'compression_ratio': s.compression_ratio,
115
+ 'no_speech_prob': s.no_speech_prob, 'words': words
116
+ }
117
+ logging.info(f'Adding new transcription segment: {seg}')
118
+ ret['segements'].append(seg)
119
 
120
+ # Update the last transcribed time to the end of the current segment
 
 
 
 
 
 
121
 
 
 
 
122
 
123
+ #logging.info(f"Returning {len(ret['new_segments'])} new segments and updated last transcribed time.")
124
+ return ret
125
 
126
 
127
  import tempfile
 
161
  chunk_duration = len(audio_chunk) / (16000 * 2) # Assuming 16kHz mono WAV (2 bytes per sample)
162
  accumulated_audio_time += chunk_duration
163
 
164
+ partial_result = await transcribe_core_ws(temp_audio_file.name)
 
165
  accumulated_audio_time = 0 # Reset the accumulated audio time
166
+
167
+
168
+ await websocket.send_json(partial_result)
 
 
 
 
 
 
 
 
 
169
 
170
  except WebSocketDisconnect:
171
  logging.info("WebSocket connection closed by the client.")