Spaces:
Sleeping
Sleeping
AshDavid12
committed on
Commit
·
153f836
1
Parent(s):
1cda627
took out last transcribed time arg
Browse files
infer.py
CHANGED
@@ -81,7 +81,7 @@ async def read_root():
|
|
81 |
return {"message": "This is the Ivrit AI Streaming service."}
|
82 |
|
83 |
|
84 |
-
async def transcribe_core_ws(audio_file
|
85 |
"""
|
86 |
Transcribe the audio file and return only the segments that have not been processed yet.
|
87 |
|
@@ -89,10 +89,7 @@ async def transcribe_core_ws(audio_file, last_transcribed_time):
|
|
89 |
:param last_transcribed_time: The last time (in seconds) that was transcribed.
|
90 |
:return: Newly transcribed segments and the updated last transcribed time.
|
91 |
"""
|
92 |
-
|
93 |
-
|
94 |
-
ret = {'new_segments': []}
|
95 |
-
new_last_transcribed_time = last_transcribed_time
|
96 |
|
97 |
try:
|
98 |
# Transcribe the entire audio file
|
@@ -109,24 +106,22 @@ async def transcribe_core_ws(audio_file, last_transcribed_time):
|
|
109 |
logging.info(f"Processing segment with start time: {s.start} and end time: {s.end}")
|
110 |
|
111 |
# Only process segments that start after the last transcribed time
|
112 |
-
|
113 |
-
|
114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
|
116 |
-
|
117 |
-
'id': s.id, 'seek': s.seek, 'start': s.start, 'end': s.end, 'text': s.text,
|
118 |
-
'avg_logprob': s.avg_logprob, 'compression_ratio': s.compression_ratio,
|
119 |
-
'no_speech_prob': s.no_speech_prob, 'words': words
|
120 |
-
}
|
121 |
-
logging.info(f'Adding new transcription segment: {seg}')
|
122 |
-
ret['new_segments'].append(seg)
|
123 |
|
124 |
-
# Update the last transcribed time to the end of the current segment
|
125 |
-
new_last_transcribed_time = s.end
|
126 |
-
logging.debug(f"Updated last transcribed time to: {new_last_transcribed_time} seconds")
|
127 |
|
128 |
-
|
129 |
-
return ret
|
130 |
|
131 |
|
132 |
import tempfile
|
@@ -166,21 +161,11 @@ async def websocket_transcribe(websocket: WebSocket):
|
|
166 |
chunk_duration = len(audio_chunk) / (16000 * 2) # Assuming 16kHz mono WAV (2 bytes per sample)
|
167 |
accumulated_audio_time += chunk_duration
|
168 |
|
169 |
-
partial_result
|
170 |
-
last_transcribed_time)
|
171 |
accumulated_audio_time = 0 # Reset the accumulated audio time
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
accumulated_audio_size = 0
|
176 |
-
|
177 |
-
# Send the transcription result back to the client with both new and all processed segments
|
178 |
-
response = {
|
179 |
-
"new_segments": partial_result['new_segments'],
|
180 |
-
"processed_segments": processed_segments
|
181 |
-
}
|
182 |
-
logging.info(f"Sending {len(partial_result['new_segments'])} new segments to the client.")
|
183 |
-
await websocket.send_json(response)
|
184 |
|
185 |
except WebSocketDisconnect:
|
186 |
logging.info("WebSocket connection closed by the client.")
|
|
|
81 |
return {"message": "This is the Ivrit AI Streaming service."}
|
82 |
|
83 |
|
84 |
+
async def transcribe_core_ws(audio_file):
|
85 |
"""
|
86 |
Transcribe the audio file and return only the segments that have not been processed yet.
|
87 |
|
|
|
89 |
:param last_transcribed_time: The last time (in seconds) that was transcribed.
|
90 |
:return: Newly transcribed segments and the updated last transcribed time.
|
91 |
"""
|
92 |
+
ret = {'segments': []}
|
|
|
|
|
|
|
93 |
|
94 |
try:
|
95 |
# Transcribe the entire audio file
|
|
|
106 |
logging.info(f"Processing segment with start time: {s.start} and end time: {s.end}")
|
107 |
|
108 |
# Only process segments that start after the last transcribed time
|
109 |
+
logging.info(f"New segment found starting at {s.start} seconds.")
|
110 |
+
words = [{'start': w.start, 'end': w.end, 'word': w.word, 'probability': w.probability} for w in s.words]
|
111 |
+
|
112 |
+
seg = {
|
113 |
+
'id': s.id, 'seek': s.seek, 'start': s.start, 'end': s.end, 'text': s.text,
|
114 |
+
'avg_logprob': s.avg_logprob, 'compression_ratio': s.compression_ratio,
|
115 |
+
'no_speech_prob': s.no_speech_prob, 'words': words
|
116 |
+
}
|
117 |
+
logging.info(f'Adding new transcription segment: {seg}')
|
118 |
+
ret['segements'].append(seg)
|
119 |
|
120 |
+
# Update the last transcribed time to the end of the current segment
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
|
|
|
|
|
|
|
122 |
|
123 |
+
#logging.info(f"Returning {len(ret['new_segments'])} new segments and updated last transcribed time.")
|
124 |
+
return ret
|
125 |
|
126 |
|
127 |
import tempfile
|
|
|
161 |
chunk_duration = len(audio_chunk) / (16000 * 2) # Assuming 16kHz mono WAV (2 bytes per sample)
|
162 |
accumulated_audio_time += chunk_duration
|
163 |
|
164 |
+
partial_result = await transcribe_core_ws(temp_audio_file.name)
|
|
|
165 |
accumulated_audio_time = 0 # Reset the accumulated audio time
|
166 |
+
|
167 |
+
|
168 |
+
await websocket.send_json(partial_result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
|
170 |
except WebSocketDisconnect:
|
171 |
logging.info("WebSocket connection closed by the client.")
|