sdafd committed on
Commit cd84e90 · verified · 1 Parent(s): 0b55c27

Update app.py

Files changed (1)
  1. app.py +32 -2
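
This commit replaces the plain bracketed word timestamps in final_result with SRT-formatted output: a new seconds_to_srt_time helper converts float seconds to HH:MM:SS,mmm, both transcription paths now collect per-word results into srt_entries, and final_result is assembled as numbered SRT cues at the end (a standalone sketch of the new path follows the diff).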
app.py CHANGED
@@ -178,6 +178,12 @@ def split_audio_by_pause(audio, sr, pause_threshold, top_db=30, energy_threshold
         filtered_intervals.append((start, end))
     return filtered_intervals
 
+def seconds_to_srt_time(seconds):
+    msec_total = int(round(seconds * 1000))
+    hours, msec_remainder = divmod(msec_total, 3600 * 1000)
+    minutes, msec_remainder = divmod(msec_remainder, 60 * 1000)
+    sec, msec = divmod(msec_remainder, 1000)
+    return f"{hours:02d}:{minutes:02d}:{sec:02d},{msec:03d}"
 
 # -------------------------------
 # Main Transcription Function
@@ -186,6 +192,7 @@ def transcribe(audio_file, model_size="base", debug=False, pause_threshold=0.0,
     start_time = time.time()
     final_result = ""
     debug_log = []
+    srt_entries = []
 
     try:
         # If vocal extraction is enabled, process the file first
@@ -240,7 +247,13 @@ def transcribe(audio_file, model_size="base", debug=False, pause_threshold=0.0,
                 for word in segment["words"]:
                     adjusted_start = word['start'] + seg_start/sr
                     adjusted_end = word['end'] + seg_start/sr
-                    final_result += f"[{adjusted_start:5.2f}s-{adjusted_end:5.2f}s] {word['word']}\n"
+
+                    srt_entries.append({
+                        'start': adjusted_start,
+                        'end': adjusted_end,
+                        'word': word['word'].strip()
+                    })
+                    #final_result += f"[{adjusted_start:5.2f}s-{adjusted_end:5.2f}s] {word['word']}\n"
         else:
             # Process the entire audio without splitting
             transcript = model.transcribe(audio, batch_size=batch_size, language=language)
@@ -249,7 +262,24 @@ def transcribe(audio_file, model_size="base", debug=False, pause_threshold=0.0,
             )
             for segment in aligned["segments"]:
                 for word in segment["words"]:
-                    final_result += f"[{word['start']:5.2f}s-{word['end']:5.2f}s] {word['word']}\n"
+                    #final_result += f"[{word['start']:5.2f}s-{word['end']:5.2f}s] {word['word']}\n"
+                    srt_entries.append({
+                        'start': word['start'],
+                        'end': word['end'],
+                        'word': word['word'].strip()
+                    })
+
+        srt_content = []
+        for idx, entry in enumerate(srt_entries, start=1):
+            start_time_srt = seconds_to_srt_time(entry['start'])
+            end_time_srt = seconds_to_srt_time(entry['end'])
+            srt_content.append(
+                f"{idx}\n"
+                f"{start_time_srt} --> {end_time_srt}\n"
+                f"{entry['word']}\n"
+            )
+
+        final_result = "\n".join(srt_content)
 
         debug_log.append(f"Language used: {language}")
         debug_log.append(f"Batch size: {batch_size}")