reab5555 committed on
Commit
b6211a3
·
verified ·
1 Parent(s): 01ddeb4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -18
app.py CHANGED
@@ -3,8 +3,7 @@ import math
3
  import gradio as gr
4
  import torch
5
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
6
- from moviepy.editor import VideoFileClip
7
- import librosa # Add librosa for audio processing
8
 
9
  def transcribe(video_file, transcribe_to_text, transcribe_to_srt, language):
10
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -41,23 +40,18 @@ def transcribe(video_file, transcribe_to_text, transcribe_to_srt, language):
41
 
42
  audio = video.audio
43
  duration = video.duration
 
44
  transcription_txt = ""
45
  transcription_srt = []
46
-
47
- # Corrected this to use `video_path` for librosa's load function
48
- audio_samples, sr = librosa.load(video_path, sr=None)
49
- intervals = librosa.effects.split(audio_samples, top_db=30) # Adjust threshold if necessary
50
 
51
- for idx, (start_frame, end_frame) in enumerate(intervals):
52
- start_time = start_frame / sr
53
- end_time = end_frame / sr
 
 
 
 
54
 
55
- if end_time - start_time > 10: # Enforce 10-second max duration for each caption
56
- end_time = start_time + 10
57
-
58
- temp_file_path = f"temp_audio_{idx}.wav"
59
- librosa.output.write_wav(temp_file_path, audio_samples[start_frame:end_frame], sr)
60
-
61
  with open(temp_file_path, "rb") as temp_file:
62
  result = pipe(temp_file_path, generate_kwargs={"language": language})
63
  transcription_txt += result["text"]
@@ -66,14 +60,15 @@ def transcribe(video_file, transcribe_to_text, transcribe_to_srt, language):
66
  start_time, end_time = chunk["timestamp"]
67
  if start_time is not None and end_time is not None:
68
  transcription_srt.append({
69
- "start": start_time + idx * 10,
70
- "end": end_time + idx * 10,
71
  "text": chunk["text"]
72
  })
73
  else:
74
  print(f"Warning: Invalid timestamp for chunk: {chunk}")
 
75
  os.remove(temp_file_path)
76
- yield f"Progress: {int((idx / len(intervals)) * 100)}%", None
77
 
78
  output = ""
79
  srt_file_path = None
 
3
  import gradio as gr
4
  import torch
5
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
6
+ from moviepy.editor import VideoFileClip, concatenate_audioclips
 
7
 
8
  def transcribe(video_file, transcribe_to_text, transcribe_to_srt, language):
9
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
40
 
41
  audio = video.audio
42
  duration = video.duration
43
+ n_chunks = math.ceil(duration / 10) # Split into 10-second chunks
44
  transcription_txt = ""
45
  transcription_srt = []
 
 
 
 
46
 
47
+ for i in range(n_chunks):
48
+ start = i * 10
49
+ end = min((i + 1) * 10, duration)
50
+ audio_chunk = audio.subclip(start, end)
51
+
52
+ temp_file_path = f"temp_audio_{i}.wav"
53
+ audio_chunk.write_audiofile(temp_file_path, codec='pcm_s16le')
54
 
 
 
 
 
 
 
55
  with open(temp_file_path, "rb") as temp_file:
56
  result = pipe(temp_file_path, generate_kwargs={"language": language})
57
  transcription_txt += result["text"]
 
60
  start_time, end_time = chunk["timestamp"]
61
  if start_time is not None and end_time is not None:
62
  transcription_srt.append({
63
+ "start": start_time + i * 10,
64
+ "end": end_time + i * 10,
65
  "text": chunk["text"]
66
  })
67
  else:
68
  print(f"Warning: Invalid timestamp for chunk: {chunk}")
69
+
70
  os.remove(temp_file_path)
71
+ yield f"Progress: {int(((i + 1) / n_chunks) * 100)}%", None
72
 
73
  output = ""
74
  srt_file_path = None