reab5555 committed on
Commit
ec8f948
·
verified ·
1 Parent(s): ff9df88

Update transcription_diarization.py

Browse files
Files changed (1) hide show
  1. transcription_diarization.py +12 -10
transcription_diarization.py CHANGED
@@ -7,7 +7,7 @@ import datetime
7
  from collections import defaultdict
8
  from openai import OpenAI
9
  from config import openai_api_key, hf_token
10
- from pydub import AudioSegment
11
  import math
12
 
13
  client = OpenAI(api_key=openai_api_key)
@@ -31,7 +31,8 @@ def extract_audio(video_path):
31
  audio_path = f"{base_name}.wav"
32
  video = VideoFileClip(video_path)
33
  audio = video.audio
34
- audio.write_audiofile(audio_path, codec='pcm_s16le')
 
35
  return audio_path
36
 
37
  def format_timestamp(seconds):
@@ -41,16 +42,17 @@ def diarize_audio(audio_path, pipeline, max_speakers):
41
  diarization = pipeline(audio_path, num_speakers=max_speakers)
42
  return diarization
43
 
44
- def split_audio(audio_path, chunk_duration=5 * 60 * 1000): # 5 minutes per chunk
45
  audio = AudioSegment.from_wav(audio_path)
46
- duration = len(audio) / 1000 # duration in seconds
47
- chunks = math.ceil(duration / (chunk_duration / 1000))
 
 
 
 
48
 
49
  chunk_paths = []
50
- for i in range(chunks):
51
- start_time = i * chunk_duration
52
- end_time = min((i + 1) * chunk_duration, len(audio))
53
- chunk = audio[start_time:end_time]
54
  chunk_path = f"{audio_path[:-4]}_chunk_{i}.wav"
55
  chunk.export(chunk_path, format="wav")
56
  chunk_paths.append(chunk_path)
@@ -82,7 +84,7 @@ def transcribe_audio(audio_path, language):
82
  return transcription_txt, transcription_chunks
83
 
84
  def transcribe_large_audio(audio_path, language):
85
- chunk_paths = split_audio(audio_path)
86
  transcription_txt = ""
87
  transcription_chunks = []
88
 
 
7
  from collections import defaultdict
8
  from openai import OpenAI
9
  from config import openai_api_key, hf_token
10
+ from pydub import AudioSegment, silence
11
  import math
12
 
13
  client = OpenAI(api_key=openai_api_key)
 
31
  audio_path = f"{base_name}.wav"
32
  video = VideoFileClip(video_path)
33
  audio = video.audio
34
+ # Reduce audio quality to keep file size smaller
35
+ audio.write_audiofile(audio_path, codec='pcm_s16le', fps=16000, nbytes=2)
36
  return audio_path
37
 
38
  def format_timestamp(seconds):
 
42
  diarization = pipeline(audio_path, num_speakers=max_speakers)
43
  return diarization
44
 
45
+ def split_audio_on_silence(audio_path, min_silence_len=500, silence_thresh=-40, keep_silence=500):
46
  audio = AudioSegment.from_wav(audio_path)
47
+ chunks = silence.split_on_silence(
48
+ audio,
49
+ min_silence_len=min_silence_len,
50
+ silence_thresh=silence_thresh,
51
+ keep_silence=keep_silence
52
+ )
53
 
54
  chunk_paths = []
55
+ for i, chunk in enumerate(chunks):
 
 
 
56
  chunk_path = f"{audio_path[:-4]}_chunk_{i}.wav"
57
  chunk.export(chunk_path, format="wav")
58
  chunk_paths.append(chunk_path)
 
84
  return transcription_txt, transcription_chunks
85
 
86
  def transcribe_large_audio(audio_path, language):
87
+ chunk_paths = split_audio_on_silence(audio_path)
88
  transcription_txt = ""
89
  transcription_chunks = []
90