reab5555 committed on
Commit
9421df7
·
verified ·
1 Parent(s): b1fb928

Update diarization.py

Browse files
Files changed (1) hide show
  1. diarization.py +20 -7
diarization.py CHANGED
@@ -97,15 +97,22 @@ def transcribe_audio(audio_path, language):
97
 
98
  def create_combined_srt(transcription_chunks, diarization, output_path):
99
  speaker_segments = []
100
- speaker_map = {}
101
- current_speaker_num = 1
102
 
 
103
  for segment, _, speaker in diarization.itertracks(yield_label=True):
104
- if speaker not in speaker_map:
105
- speaker_map[speaker] = f"Speaker {current_speaker_num}"
106
- current_speaker_num += 1
107
- speaker_segments.append((segment.start, segment.end, speaker_map[speaker]))
 
 
 
 
 
 
108
 
 
109
  with open(output_path, 'w', encoding='utf-8') as srt_file:
110
  for i, chunk in enumerate(transcription_chunks, 1):
111
  start_time, end_time = chunk["start"], chunk["end"]
@@ -115,7 +122,7 @@ def create_combined_srt(transcription_chunks, diarization, output_path):
115
  current_speaker = "Unknown"
116
  for seg_start, seg_end, speaker in speaker_segments:
117
  if seg_start <= start_time < seg_end:
118
- current_speaker = speaker
119
  break
120
 
121
  # Format timecodes as h:mm:ss (without leading zeros for hours)
@@ -125,6 +132,12 @@ def create_combined_srt(transcription_chunks, diarization, output_path):
125
  srt_file.write(f"{i}\n")
126
  srt_file.write(f"{{{current_speaker}}}\n time: ({start_str} --> {end_str})\n text: {text}\n\n")
127
 
 
 
 
 
 
 
128
  # Calculate speaker durations
129
  speaker_durations = defaultdict(float)
130
  for seg_start, seg_end, speaker in speaker_segments:
 
97
 
98
  def create_combined_srt(transcription_chunks, diarization, output_path):
99
  speaker_segments = []
100
+ speaker_durations = defaultdict(float)
 
101
 
102
+ # First pass: calculate durations
103
  for segment, _, speaker in diarization.itertracks(yield_label=True):
104
+ speaker_durations[speaker] += segment.end - segment.start
105
+ speaker_segments.append((segment.start, segment.end, speaker))
106
+
107
+ # Sort speakers by duration
108
+ sorted_speakers = sorted(speaker_durations.items(), key=lambda x: x[1], reverse=True)
109
+
110
+ # Create mapping of original labels to Speaker 1, Speaker 2, etc.
111
+ speaker_map = {}
112
+ for i, (speaker, _) in enumerate(sorted_speakers, start=1):
113
+ speaker_map[speaker] = f"Speaker {i}"
114
 
115
+ # Write the SRT content
116
  with open(output_path, 'w', encoding='utf-8') as srt_file:
117
  for i, chunk in enumerate(transcription_chunks, 1):
118
  start_time, end_time = chunk["start"], chunk["end"]
 
122
  current_speaker = "Unknown"
123
  for seg_start, seg_end, speaker in speaker_segments:
124
  if seg_start <= start_time < seg_end:
125
+ current_speaker = speaker_map[speaker]
126
  break
127
 
128
  # Format timecodes as h:mm:ss (without leading zeros for hours)
 
132
  srt_file.write(f"{i}\n")
133
  srt_file.write(f"{{{current_speaker}}}\n time: ({start_str} --> {end_str})\n text: {text}\n\n")
134
 
135
+ # Add information about the two most frequent speakers
136
+ with open(output_path, 'a', encoding='utf-8') as srt_file:
137
+ for i, (speaker, duration) in enumerate(sorted_speakers[:2], start=1):
138
+ duration_str = format_timestamp(duration).split('.')[0].lstrip('0')
139
+ srt_file.write(f"Speaker {i} (originally {speaker}): total duration {duration_str}\n")
140
+
141
  # Calculate speaker durations
142
  speaker_durations = defaultdict(float)
143
  for seg_start, seg_end, speaker in speaker_segments: