Spaces:
Runtime error
Runtime error
Update diarization.py
Browse files- diarization.py +20 -7
diarization.py
CHANGED
@@ -97,15 +97,22 @@ def transcribe_audio(audio_path, language):
|
|
97 |
|
98 |
def create_combined_srt(transcription_chunks, diarization, output_path):
|
99 |
speaker_segments = []
|
100 |
-
|
101 |
-
current_speaker_num = 1
|
102 |
|
|
|
103 |
for segment, _, speaker in diarization.itertracks(yield_label=True):
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
|
|
|
109 |
with open(output_path, 'w', encoding='utf-8') as srt_file:
|
110 |
for i, chunk in enumerate(transcription_chunks, 1):
|
111 |
start_time, end_time = chunk["start"], chunk["end"]
|
@@ -115,7 +122,7 @@ def create_combined_srt(transcription_chunks, diarization, output_path):
|
|
115 |
current_speaker = "Unknown"
|
116 |
for seg_start, seg_end, speaker in speaker_segments:
|
117 |
if seg_start <= start_time < seg_end:
|
118 |
-
current_speaker = speaker
|
119 |
break
|
120 |
|
121 |
# Format timecodes as h:mm:ss (without leading zeros for hours)
|
@@ -125,6 +132,12 @@ def create_combined_srt(transcription_chunks, diarization, output_path):
|
|
125 |
srt_file.write(f"{i}\n")
|
126 |
srt_file.write(f"{{{current_speaker}}}\n time: ({start_str} --> {end_str})\n text: {text}\n\n")
|
127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
# Calculate speaker durations
|
129 |
speaker_durations = defaultdict(float)
|
130 |
for seg_start, seg_end, speaker in speaker_segments:
|
|
|
97 |
|
98 |
def create_combined_srt(transcription_chunks, diarization, output_path):
|
99 |
speaker_segments = []
|
100 |
+
speaker_durations = defaultdict(float)
|
|
|
101 |
|
102 |
+
# First pass: accumulate each speaker's total duration and collect the segments
|
103 |
for segment, _, speaker in diarization.itertracks(yield_label=True):
|
104 |
+
speaker_durations[speaker] += segment.end - segment.start
|
105 |
+
speaker_segments.append((segment.start, segment.end, speaker))
|
106 |
+
|
107 |
+
# Sort speakers by duration
|
108 |
+
sorted_speakers = sorted(speaker_durations.items(), key=lambda x: x[1], reverse=True)
|
109 |
+
|
110 |
+
# Create mapping of original labels to Speaker 1, Speaker 2, etc.
|
111 |
+
speaker_map = {}
|
112 |
+
for i, (speaker, _) in enumerate(sorted_speakers, start=1):
|
113 |
+
speaker_map[speaker] = f"Speaker {i}"
|
114 |
|
115 |
+
# Write the SRT content
|
116 |
with open(output_path, 'w', encoding='utf-8') as srt_file:
|
117 |
for i, chunk in enumerate(transcription_chunks, 1):
|
118 |
start_time, end_time = chunk["start"], chunk["end"]
|
|
|
122 |
current_speaker = "Unknown"
|
123 |
for seg_start, seg_end, speaker in speaker_segments:
|
124 |
if seg_start <= start_time < seg_end:
|
125 |
+
current_speaker = speaker_map[speaker]
|
126 |
break
|
127 |
|
128 |
# Format timecodes as h:mm:ss (without leading zeros for hours)
|
|
|
132 |
srt_file.write(f"{i}\n")
|
133 |
srt_file.write(f"{{{current_speaker}}}\n time: ({start_str} --> {end_str})\n text: {text}\n\n")
|
134 |
|
135 |
+
# Add information about the two speakers with the longest total speaking time
|
136 |
+
with open(output_path, 'a', encoding='utf-8') as srt_file:
|
137 |
+
for i, (speaker, duration) in enumerate(sorted_speakers[:2], start=1):
|
138 |
+
duration_str = format_timestamp(duration).split('.')[0].lstrip('0')
|
139 |
+
srt_file.write(f"Speaker {i} (originally {speaker}): total duration {duration_str}\n")
|
140 |
+
|
141 |
# Calculate speaker durations
|
142 |
speaker_durations = defaultdict(float)
|
143 |
for seg_start, seg_end, speaker in speaker_segments:
|