reab5555 committed on
Commit
1a1ae91
·
verified ·
1 Parent(s): 3bf5b17

Update transcription_diarization.py

Browse files
Files changed (1) hide show
  1. transcription_diarization.py +46 -2
transcription_diarization.py CHANGED
@@ -76,8 +76,52 @@ def download_transcript(transcript_url):
76
  return None
77
 
78
  def extract_transcriptions_with_speakers(transcript_data):
79
- # This function remains unchanged
80
- # ... (keep the existing implementation)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  def diarize_audio(video_path):
83
  # Convert video to WAV audio
 
76
  return None
77
 
78
def extract_transcriptions_with_speakers(transcript_data):
    """Group transcript words into consecutive per-speaker utterances.

    Each ``pronunciation`` item is attributed to the first speaker segment
    whose time range fully contains it. Raw speaker labels are renamed to
    ``"Speaker 1"``, ``"Speaker 2"``, ... in order of first appearance, and
    adjacent words from the same speaker are joined with single spaces.
    ``punctuation`` items are glued onto the preceding word.

    Args:
        transcript_data: Mapping that provides
            ``['results']['speaker_labels']['segments']`` (entries with
            ``start_time``, ``end_time`` and ``speaker_label``) and
            ``['results']['items']`` (entries with ``type`` and
            ``alternatives``; ``pronunciation`` entries also carry
            ``start_time`` and ``end_time``). Time values must be
            accepted by ``float()``.

    Returns:
        A list of ``{'speaker': str, 'text': str}`` dicts in transcript
        order, one per uninterrupted run of words by the same speaker.

    Raises:
        KeyError: If one of the keys listed above is missing.
        ValueError: If a time value cannot be parsed as a float.
    """
    results = transcript_data['results']

    # Parse the segment bounds once instead of re-parsing them per word.
    segment_spans = [
        (float(seg['start_time']), float(seg['end_time']), seg['speaker_label'])
        for seg in results['speaker_labels']['segments']
    ]

    transcriptions = []
    speaker_mapping = {}
    current_speaker = None
    current_text = []

    for item in results['items']:
        item_type = item['type']

        if item_type == 'pronunciation':
            start_time = float(item['start_time'])
            end_time = float(item['end_time'])
            content = item['alternatives'][0]['content']

            span = next(
                (
                    candidate
                    for candidate in segment_spans
                    if candidate[0] <= start_time and candidate[1] >= end_time
                ),
                None,
            )
            if span is None:
                # NOTE(review): words not enclosed by any segment are skipped;
                # the original nesting was not recoverable, so confirm this
                # is the intended handling.
                continue

            speaker_label = span[2]

            # Map speaker labels to sequential numbers starting from 1.
            if speaker_label not in speaker_mapping:
                speaker_mapping[speaker_label] = f"Speaker {len(speaker_mapping) + 1}"
            speaker = speaker_mapping[speaker_label]

            if speaker != current_speaker:
                # Speaker changed: emit the utterance collected so far.
                if current_text:
                    transcriptions.append({
                        'speaker': current_speaker,
                        'text': ' '.join(current_text),
                    })
                    current_text = []
                current_speaker = speaker

            current_text.append(content)

        elif item_type == 'punctuation':
            # Punctuation attaches to the previous word. Before any word has
            # been attributed there is nothing to attach to, so skip it
            # rather than fail on current_text[-1].
            if current_text:
                current_text[-1] += item['alternatives'][0]['content']

    # Emit the trailing utterance, if any.
    if current_text:
        transcriptions.append({
            'speaker': current_speaker,
            'text': ' '.join(current_text),
        })

    return transcriptions
125
 
126
  def diarize_audio(video_path):
127
  # Convert video to WAV audio