Spaces:

reab5555
/

Multiple-Speakers-Personality-Analyzer

Runtime error

App Files Files Community

reab5555 committed on Aug 12, 2024

Commit

1a1ae91

verified ·

1 Parent(s): 3bf5b17

Update transcription_diarization.py

Browse files

Files changed (1) hide show

transcription_diarization.py +46 -2

transcription_diarization.py CHANGED Viewed

@@ -76,8 +76,52 @@ def download_transcript(transcript_url):
         return None
 def extract_transcriptions_with_speakers(transcript_data):
-    # This function remains unchanged
-    # ... (keep the existing implementation)
 def diarize_audio(video_path):
     # Convert video to WAV audio

         return None
 def extract_transcriptions_with_speakers(transcript_data):
     """Group transcribed words into consecutive per-speaker utterances.

     Walks ``transcript_data['results']['items']`` in order. Each
     ``'pronunciation'`` item is assigned to the first entry of
     ``transcript_data['results']['speaker_labels']['segments']`` whose
     ``[start_time, end_time]`` range fully contains the word. Raw speaker
     labels are renamed ``"Speaker 1"``, ``"Speaker 2"``, ... in order of
     first appearance. ``'punctuation'`` items are glued onto the preceding
     word. Items of any other type are ignored.

     NOTE(review): the key layout looks like Amazon Transcribe's
     speaker-identification JSON output -- confirm against the caller.

     Args:
         transcript_data: Parsed transcript dict with the keys described
             above; time values must be convertible with ``float()``.

     Returns:
         A list of ``{'speaker': str | None, 'text': str}`` dicts, one per
         uninterrupted run of words by the same speaker. ``'speaker'`` is
         ``None`` only when words appear before any word could be matched
         to a segment.

     Raises:
         KeyError: If an expected key is missing from the input.
         ValueError: If a time value cannot be parsed as a float.
     """
     results = transcript_data['results']
     # Parse segment bounds once instead of re-converting them for every word.
     segment_bounds = [
         (float(seg['start_time']), float(seg['end_time']), seg['speaker_label'])
         for seg in results['speaker_labels']['segments']
     ]
     current_speaker = None
     current_text = []
     transcriptions = []
     speaker_mapping = {}
     for item in results['items']:
         if item['type'] == 'pronunciation':
             start_time = float(item['start_time'])
             end_time = float(item['end_time'])
             speaker_label = next(
                 (label for seg_start, seg_end, label in segment_bounds
                  if seg_start <= start_time and seg_end >= end_time),
                 None,
             )
             if speaker_label is not None:
                 # Map speaker labels to sequential numbers starting from 1.
                 if speaker_label not in speaker_mapping:
                     speaker_mapping[speaker_label] = f"Speaker {len(speaker_mapping) + 1}"
                 speaker = speaker_mapping[speaker_label]
                 if speaker != current_speaker:
                     # Speaker changed: flush the utterance collected so far.
                     if current_text:
                         transcriptions.append({
                             'speaker': current_speaker,
                             'text': ' '.join(current_text),
                         })
                         current_text = []
                     current_speaker = speaker
             # Words that match no segment stay with the current speaker.
             current_text.append(item['alternatives'][0]['content'])
         elif item['type'] == 'punctuation' and current_text:
             # Attach punctuation to the previous word. The emptiness guard
             # avoids an IndexError when punctuation arrives before any word.
             current_text[-1] += item['alternatives'][0]['content']
     if current_text:
         transcriptions.append({
             'speaker': current_speaker,
             'text': ' '.join(current_text),
         })
     return transcriptions
 def diarize_audio(video_path):
     # Convert video to WAV audio