Spaces: reab5555/Multiple-Speakers-Personality-Analyzer

Runtime error

App Files Community

reab5555 committed on Aug 12, 2024

Commit

3bf5b17

verified ·

1 Parent(s): cc500ff

Update transcription_diarization.py

Browse files

Files changed (1) hide show

transcription_diarization.py +19 -63

transcription_diarization.py CHANGED Viewed

@@ -3,21 +3,21 @@ import time
 import json
 import os
 import urllib.parse
-from moviepy.editor import VideoFileClip, AudioFileClip
 import requests
 from botocore.exceptions import ClientError
 from config import aws_access_key_id, aws_secret_access_key
-def convert_to_audio(video_path, output_format='wav'):
     base_name = os.path.splitext(os.path.basename(video_path))[0]
-    output_path = f"{base_name}.{output_format}"
     try:
         video = VideoFileClip(video_path)
         audio = video.audio
-        # Write the audio to file
-        audio.write_audiofile(output_path, fps=audio.fps)
         video.close()
         audio.close()
@@ -35,7 +35,7 @@ def upload_to_s3(local_file_path, bucket_name, s3_file_key):
     s3_client.upload_file(local_file_path, bucket_name, s3_file_key)
     return f's3://{bucket_name}/{s3_file_key}'
-def transcribe_video(file_uri, job_name):
     transcribe = boto3.client('transcribe',
                               aws_access_key_id=aws_access_key_id,
                               aws_secret_access_key=aws_secret_access_key,
@@ -44,7 +44,7 @@ def transcribe_video(file_uri, job_name):
     transcribe.start_transcription_job(
         TranscriptionJobName=job_name,
         Media={'MediaFileUri': file_uri},
-        MediaFormat='mp4',
         IdentifyLanguage=True,
         Settings={
             'ShowSpeakerLabels': True,
@@ -59,12 +59,11 @@ def transcribe_video(file_uri, job_name):
         time.sleep(30)
     if status['TranscriptionJob']['TranscriptionJobStatus'] == 'COMPLETED':
-        # Print the identified language
         identified_language = status['TranscriptionJob']['LanguageCode']
         print(f"Identified language: {identified_language}")
         return status['TranscriptionJob']['Transcript']['TranscriptFileUri']
     else:
-        print('Transcription Job return None')
         return None
 def download_transcript(transcript_url):
@@ -77,68 +76,22 @@ def download_transcript(transcript_url):
         return None
 def extract_transcriptions_with_speakers(transcript_data):
-    segments = transcript_data['results']['speaker_labels']['segments']
-    items = transcript_data['results']['items']
-    current_speaker = None
-    current_text = []
-    transcriptions = []
-    speaker_mapping = {}
-    speaker_count = 0
-    for item in items:
-        if item['type'] == 'pronunciation':
-            start_time = float(item['start_time'])
-            end_time = float(item['end_time'])
-            content = item['alternatives'][0]['content']
-            speaker_segment = next((seg for seg in segments if float(seg['start_time']) <= start_time and float(seg['end_time']) >= end_time), None)
-            if speaker_segment:
-                speaker_label = speaker_segment['speaker_label']
-                # Map speaker labels to sequential numbers starting from 1
-                if speaker_label not in speaker_mapping:
-                    speaker_count += 1
-                    speaker_mapping[speaker_label] = f"Speaker {speaker_count}"
-                if speaker_mapping[speaker_label] != current_speaker:
-                    if current_text:
-                        transcriptions.append({
-                            'speaker': current_speaker,
-                            'text': ' '.join(current_text)
-                        })
-                        current_text = []
-                    current_speaker = speaker_mapping[speaker_label]
-            current_text.append(content)
-        elif item['type'] == 'punctuation':
-            current_text[-1] += item['alternatives'][0]['content']
-    if current_text:
-        transcriptions.append({
-            'speaker': current_speaker,
-            'text': ' '.join(current_text)
-        })
-    return transcriptions
 def diarize_audio(video_path):
-    # Convert video to mono audio
-    output_format = 'wav'
-    audio_path = convert_to_audio(video_path, output_format)
-    if not audio_path:
         return "Audio conversion failed."
     bucket_name = 'transcriptionjobbucket'
-    s3_file_key = os.path.basename(video_path)
-    file_uri = upload_to_s3(audio_path, bucket_name, s3_file_key)
     job_name = f'transcription_job_{int(time.time())}'
-    transcript_url = transcribe_video(file_uri, job_name)
     print('transcript url:', transcript_url)
@@ -154,6 +107,9 @@ def diarize_audio(video_path):
         for i, trans in enumerate(transcriptions, 1):
             output.append(f"[{i}. {trans['speaker']} | text: {trans['text']}]\n")
         return '\n'.join(output)
     else:
         return "Transcription failed."

 import json
 import os
 import urllib.parse
+from moviepy.editor import VideoFileClip
 import requests
 from botocore.exceptions import ClientError
 from config import aws_access_key_id, aws_secret_access_key
+def convert_to_wav(video_path):
     base_name = os.path.splitext(os.path.basename(video_path))[0]
+    output_path = f"{base_name}.wav"
     try:
         video = VideoFileClip(video_path)
         audio = video.audio
+        # Write the audio to WAV file
+        audio.write_audiofile(output_path, codec='pcm_s16le')
         video.close()
         audio.close()
     s3_client.upload_file(local_file_path, bucket_name, s3_file_key)
     return f's3://{bucket_name}/{s3_file_key}'
+def transcribe_audio(file_uri, job_name):
     transcribe = boto3.client('transcribe',
                               aws_access_key_id=aws_access_key_id,
                               aws_secret_access_key=aws_secret_access_key,
     transcribe.start_transcription_job(
         TranscriptionJobName=job_name,
         Media={'MediaFileUri': file_uri},
+        MediaFormat='wav',
         IdentifyLanguage=True,
         Settings={
             'ShowSpeakerLabels': True,
         time.sleep(30)
     if status['TranscriptionJob']['TranscriptionJobStatus'] == 'COMPLETED':
         identified_language = status['TranscriptionJob']['LanguageCode']
         print(f"Identified language: {identified_language}")
         return status['TranscriptionJob']['Transcript']['TranscriptFileUri']
     else:
+        print('Transcription Job returned None')
         return None
 def download_transcript(transcript_url):
         return None
 def extract_transcriptions_with_speakers(transcript_data):
+    # This function remains unchanged
+    # ... (keep the existing implementation)
 def diarize_audio(video_path):
+    # Convert video to WAV audio
+    wav_path = convert_to_wav(video_path)
+    if not wav_path:
         return "Audio conversion failed."
     bucket_name = 'transcriptionjobbucket'
+    s3_file_key = os.path.basename(wav_path)
+    file_uri = upload_to_s3(wav_path, bucket_name, s3_file_key)
     job_name = f'transcription_job_{int(time.time())}'
+    transcript_url = transcribe_audio(file_uri, job_name)
     print('transcript url:', transcript_url)
         for i, trans in enumerate(transcriptions, 1):
             output.append(f"[{i}. {trans['speaker']} | text: {trans['text']}]\n")
+        # Clean up: remove the temporary WAV file
+        os.remove(wav_path)
         return '\n'.join(output)
     else:
         return "Transcription failed."