Spaces:

reab5555
/

Multiple-Speakers-Personality-Analyzer

Runtime error

App Files Files Community

reab5555 committed on Aug 10, 2024

Commit

a9fd016

verified ·

1 Parent(s): 6cd715f

Update transcription_diarization.py

Browse files

Files changed (1) hide show

transcription_diarization.py +9 -53

transcription_diarization.py CHANGED Viewed

@@ -39,54 +39,18 @@ def transcribe_video(file_uri, job_name, max_speakers):
         time.sleep(30)
     if status['TranscriptionJob']['TranscriptionJobStatus'] == 'COMPLETED':
-        transcript_url = status['TranscriptionJob']['Transcript']['TranscriptFileUri']
-        return transcript_url
     else:
         return None
 def download_transcript(transcript_url):
-    print(f"Attempting to download transcript from URL: {transcript_url}")
     try:
-        # Try to download directly using requests
         response = requests.get(transcript_url)
-        response.raise_for_status()  # Raises an HTTPError for bad responses
-        transcript_content = response.text
-        return json.loads(transcript_content)
-    except requests.RequestException as e:
-        print(f"Failed to download transcript directly: {e}")
-        # If direct download fails, try using S3 client
-        try:
-            s3_client = boto3.client('s3',
-                                     aws_access_key_id=aws_access_key_id,
-                                     aws_secret_access_key=aws_secret_access_key,
-                                     region_name='eu-central-1')
-            # Parse the URL
-            parsed_url = urllib.parse.urlparse(transcript_url)
-            # Extract bucket name and key
-            bucket_name = parsed_url.netloc.split('.')[0]
-            key = urllib.parse.unquote(parsed_url.path.lstrip('/'))
-            response = s3_client.get_object(Bucket=bucket_name, Key=key)
-            transcript_content = response['Body'].read().decode('utf-8')
-            return json.loads(transcript_content)
-        except ClientError as e:
-            error_code = e.response['Error']['Code']
-            error_message = e.response['Error']['Message']
-            print(f"S3 ClientError: {error_code} - {error_message}")
-            if error_code == 'AccessDenied':
-                print("Access Denied. Please check your AWS credentials and bucket permissions.")
-            elif error_code == 'NoSuchKey':
-                print(f"The file {key} does not exist in the bucket {bucket_name}")
-            else:
-                print(f"An unexpected error occurred: {e}")
-        except Exception as e:
-            print(f"An unexpected error occurred: {e}")
-    return None
 def extract_transcriptions_with_speakers(transcript_data):
     segments = transcript_data['results']['speaker_labels']['segments']
@@ -125,33 +89,25 @@ def extract_transcriptions_with_speakers(transcript_data):
     return transcriptions
-def process_video(video_path, bucket_name, max_speakers):
-    # Upload video to S3
     s3_file_key = os.path.basename(video_path)
     file_uri = upload_to_s3(video_path, bucket_name, s3_file_key)
-    # Start transcription job
     job_name = f'transcription_job_{int(time.time())}'
     transcript_url = transcribe_video(file_uri, job_name, max_speakers)
     if transcript_url:
-        # Download and process transcript
         transcript_data = download_transcript(transcript_url)
         if transcript_data is None:
             return "Failed to download transcript."
         transcriptions = extract_transcriptions_with_speakers(transcript_data)
-        # Create combined SRT-like output
         output = []
         for i, trans in enumerate(transcriptions, 1):
             output.append(f"[{i}. {trans['speaker']} | text: {trans['text']}]\n")
         return '\n'.join(output)
     else:
-        return "Transcription failed."
-# This function will be called from the Gradio app
-def diarize_audio(video_path, max_speakers):
-    bucket_name = 'transcriptionjobbucket'  # Replace with your actual S3 bucket name
-    return process_video(video_path, bucket_name, max_speakers)

         time.sleep(30)
     if status['TranscriptionJob']['TranscriptionJobStatus'] == 'COMPLETED':
+        return status['TranscriptionJob']['Transcript']['TranscriptFileUri']
     else:
         return None
 def download_transcript(transcript_url):
     try:
         response = requests.get(transcript_url)
+        response.raise_for_status()
+        return json.loads(response.text)
+    except Exception as e:
+        print(f"Error downloading transcript: {e}")
+        return None
 def extract_transcriptions_with_speakers(transcript_data):
     segments = transcript_data['results']['speaker_labels']['segments']
     return transcriptions
+def diarize_audio(video_path, max_speakers):
+    bucket_name = 'transcriptionjobbucket'
     s3_file_key = os.path.basename(video_path)
     file_uri = upload_to_s3(video_path, bucket_name, s3_file_key)
     job_name = f'transcription_job_{int(time.time())}'
     transcript_url = transcribe_video(file_uri, job_name, max_speakers)
     if transcript_url:
         transcript_data = download_transcript(transcript_url)
         if transcript_data is None:
             return "Failed to download transcript."
         transcriptions = extract_transcriptions_with_speakers(transcript_data)
         output = []
         for i, trans in enumerate(transcriptions, 1):
             output.append(f"[{i}. {trans['speaker']} | text: {trans['text']}]\n")
         return '\n'.join(output)
     else:
+        return "Transcription failed."