Spaces:

reab5555
/

Multiple-Speakers-Personality-Analyzer

Runtime error

App Files Community

reab5555 committed on Aug 10, 2024

Commit

f48b8c6

verified ·

1 Parent(s): e6af49e

Update transcription_diarization.py

Browse files

Files changed (1) hide show

transcription_diarization.py +44 -22

transcription_diarization.py CHANGED Viewed

@@ -3,6 +3,8 @@ import time
 import json
 import os
 import urllib.parse
 from config import aws_access_key_id, aws_secret_access_key
 def upload_to_s3(local_file_path, bucket_name, s3_file_key):
@@ -46,31 +48,51 @@ def transcribe_video(file_uri, job_name, max_speakers):
         return None
 def download_transcript(transcript_url):
-    s3_client = boto3.client('s3',
-                             aws_access_key_id=aws_access_key_id,
-                             aws_secret_access_key=aws_secret_access_key,
-                             region_name='eu-central-1')
-    # Parse the URL
-    parsed_url = urllib.parse.urlparse(transcript_url)
-    # Extract bucket name and key
-    bucket_name = parsed_url.netloc
-    key = urllib.parse.unquote(parsed_url.path.lstrip('/'))
-    print(f"Attempting to download from bucket: {bucket_name}")
-    print(f"Using key: {key}")
     try:
-        response = s3_client.get_object(Bucket=bucket_name, Key=key)
-        transcript_content = response['Body'].read().decode('utf-8')
         return json.loads(transcript_content)
-    except s3_client.exceptions.NoSuchKey:
-        print(f"The file {key} does not exist in the bucket {bucket_name}")
-        return None
-    except s3_client.exceptions.ClientError as e:
-        print(f"An error occurred: {e}")
-        return None
 def extract_transcriptions_with_speakers(transcript_data):
     segments = transcript_data['results']['speaker_labels']['segments']

 import json
 import os
 import urllib.parse
+import requests
+from botocore.exceptions import ClientError
 from config import aws_access_key_id, aws_secret_access_key
 def upload_to_s3(local_file_path, bucket_name, s3_file_key):
         return None
 def download_transcript(transcript_url):
+    print(f"Attempting to download transcript from URL: {transcript_url}")
     try:
+        # Try to download directly using requests
+        response = requests.get(transcript_url)
+        response.raise_for_status()  # Raises an HTTPError for bad responses
+        transcript_content = response.text
         return json.loads(transcript_content)
+    except requests.RequestException as e:
+        print(f"Failed to download transcript directly: {e}")
+        # If direct download fails, try using S3 client
+        try:
+            s3_client = boto3.client('s3',
+                                     aws_access_key_id=aws_access_key_id,
+                                     aws_secret_access_key=aws_secret_access_key,
+                                     region_name='eu-central-1')
+            # Parse the URL
+            parsed_url = urllib.parse.urlparse(transcript_url)
+            # Extract bucket name and key
+            bucket_name = parsed_url.netloc.split('.')[0]
+            key = urllib.parse.unquote(parsed_url.path.lstrip('/'))
+            print(f"Attempting to download from bucket: {bucket_name}")
+            print(f"Using key: {key}")
+            response = s3_client.get_object(Bucket=bucket_name, Key=key)
+            transcript_content = response['Body'].read().decode('utf-8')
+            return json.loads(transcript_content)
+        except ClientError as e:
+            error_code = e.response['Error']['Code']
+            error_message = e.response['Error']['Message']
+            print(f"S3 ClientError: {error_code} - {error_message}")
+            if error_code == 'AccessDenied':
+                print("Access Denied. Please check your AWS credentials and bucket permissions.")
+            elif error_code == 'NoSuchKey':
+                print(f"The file {key} does not exist in the bucket {bucket_name}")
+            else:
+                print(f"An unexpected error occurred: {e}")
+        except Exception as e:
+            print(f"An unexpected error occurred: {e}")
+    return None
 def extract_transcriptions_with_speakers(transcript_data):
     segments = transcript_data['results']['speaker_labels']['segments']