reab5555 committed on
Commit
f48b8c6
·
verified ·
1 Parent(s): e6af49e

Update transcription_diarization.py

Browse files
Files changed (1) hide show
  1. transcription_diarization.py +44 -22
transcription_diarization.py CHANGED
@@ -3,6 +3,8 @@ import time
3
  import json
4
  import os
5
  import urllib.parse
 
 
6
  from config import aws_access_key_id, aws_secret_access_key
7
 
8
  def upload_to_s3(local_file_path, bucket_name, s3_file_key):
@@ -46,31 +48,51 @@ def transcribe_video(file_uri, job_name, max_speakers):
46
  return None
47
 
48
  def download_transcript(transcript_url):
49
- s3_client = boto3.client('s3',
50
- aws_access_key_id=aws_access_key_id,
51
- aws_secret_access_key=aws_secret_access_key,
52
- region_name='eu-central-1')
53
-
54
- # Parse the URL
55
- parsed_url = urllib.parse.urlparse(transcript_url)
56
-
57
- # Extract bucket name and key
58
- bucket_name = parsed_url.netloc
59
- key = urllib.parse.unquote(parsed_url.path.lstrip('/'))
60
-
61
- print(f"Attempting to download from bucket: {bucket_name}")
62
- print(f"Using key: {key}")
63
 
64
  try:
65
- response = s3_client.get_object(Bucket=bucket_name, Key=key)
66
- transcript_content = response['Body'].read().decode('utf-8')
 
 
67
  return json.loads(transcript_content)
68
- except s3_client.exceptions.NoSuchKey:
69
- print(f"The file {key} does not exist in the bucket {bucket_name}")
70
- return None
71
- except s3_client.exceptions.ClientError as e:
72
- print(f"An error occurred: {e}")
73
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  def extract_transcriptions_with_speakers(transcript_data):
76
  segments = transcript_data['results']['speaker_labels']['segments']
 
3
  import json
4
  import os
5
  import urllib.parse
6
+ import requests
7
+ from botocore.exceptions import ClientError
8
  from config import aws_access_key_id, aws_secret_access_key
9
 
10
  def upload_to_s3(local_file_path, bucket_name, s3_file_key):
 
48
  return None
49
 
50
  def download_transcript(transcript_url):
51
+ print(f"Attempting to download transcript from URL: {transcript_url}")
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  try:
54
+ # Try to download directly using requests
55
+ response = requests.get(transcript_url)
56
+ response.raise_for_status() # Raises an HTTPError for bad responses
57
+ transcript_content = response.text
58
  return json.loads(transcript_content)
59
+ except requests.RequestException as e:
60
+ print(f"Failed to download transcript directly: {e}")
61
+
62
+ # If direct download fails, try using S3 client
63
+ try:
64
+ s3_client = boto3.client('s3',
65
+ aws_access_key_id=aws_access_key_id,
66
+ aws_secret_access_key=aws_secret_access_key,
67
+ region_name='eu-central-1')
68
+
69
+ # Parse the URL
70
+ parsed_url = urllib.parse.urlparse(transcript_url)
71
+
72
+ # Extract bucket name and key
73
+ bucket_name = parsed_url.netloc.split('.')[0]
74
+ key = urllib.parse.unquote(parsed_url.path.lstrip('/'))
75
+
76
+ print(f"Attempting to download from bucket: {bucket_name}")
77
+ print(f"Using key: {key}")
78
+
79
+ response = s3_client.get_object(Bucket=bucket_name, Key=key)
80
+ transcript_content = response['Body'].read().decode('utf-8')
81
+ return json.loads(transcript_content)
82
+ except ClientError as e:
83
+ error_code = e.response['Error']['Code']
84
+ error_message = e.response['Error']['Message']
85
+ print(f"S3 ClientError: {error_code} - {error_message}")
86
+ if error_code == 'AccessDenied':
87
+ print("Access Denied. Please check your AWS credentials and bucket permissions.")
88
+ elif error_code == 'NoSuchKey':
89
+ print(f"The file {key} does not exist in the bucket {bucket_name}")
90
+ else:
91
+ print(f"An unexpected error occurred: {e}")
92
+ except Exception as e:
93
+ print(f"An unexpected error occurred: {e}")
94
+
95
+ return None
96
 
97
  def extract_transcriptions_with_speakers(transcript_data):
98
  segments = transcript_data['results']['speaker_labels']['segments']