Spaces:
Runtime error
Runtime error
Update transcription_diarization.py
Browse files- transcription_diarization.py +11 -32
transcription_diarization.py
CHANGED
@@ -15,30 +15,22 @@ def upload_to_s3(local_file_path, bucket_name, s3_file_key):
|
|
15 |
s3_client.upload_file(local_file_path, bucket_name, s3_file_key)
|
16 |
return f's3://{bucket_name}/{s3_file_key}'
|
17 |
|
18 |
-
def transcribe_video(file_uri, job_name
|
19 |
transcribe = boto3.client('transcribe',
|
20 |
aws_access_key_id=aws_access_key_id,
|
21 |
aws_secret_access_key=aws_secret_access_key,
|
22 |
region_name='eu-central-1')
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
'
|
28 |
-
|
29 |
-
|
30 |
-
'Settings': {
|
31 |
'ShowSpeakerLabels': True,
|
32 |
'MaxSpeakerLabels': 4
|
33 |
}
|
34 |
-
|
35 |
-
|
36 |
-
if language_code:
|
37 |
-
job_params['LanguageCode'] = language_code
|
38 |
-
else:
|
39 |
-
job_params['IdentifyLanguage'] = True
|
40 |
-
|
41 |
-
transcribe.start_transcription_job(**job_params)
|
42 |
|
43 |
while True:
|
44 |
status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
|
@@ -108,27 +100,14 @@ def extract_transcriptions_with_speakers(transcript_data):
|
|
108 |
|
109 |
return transcriptions
|
110 |
|
111 |
-
|
112 |
-
|
113 |
-
"English": "en-US",
|
114 |
-
"Hebrew": "he-IL",
|
115 |
-
"Arabic": "ar-SA",
|
116 |
-
"French": "fr-FR",
|
117 |
-
"German": "de-DE",
|
118 |
-
"Italian": "it-IT",
|
119 |
-
"Japanese": "ja-JP",
|
120 |
-
"Chinese": "zh-CN",
|
121 |
-
"Auto-detect": None
|
122 |
-
}
|
123 |
-
return language_codes.get(language, "en-US")
|
124 |
-
|
125 |
-
def diarize_audio(video_path, language):
|
126 |
bucket_name = 'transcriptionjobbucket'
|
127 |
s3_file_key = os.path.basename(video_path)
|
128 |
file_uri = upload_to_s3(video_path, bucket_name, s3_file_key)
|
129 |
|
130 |
job_name = f'transcription_job_{int(time.time())}'
|
131 |
-
transcript_url = transcribe_video(file_uri, job_name
|
132 |
|
133 |
if transcript_url:
|
134 |
transcript_data = download_transcript(transcript_url)
|
|
|
15 |
s3_client.upload_file(local_file_path, bucket_name, s3_file_key)
|
16 |
return f's3://{bucket_name}/{s3_file_key}'
|
17 |
|
18 |
+
def transcribe_video(file_uri, job_name):
|
19 |
transcribe = boto3.client('transcribe',
|
20 |
aws_access_key_id=aws_access_key_id,
|
21 |
aws_secret_access_key=aws_secret_access_key,
|
22 |
region_name='eu-central-1')
|
23 |
|
24 |
+
transcribe.start_transcription_job(
|
25 |
+
TranscriptionJobName=job_name,
|
26 |
+
Media={'MediaFileUri': file_uri},
|
27 |
+
MediaFormat='mp4',
|
28 |
+
IdentifyLanguage=True,
|
29 |
+
Settings={
|
|
|
30 |
'ShowSpeakerLabels': True,
|
31 |
'MaxSpeakerLabels': 4
|
32 |
}
|
33 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
while True:
|
36 |
status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
|
|
|
100 |
|
101 |
return transcriptions
|
102 |
|
103 |
+
|
104 |
+
def diarize_audio(video_path):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
bucket_name = 'transcriptionjobbucket'
|
106 |
s3_file_key = os.path.basename(video_path)
|
107 |
file_uri = upload_to_s3(video_path, bucket_name, s3_file_key)
|
108 |
|
109 |
job_name = f'transcription_job_{int(time.time())}'
|
110 |
+
transcript_url = transcribe_video(file_uri, job_name)
|
111 |
|
112 |
if transcript_url:
|
113 |
transcript_data = download_transcript(transcript_url)
|