reab5555 committed on
Commit
203e76f
·
verified ·
1 Parent(s): 189e4c1

Update transcription_diarization.py

Browse files
Files changed (1) hide show
  1. transcription_diarization.py +4 -5
transcription_diarization.py CHANGED
@@ -15,7 +15,7 @@ def upload_to_s3(local_file_path, bucket_name, s3_file_key):
15
  s3_client.upload_file(local_file_path, bucket_name, s3_file_key)
16
  return f's3://{bucket_name}/{s3_file_key}'
17
 
18
- def transcribe_video(file_uri, job_name, max_speakers):
19
  transcribe = boto3.client('transcribe',
20
  aws_access_key_id=aws_access_key_id,
21
  aws_secret_access_key=aws_secret_access_key,
@@ -27,8 +27,7 @@ def transcribe_video(file_uri, job_name, max_speakers):
27
  MediaFormat='mp4',
28
  IdentifyLanguage=True,
29
  Settings={
30
- 'ShowSpeakerLabels': True,
31
- 'MaxSpeakerLabels': max_speakers
32
  }
33
  )
34
 
@@ -101,13 +100,13 @@ def extract_transcriptions_with_speakers(transcript_data):
101
  return transcriptions
102
 
103
 
104
- def diarize_audio(video_path, max_speakers):
105
  bucket_name = 'transcriptionjobbucket'
106
  s3_file_key = os.path.basename(video_path)
107
  file_uri = upload_to_s3(video_path, bucket_name, s3_file_key)
108
 
109
  job_name = f'transcription_job_{int(time.time())}'
110
- transcript_url = transcribe_video(file_uri, job_name, max_speakers)
111
 
112
  if transcript_url:
113
  transcript_data = download_transcript(transcript_url)
 
15
  s3_client.upload_file(local_file_path, bucket_name, s3_file_key)
16
  return f's3://{bucket_name}/{s3_file_key}'
17
 
18
+ def transcribe_video(file_uri, job_name):
19
  transcribe = boto3.client('transcribe',
20
  aws_access_key_id=aws_access_key_id,
21
  aws_secret_access_key=aws_secret_access_key,
 
27
  MediaFormat='mp4',
28
  IdentifyLanguage=True,
29
  Settings={
30
+ 'ShowSpeakerLabels': True
 
31
  }
32
  )
33
 
 
100
  return transcriptions
101
 
102
 
103
+ def diarize_audio(video_path):
104
  bucket_name = 'transcriptionjobbucket'
105
  s3_file_key = os.path.basename(video_path)
106
  file_uri = upload_to_s3(video_path, bucket_name, s3_file_key)
107
 
108
  job_name = f'transcription_job_{int(time.time())}'
109
+ transcript_url = transcribe_video(file_uri, job_name)
110
 
111
  if transcript_url:
112
  transcript_data = download_transcript(transcript_url)