Spaces:

reab5555
/

Multiple-Speakers-Personality-Analyzer

Runtime error

File size: 4,788 Bytes

818cd17
 
 
9deffb0
e6af49e
f48b8c6
 
818cd17
 
 
 
 
 
 
 
 
 
3ad4c21
818cd17
 
 
 
 
3ad4c21
 
 
 
ca6d20b
3ad4c21
4a4332b
 
818cd17
3ad4c21
2ca0ec7
818cd17
 
 
 
 
 
 
afdb01c
 
 
a9fd016
818cd17
c806b3c
818cd17
 
 
e6af49e
f48b8c6
a9fd016
 
 
 
 
818cd17
 
 
 
 
 
 
 
c31a27f
 
 
818cd17
 
 
 
 
 
 
 
 
c31a27f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
818cd17
 
 
 
 
 
 
 
 
fb650ff
 
818cd17
fb650ff
3ad4c21
 
a9fd016
818cd17
 
2ca0ec7
818cd17
3ad4c21
c806b3c
1813c7e
818cd17
 
 
e6af49e
 
 
818cd17
b5750ff
818cd17
 
 
 
 
 
 
a9fd016

import boto3
import time
import json
import os
import urllib.parse
import requests
from botocore.exceptions import ClientError
from config import aws_access_key_id, aws_secret_access_key

def upload_to_s3(local_file_path, bucket_name, s3_file_key):
    """Upload a local file to S3 and return its ``s3://`` URI.

    Args:
        local_file_path: Path of the file on disk to upload.
        bucket_name: Name of the destination bucket.
        s3_file_key: Object key to store the file under.

    Returns:
        The string ``s3://<bucket_name>/<s3_file_key>``.
    """
    # Credentials come from the project's config module; the region is fixed.
    client_options = {
        'aws_access_key_id': aws_access_key_id,
        'aws_secret_access_key': aws_secret_access_key,
        'region_name': 'eu-central-1',
    }
    uploader = boto3.client('s3', **client_options)
    uploader.upload_file(local_file_path, bucket_name, s3_file_key)
    return 's3://{}/{}'.format(bucket_name, s3_file_key)

def transcribe_video(file_uri, job_name):
    """Start a transcription job with speaker labels and wait for it to end.

    The job is started with automatic language identification and up to four
    speaker labels, then polled every 30 seconds until its status is
    ``COMPLETED`` or ``FAILED``. There is no upper bound on the wait.

    Args:
        file_uri: URI of the media file to transcribe (passed as
            ``MediaFileUri``).
        job_name: Name for the transcription job.

    Returns:
        The transcript file URI when the job completes, or ``None`` when the
        job fails.
    """
    transcribe = boto3.client('transcribe',
                              aws_access_key_id=aws_access_key_id,
                              aws_secret_access_key=aws_secret_access_key,
                              region_name='eu-central-1')

    transcribe.start_transcription_job(
        TranscriptionJobName=job_name,
        Media={'MediaFileUri': file_uri},
        MediaFormat='mp4',
        IdentifyLanguage=True,
        Settings={
            'ShowSpeakerLabels': True,
            'MaxSpeakerLabels': 4
        }
    )

    # Poll until the job reaches a terminal state.
    while True:
        response = transcribe.get_transcription_job(TranscriptionJobName=job_name)
        job = response['TranscriptionJob']
        job_status = job['TranscriptionJobStatus']
        if job_status in ('COMPLETED', 'FAILED'):
            break
        time.sleep(30)

    if job_status != 'COMPLETED':
        # Surface the service's explanation instead of discarding it; the
        # caller still receives None exactly as before.
        failure_reason = job.get('FailureReason', 'no failure reason reported')
        print(f"Transcription job failed: {failure_reason}")
        return None

    # Print the identified language
    print(f"Identified language: {job['LanguageCode']}")
    return job['Transcript']['TranscriptFileUri']

def download_transcript(transcript_url, timeout=30):
    """Download the transcript at *transcript_url* and return it parsed as JSON.

    Args:
        transcript_url: URL to fetch with an HTTP GET.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        The decoded JSON document, or ``None`` if the request fails, the
        response has an error status, or the body is not valid JSON.
    """
    try:
        response = requests.get(transcript_url, timeout=timeout)
        response.raise_for_status()
        return json.loads(response.text)
    except Exception as e:
        # Deliberately best-effort: callers treat None as "download failed",
        # so every failure is reported here rather than propagated.
        print(f"Error downloading transcript: {e}")
        return None

def extract_transcriptions_with_speakers(transcript_data):
    """Group transcript words into consecutive per-speaker utterances.

    Each ``pronunciation`` item is attributed to the first speaker segment
    whose time range fully contains it. Raw speaker labels are renamed to
    ``"Speaker 1"``, ``"Speaker 2"``, ... in order of first appearance. A word
    that falls in no segment is kept with the current speaker. ``punctuation``
    items are glued onto the preceding word.

    Args:
        transcript_data: Parsed transcript with
            ``results.speaker_labels.segments`` and ``results.items``.

    Returns:
        A list of ``{'speaker': ..., 'text': ...}`` dicts, one per run of
        consecutive words by the same speaker. ``speaker`` is ``None`` for
        words seen before any segment matched.
    """
    results = transcript_data['results']

    # Parse the segment bounds once rather than for every word.
    segments = [
        (float(seg['start_time']), float(seg['end_time']), seg['speaker_label'])
        for seg in results['speaker_labels']['segments']
    ]

    current_speaker = None
    current_text = []
    transcriptions = []
    speaker_mapping = {}

    for item in results['items']:
        item_type = item['type']

        if item_type == 'pronunciation':
            start_time = float(item['start_time'])
            end_time = float(item['end_time'])

            containing = next(
                (seg for seg in segments
                 if seg[0] <= start_time and seg[1] >= end_time),
                None,
            )

            if containing is not None:
                # Map speaker labels to sequential numbers starting from 1
                speaker = speaker_mapping.setdefault(
                    containing[2], f"Speaker {len(speaker_mapping) + 1}")

                if speaker != current_speaker:
                    # Speaker changed: flush what the previous one said.
                    if current_text:
                        transcriptions.append({
                            'speaker': current_speaker,
                            'text': ' '.join(current_text)
                        })
                        current_text = []
                    current_speaker = speaker

            current_text.append(item['alternatives'][0]['content'])
        elif item_type == 'punctuation':
            # Punctuation with no preceding word has nothing to attach to;
            # skip it instead of raising IndexError on an empty list.
            if current_text:
                current_text[-1] += item['alternatives'][0]['content']

    if current_text:
        transcriptions.append({
            'speaker': current_speaker,
            'text': ' '.join(current_text)
        })

    return transcriptions


def diarize_audio(video_path):
    """Upload a video, transcribe it with speaker labels, and format the result.

    Args:
        video_path: Local path of the video file; its base name is used as
            the S3 object key.

    Returns:
        A string with one numbered ``[n. speaker | text: ...]`` entry per
        utterance, or an error message string if transcription or the
        transcript download fails.
    """
    bucket = 'transcriptionjobbucket'
    object_key = os.path.basename(video_path)
    media_uri = upload_to_s3(video_path, bucket, object_key)

    # The job name is derived from the current Unix time in whole seconds.
    transcript_url = transcribe_video(
        media_uri, 'transcription_job_{}'.format(int(time.time())))

    print('transcript url:', transcript_url)

    if not transcript_url:
        return "Transcription failed."

    transcript_data = download_transcript(transcript_url)
    if transcript_data is None:
        return "Failed to download transcript."

    transcriptions = extract_transcriptions_with_speakers(transcript_data)
    print('transcriptions:', transcriptions)

    # Each entry carries its own trailing newline and entries are joined with
    # another, so utterances end up separated by a blank line.
    return '\n'.join(
        f"[{position}. {entry['speaker']} | text: {entry['text']}]\n"
        for position, entry in enumerate(transcriptions, 1)
    )