Spaces:
Runtime error
Runtime error
Update transcription_diarization.py
Browse files - transcription_diarization.py +24 -0
transcription_diarization.py
CHANGED
@@ -3,10 +3,27 @@ import time
|
|
3 |
import json
|
4 |
import os
|
5 |
import urllib.parse
|
|
|
6 |
import requests
|
7 |
from botocore.exceptions import ClientError
|
8 |
from config import aws_access_key_id, aws_secret_access_key
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
def upload_to_s3(local_file_path, bucket_name, s3_file_key):
|
11 |
s3_client = boto3.client('s3',
|
12 |
aws_access_key_id=aws_access_key_id,
|
@@ -106,6 +123,13 @@ def extract_transcriptions_with_speakers(transcript_data):
|
|
106 |
|
107 |
|
108 |
def diarize_audio(video_path):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
bucket_name = 'transcriptionjobbucket'
|
110 |
s3_file_key = os.path.basename(video_path)
|
111 |
file_uri = upload_to_s3(video_path, bucket_name, s3_file_key)
|
|
|
3 |
import json
|
4 |
import os
|
5 |
import urllib.parse
|
6 |
+
from moviepy.editor import VideoFileClip
|
7 |
import requests
|
8 |
from botocore.exceptions import ClientError
|
9 |
from config import aws_access_key_id, aws_secret_access_key
|
10 |
|
11 |
+
def convert_to_mono_audio(video_path, output_format='wav'):
    """Extract the audio track of a video and write it as a mono audio file.

    The output file is written to the current working directory and is named
    ``<video base name>_mono.<output_format>``.

    Args:
        video_path: Path to the input video file.
        output_format: File extension of the audio file to produce
            (defaults to ``'wav'``).

    Returns:
        The path of the written audio file, or ``None`` if the conversion
        failed for any reason (unreadable input, no audio track, encoder
        error). Failures are reported with ``print`` rather than raised.
    """
    base_name = os.path.splitext(os.path.basename(video_path))[0]
    output_path = f"{base_name}_mono.{output_format}"

    video = None
    try:
        video = VideoFileClip(video_path)
        audio = video.audio
        if audio is None:
            # Silent videos have no audio clip; report it explicitly instead
            # of failing later with an opaque attribute error.
            print("Error during audio conversion: video has no audio track")
            return None

        # moviepy audio clips have no ``set_channels`` method (that is a
        # pydub API), so the downmix to one channel is delegated to ffmpeg
        # through its ``-ac 1`` output option.
        audio.write_audiofile(output_path, ffmpeg_params=["-ac", "1"])
        return output_path
    except Exception as e:
        # Best-effort boundary: callers only check for a falsy return value.
        print(f"Error during audio conversion: {str(e)}")
        return None
    finally:
        # Always release the ffmpeg readers, including on the error paths.
        # NOTE(review): VideoFileClip.close() is expected to close the
        # attached audio reader as well - confirm for the pinned moviepy
        # version.
        if video is not None:
            video.close()
|
26 |
+
|
27 |
def upload_to_s3(local_file_path, bucket_name, s3_file_key):
|
28 |
s3_client = boto3.client('s3',
|
29 |
aws_access_key_id=aws_access_key_id,
|
|
|
123 |
|
124 |
|
125 |
def diarize_audio(video_path):
|
126 |
+
# Convert video to mono audio
|
127 |
+
output_format = 'wav'
|
128 |
+
mono_audio_path = convert_to_mono_audio(video_path, output_format)
|
129 |
+
|
130 |
+
if not mono_audio_path:
|
131 |
+
return "Audio conversion failed."
|
132 |
+
|
133 |
bucket_name = 'transcriptionjobbucket'
|
134 |
s3_file_key = os.path.basename(video_path)
|
135 |
file_uri = upload_to_s3(video_path, bucket_name, s3_file_key)
|