import os
import tempfile
import subprocess
import streamlit as st
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
import torch
from datetime import timedelta
from deep_translator import GoogleTranslator
import ffmpeg  # provided by the ffmpeg-python package
# Streamlit setup
st.title("Video Translator (English to Arabic)")
st.write("Upload an English video to extract speech, translate it into Arabic, and burn the subtitles into the video.")

def format_time(seconds):
    """Convert seconds to SRT timestamp format (00:00:00,000)."""
    td = timedelta(seconds=seconds)
    hours, remainder = divmod(td.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    milliseconds = td.microseconds // 1000
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"

def extract_audio(video_path):
    """Extract the audio track from a video using ffmpeg."""
    temp_dir = tempfile.gettempdir()
    audio_path = os.path.join(temp_dir, "extracted_audio.wav")
    # overwrite_output() adds -y so a re-run doesn't hang on ffmpeg's
    # interactive "overwrite?" prompt when the file already exists
    ffmpeg.input(video_path).output(audio_path, format='wav').overwrite_output().run()
    return audio_path
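
# For reference, the ffmpeg-python call above compiles to roughly:
#   ffmpeg -y -i <video_path> -f wav /tmp/extracted_audio.wav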

def transcribe_audio(audio_path):
    """Transcribe audio to text using fractalego/personal-speech-to-text-model."""
    try:
        # Try fractalego/personal-speech-to-text-model first
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model_id = "fractalego/personal-speech-to-text-model"
        model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id)
        processor = AutoProcessor.from_pretrained(model_id)
        model.to(device)
        pipe = pipeline(
            "automatic-speech-recognition",
            model=model,
            tokenizer=processor.tokenizer,
            feature_extractor=processor.feature_extractor,
            max_new_tokens=128,
            chunk_length_s=30,
            batch_size=16,
            return_timestamps=True,
            device=device,
        )
        result = pipe(audio_path)
        # Pipeline chunks carry a (start, end) "timestamp" tuple rather than
        # "start"/"end" keys; normalize them to the shape whisper returns so
        # the rest of the code can treat both backends uniformly.
        return [
            {
                "start": chunk["timestamp"][0] or 0,
                "end": chunk["timestamp"][1] or 0,
                "text": chunk["text"],
            }
            for chunk in result["chunks"]
        ]
    except Exception as e:
        print(f"Error using fractalego model: {e}")
        print("Using whisper model as fallback...")
        # Fall back to openai-whisper
        import whisper
        model = whisper.load_model("base")
        result = model.transcribe(audio_path, word_timestamps=True)
        return result["segments"]

def translate_text(text):
    """Translate text from English to Arabic."""
    translator = GoogleTranslator(source='en', target='ar')
    return translator.translate(text)
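
# Note: translating segment-by-segment keeps each request short; if per-call
# overhead matters, deep_translator also offers batch translation
# (a sketch, assuming the translate_batch API):
#   texts = [seg["text"] for seg in segments]
#   translations = GoogleTranslator(source='en', target='ar').translate_batch(texts)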

def create_srt(segments, output_path):
    """Create an SRT file from translated segments, ensuring proper encoding."""
    with open(output_path, 'w', encoding='utf-8-sig') as srt_file:  # UTF-8 with BOM for player compatibility
        for i, segment in enumerate(segments, start=1):
            if hasattr(segment, 'get'):  # dict-style segments (whisper / normalized pipeline output)
                start_time = segment.get('start', 0)
                end_time = segment.get('end', 0)
                text = segment.get('text', '')
                translation = segment.get('translation', text)  # fall back to the original text
            else:  # attribute-style segments
                start_time = segment.start
                end_time = segment.end
                text = segment.text
                translation = getattr(segment, 'translation', text)  # fall back to the original text
            # Write one SRT cue: index, time range, translated text
            srt_file.write(f"{i}\n")
            srt_file.write(f"{format_time(start_time)} --> {format_time(end_time)}\n")
            srt_file.write(f"{translation}\n\n")

def burn_subtitles(video_path, srt_path, output_path):
    """Burn subtitles into the video using FFmpeg with Arabic font support."""
    fonts_dir = "/usr/share/fonts/truetype"  # directory containing Amiri-Regular.ttf
    # force_style's FontName takes a font family name, not a file path; the
    # filter's fontsdir option tells libass where to look for the font file.
    # A UTF-8 (BOM) SRT is read natively by libass, so no charenc flag is needed.
    vf = (
        f"subtitles='{srt_path}':fontsdir='{fonts_dir}'"
        ":force_style='FontName=Amiri,FontSize=24,PrimaryColour=&HFFFFFF,"
        "OutlineColour=&H000000,BorderStyle=3,Alignment=2'"
    )
    cmd = [
        'ffmpeg', '-y',
        '-i', video_path,
        '-vf', vf,
        '-c:v', 'libx264', '-crf', '18',
        '-c:a', 'copy',
        output_path
    ]
    try:
        subprocess.run(cmd, check=True)
        return output_path
    except subprocess.CalledProcessError as e:
        print(f"FFmpeg error: {e}")
        return None
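
# Assumes the Amiri typeface is installed where fonts_dir points; on
# Debian-based images, for example, the fonts-hosny-amiri package installs
# it under /usr/share/fonts/truetype/.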

def process_video(video_path):
    """Process the video: extract audio, transcribe, translate, create SRT, burn subtitles."""
    temp_dir = tempfile.gettempdir()
    file_name = os.path.splitext(os.path.basename(video_path))[0]
    audio_path = extract_audio(video_path)
    segments = transcribe_audio(audio_path)
    translated_segments = []
    for segment in segments:
        text = segment.get('text', '') if hasattr(segment, 'get') else segment.text
        translated_text = translate_text(text)
        # Attach the translation in whichever style the segment uses;
        # plain dicts do not support attribute assignment.
        if hasattr(segment, 'get'):
            segment['translation'] = translated_text
        else:
            segment.translation = translated_text
        translated_segments.append(segment)
    srt_path = os.path.join(temp_dir, f"{file_name}.srt")
    create_srt(translated_segments, srt_path)
    output_path = os.path.join(temp_dir, f"{file_name}_translated.mp4")
    result_path = burn_subtitles(video_path, srt_path, output_path)
    return result_path, srt_path
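
# Note: the intermediate files (extracted_audio.wav, the .srt, and the
# subtitled .mp4) are all left in tempfile.gettempdir(); a long-running
# deployment may want to clean them up after serving the download.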

# Streamlit UI
uploaded_video = st.file_uploader("Upload your video", type=["mp4", "mov", "avi"])
if uploaded_video:
    # Save the uploaded video temporarily
    temp_video_path = os.path.join(tempfile.gettempdir(), uploaded_video.name)
    with open(temp_video_path, "wb") as f:
        f.write(uploaded_video.read())
    st.write("Processing your video...")
    result_path, srt_path = process_video(temp_video_path)
    # Show the result and offer the subtitle file for download
    if result_path:
        st.video(result_path)
    else:
        st.error("Subtitle burning failed; check the logs for FFmpeg errors.")
    # st.download_button expects the file's contents, not a path
    with open(srt_path, "rb") as srt_file:
        st.download_button("Download SRT File", srt_file.read(), file_name=os.path.basename(srt_path))