import os
import subprocess
import tempfile

import whisper
from transformers import MarianMTModel, MarianTokenizer, AutoTokenizer, AutoModelForSeq2SeqLM

# Load the Whisper speech-to-text model once at import time
model = whisper.load_model("base")

def format_timestamp(seconds):
    """Convert a time in seconds to an SRT timestamp (HH:MM:SS,mmm)."""
    millis = int(round(seconds * 1000))
    hours, millis = divmod(millis, 3_600_000)
    minutes, millis = divmod(millis, 60_000)
    secs, millis = divmod(millis, 1_000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def process_video(video_path, language):  # Accept a file path, not a file object
    output_video_path = os.path.join(tempfile.gettempdir(), "converted_video.mp4")
    srt_path = os.path.join(tempfile.gettempdir(), "subtitles.srt")

    try:
        # Convert the input video to MP4 using ffmpeg (-y overwrites a stale temp file)
        print("Converting video to MP4...")
        subprocess.run(
            ["ffmpeg", "-y", "-i", video_path, "-c:v", "libx264", "-preset", "fast", output_video_path],
            check=True,  # Raise CalledProcessError if ffmpeg fails
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        print("Video converted successfully!")

        # Transcribe the audio track (source language is English)
        print("Transcribing video...")
        result = model.transcribe(output_video_path, language="en")
        print("Transcription completed!")

        # Translation logic: keep English segments as-is, otherwise translate segment by segment
        segments = []
        if language == "English":
            segments = result["segments"]
        else:
            model_map = {
                "Hindi": "Helsinki-NLP/opus-mt-en-hi",
                "Spanish": "Helsinki-NLP/opus-mt-en-es",
                "French": "Helsinki-NLP/opus-mt-en-fr",
                "German": "Helsinki-NLP/opus-mt-en-de",
                "Telugu": "facebook/nllb-200-distilled-600M",
                "Portuguese": "Helsinki-NLP/opus-mt-en-pt",
                "Russian": "Helsinki-NLP/opus-mt-en-ru",
                "Chinese": "Helsinki-NLP/opus-mt-en-zh",
                "Arabic": "Helsinki-NLP/opus-mt-en-ar",
                "Japanese": "Helsinki-NLP/opus-mt-en-jap",
            }
            model_name = model_map.get(language)
            if not model_name:
                return f"Unsupported language: {language}"

            print(f"Loading translation model: {model_name}")
            if language == "Telugu":
                # NLLB model: source is English (eng_Latn); the target language is
                # selected by forcing its language code as the BOS token
                tokenizer = AutoTokenizer.from_pretrained(model_name, src_lang="eng_Latn")
                translation_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
                tgt_lang = "tel_Telu"
                for segment in result["segments"]:
                    inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
                    translated_tokens = translation_model.generate(
                        **inputs, forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang)
                    )
                    translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
                    segments.append({"text": translated_text, "start": segment["start"], "end": segment["end"]})
            else:
                # MarianMT models translate directly from English to the target language
                tokenizer = MarianTokenizer.from_pretrained(model_name)
                translation_model = MarianMTModel.from_pretrained(model_name)
                for segment in result["segments"]:
                    inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
                    translated = translation_model.generate(**inputs)
                    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
                    segments.append({"text": translated_text, "start": segment["start"], "end": segment["end"]})

        # Create the SRT file with properly formatted HH:MM:SS,mmm timestamps
        with open(srt_path, "w", encoding="utf-8") as f:
            for i, segment in enumerate(segments, 1):
                start = format_timestamp(segment["start"])
                end = format_timestamp(segment["end"])
                text = segment["text"].strip()
                f.write(f"{i}\n{start} --> {end}\n{text}\n\n")
        print("SRT file created successfully!")
        return srt_path

    except subprocess.CalledProcessError as e:
        print(f"FFmpeg Error: {e.stderr.decode()}")
        return None
    except Exception as e:
        print(f"Unexpected Error: {str(e)}")
        return None
    finally:
        # Clean up temporary files
        if os.path.exists(output_video_path):
            os.remove(output_video_path)
        if os.path.exists(video_path):
            os.remove(video_path)
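

if __name__ == "__main__":
    # Minimal usage sketch: the input path below is a placeholder, not a file
    # shipped with this project. Note that process_video deletes the input file
    # in its finally block, so point it at a copy you can afford to lose.
    srt_file = process_video("/tmp/sample_upload.mp4", "Hindi")
    if srt_file:
        print(f"Subtitles written to: {srt_file}")
    else:
        print("Processing failed; see the messages above.")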