import whisper
from transformers import MarianMTModel, MarianTokenizer, AutoTokenizer, AutoModelForSeq2SeqLM
import os
import tempfile
import subprocess
# Load Whisper model
model = whisper.load_model("base")
def process_video(video_path, language):  # Accept a file path, not a file object
    output_video_path = os.path.join(tempfile.gettempdir(), "converted_video.mp4")
    srt_path = os.path.join(tempfile.gettempdir(), "subtitles.srt")
    try:
        # Convert the input video to MP4 using ffmpeg
        print("Converting video to MP4...")
        subprocess.run(
            # -y overwrites a leftover output file from a previous run
            ["ffmpeg", "-y", "-i", video_path, "-c:v", "libx264", "-preset", "fast", output_video_path],
            check=True,  # Raise CalledProcessError if ffmpeg fails
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        print("Video converted successfully!")
        # Transcribe the converted video with Whisper
        print("Transcribing video...")
        result = model.transcribe(output_video_path, language="en")
        print("Transcription completed!")

        # Translation logic
        segments = []
        if language == "English":
            segments = result["segments"]
        else:
            model_map = {
                "Hindi": "Helsinki-NLP/opus-mt-en-hi",
                "Spanish": "Helsinki-NLP/opus-mt-en-es",
                "French": "Helsinki-NLP/opus-mt-en-fr",
                "German": "Helsinki-NLP/opus-mt-en-de",
                "Telugu": "facebook/nllb-200-distilled-600M",
                "Portuguese": "Helsinki-NLP/opus-mt-en-pt",
                "Russian": "Helsinki-NLP/opus-mt-en-ru",
                "Chinese": "Helsinki-NLP/opus-mt-en-zh",
                "Arabic": "Helsinki-NLP/opus-mt-en-ar",
                "Japanese": "Helsinki-NLP/opus-mt-en-jap"
            }
            model_name = model_map.get(language)
            if not model_name:
                return f"Unsupported language: {language}"
print(f"Loading translation model: {model_name}")
if language == "Telugu":
tokenizer = AutoTokenizer.from_pretrained(model_name)
translation_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tgt_lang = "tel_Telu"
for segment in result["segments"]:
inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
translated_tokens = translation_model.generate(
**inputs, forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang)
)
translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
segments.append({"text": translated_text, "start": segment["start"], "end": segment["end"]})
else:
tokenizer = MarianTokenizer.from_pretrained(model_name)
translation_model = MarianMTModel.from_pretrained(model_name)
for segment in result["segments"]:
inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
translated = translation_model.generate(**inputs)
translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
segments.append({"text": translated_text, "start": segment["start"], "end": segment["end"]})
        # Create the SRT file; segment times are in seconds and must be
        # converted to the SRT HH:MM:SS,mmm timestamp format
        def format_srt_time(seconds):
            millis = int(round(seconds * 1000))
            hours, millis = divmod(millis, 3_600_000)
            minutes, millis = divmod(millis, 60_000)
            secs, millis = divmod(millis, 1000)
            return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

        with open(srt_path, "w", encoding="utf-8") as f:
            for i, segment in enumerate(segments, 1):
                start = format_srt_time(segment["start"])
                end = format_srt_time(segment["end"])
                text = segment["text"].strip()
                f.write(f"{i}\n{start} --> {end}\n{text}\n\n")
        print("SRT file created successfully!")
        return srt_path
    except subprocess.CalledProcessError as e:
        print(f"FFmpeg Error: {e.stderr.decode()}")
        return None
    except Exception as e:
        print(f"Unexpected Error: {str(e)}")
        return None
    finally:
        # Clean up temporary files
        if os.path.exists(output_video_path):
            os.remove(output_video_path)
        if os.path.exists(video_path):
            os.remove(video_path)
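

# Minimal usage sketch (assumption: "sample_video.mp4" is only an illustrative
# placeholder and is not part of this module). Note that process_video deletes
# the input file in its finally block, so pass a temporary copy you can lose.
if __name__ == "__main__":
    srt_file = process_video("sample_video.mp4", "Spanish")
    if srt_file:
        print(f"Subtitles written to {srt_file}")
    else:
        print("Subtitle generation failed.")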