File size: 4,260 Bytes
9c94c47 ed41184 9c94c47 91b987a ed41184 9c94c47 ed41184 fc7755f ed41184 beae951 fc7755f beae951 fc7755f beae951 91b987a beae951 91b987a 67a18a1 ed41184 91b987a ed41184 91b987a fc7755f beae951 91b987a ed41184 fc7755f ed41184 67a18a1 beae951 ed41184 beae951 91b987a beae951 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import whisper
from transformers import MarianMTModel, MarianTokenizer, AutoTokenizer, AutoModelForSeq2SeqLM
import os
import tempfile
import subprocess
# Load the Whisper "base" checkpoint once, when this module is imported.
# The result is kept in the module-level name `model`, which process_video
# uses for transcription.
model = whisper.load_model("base")
# Maps a UI language name to the Hugging Face checkpoint that translates
# English into that language. "English" is intentionally absent: it needs
# no translation step.
_TRANSLATION_MODELS = {
    "Hindi": "Helsinki-NLP/opus-mt-en-hi",
    "Spanish": "Helsinki-NLP/opus-mt-en-es",
    "French": "Helsinki-NLP/opus-mt-en-fr",
    "German": "Helsinki-NLP/opus-mt-en-de",
    "Telugu": "facebook/nllb-200-distilled-600M",
    "Portuguese": "Helsinki-NLP/opus-mt-en-pt",
    "Russian": "Helsinki-NLP/opus-mt-en-ru",
    "Chinese": "Helsinki-NLP/opus-mt-en-zh",
    "Arabic": "Helsinki-NLP/opus-mt-en-ar",
    "Japanese": "Helsinki-NLP/opus-mt-en-jap",
}


def _format_srt_timestamp(seconds):
    """Render an offset in seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
    # Work in whole milliseconds so rounding can never yield e.g. "59,1000".
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def _translate_segments(source_segments, model_name, language):
    """Return copies of the segments with their text translated by model_name."""
    if language == "Telugu":
        # The Telugu entry is an NLLB checkpoint: it needs the generic Auto*
        # classes and the target language forced as the first generated token.
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        translation_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        generate_kwargs = {
            "forced_bos_token_id": tokenizer.convert_tokens_to_ids("tel_Telu")
        }
    else:
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        translation_model = MarianMTModel.from_pretrained(model_name)
        generate_kwargs = {}

    translated_segments = []
    for segment in source_segments:
        inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
        output_ids = translation_model.generate(**inputs, **generate_kwargs)
        translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        translated_segments.append(
            {"text": translated_text, "start": segment["start"], "end": segment["end"]}
        )
    return translated_segments


def process_video(video_path, language):
    """Transcribe a video and write (optionally translated) subtitles as SRT.

    The video is re-encoded to MP4 with ffmpeg, transcribed as English with
    the module-level Whisper ``model``, translated when ``language`` is not
    "English", and written to ``subtitles.srt`` in the system temp directory.

    Args:
        video_path: Path of the input video. The file is deleted before this
            function returns, whether or not processing succeeded.
        language: Target subtitle language name, e.g. "English" or "Hindi".

    Returns:
        The path of the written SRT file on success; the string
        ``"Unsupported language: <language>"`` when the language is unknown;
        ``None`` when ffmpeg or any later step fails (the error is printed).
    """
    # NOTE: the SRT path is fixed, so concurrent calls overwrite each other's
    # subtitles. Kept as-is because callers receive and may rely on this path.
    srt_path = os.path.join(tempfile.gettempdir(), "subtitles.srt")
    converted_path = None
    try:
        # Validate the language before the expensive conversion/transcription.
        # This stays inside the try so the cleanup in `finally` still runs.
        model_name = None
        if language != "English":
            model_name = _TRANSLATION_MODELS.get(language)
            if not model_name:
                return f"Unsupported language: {language}"

        # A unique file per call avoids collisions between concurrent runs.
        fd, converted_path = tempfile.mkstemp(suffix=".mp4")
        os.close(fd)

        # Convert video to MP4 using ffmpeg
        print(f"Converting video: {video_path} to MP4...")
        subprocess.run(
            [
                "ffmpeg",
                "-y",  # the temp file already exists; never prompt to overwrite
                "-i", video_path,
                "-c:v", "libx264",
                "-preset", "fast",
                converted_path,
            ],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        print("Video converted successfully!")

        # Transcribe video (always as English; translation happens afterwards)
        print("Transcribing video...")
        result = model.transcribe(converted_path, language="en")
        print("Transcription completed!")

        # Translation logic
        if model_name is None:
            segments = result["segments"]
        else:
            print(f"Loading translation model: {model_name}")
            segments = _translate_segments(result["segments"], model_name, language)

        # Create SRT file
        with open(srt_path, "w", encoding="utf-8") as f:
            for index, segment in enumerate(segments, 1):
                start = _format_srt_timestamp(segment["start"])
                end = _format_srt_timestamp(segment["end"])
                text = segment["text"].strip()
                f.write(f"{index}\n{start} --> {end}\n{text}\n\n")
        print(f"SRT file saved to {srt_path}")
        return srt_path
    except subprocess.CalledProcessError as e:
        # ffmpeg output is not guaranteed to be valid UTF-8.
        print(f"FFmpeg Error: {e.stderr.decode(errors='replace')}")
        return None
    except Exception as e:
        print(f"Unexpected Error: {str(e)}")
        return None
    finally:
        # Clean up temporary files
        if converted_path and os.path.exists(converted_path):
            os.remove(converted_path)
        if os.path.exists(video_path):
            os.remove(video_path)