# Provenance: Hugging Face Space file by Pushkar0655g, commit 67a18a1
# Commit message: "Add detailed logging to debug subtitle generation errors"
# (page chrome "raw / history / blame / 4.71 kB" removed so the file parses)
import whisper
from transformers import MarianMTModel, MarianTokenizer, AutoTokenizer, AutoModelForSeq2SeqLM
import os
import tempfile
import subprocess
# Load the Whisper "base" speech-recognition model once at import time;
# it is reused by every process_video() call instead of reloading per request.
model = whisper.load_model("base")
def _format_srt_timestamp(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``."""
    millis = int(round(seconds * 1000))
    hours, millis = divmod(millis, 3_600_000)
    minutes, millis = divmod(millis, 60_000)
    secs, millis = divmod(millis, 1_000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def process_video(video_file, language):
    """Generate an SRT subtitle file for an uploaded video.

    The upload is saved to a temp file, re-encoded to MP4 with ffmpeg,
    transcribed to English with the module-level Whisper ``model``,
    optionally translated to *language*, and written out as an SRT file.

    Args:
        video_file: File-like object (has ``.read()``) with the raw upload.
        language: Target subtitle language, e.g. ``"English"`` or ``"Hindi"``.

    Returns:
        The path to the generated ``.srt`` file on success, or an
        error-message string on failure (callers must distinguish the two).
    """
    temp_dir = tempfile.gettempdir()
    video_path = os.path.join(temp_dir, "input_video")  # raw upload, no extension
    output_video_path = os.path.join(temp_dir, "converted_video.mp4")  # Convert to MP4 for compatibility
    try:
        # Save the uploaded file
        print("Saving uploaded video...")
        with open(video_path, "wb") as f:
            f.write(video_file.read())
        print(f"Video saved to {video_path}")

        # Convert the video to MP4 using ffmpeg. "-y" overwrites any stale
        # output left by a previous crashed run; without it ffmpeg prompts
        # interactively and this call would hang.
        print("Converting video to MP4...")
        subprocess.run(
            ["ffmpeg", "-y", "-i", video_path, "-c:v", "libx264", "-preset", "fast", output_video_path],
            check=True,
        )
        print(f"Video converted and saved to {output_video_path}")

        # Transcribe the video (always to English; translation happens below).
        print("Transcribing video to English...")
        result = model.transcribe(output_video_path, language="en")
        print("Transcription completed!")

        # Translation logic
        segments = []
        if language == "English":
            segments = result["segments"]
        else:
            # Define translation models (NLLB for Telugu, MarianMT otherwise)
            model_map = {
                "Hindi": "Helsinki-NLP/opus-mt-en-hi",
                "Spanish": "Helsinki-NLP/opus-mt-en-es",
                "French": "Helsinki-NLP/opus-mt-en-fr",
                "German": "Helsinki-NLP/opus-mt-en-de",
                "Telugu": "facebook/nllb-200-distilled-600M",
                "Portuguese": "Helsinki-NLP/opus-mt-en-pt",
                "Russian": "Helsinki-NLP/opus-mt-en-ru",
                "Chinese": "Helsinki-NLP/opus-mt-en-zh",
                "Arabic": "Helsinki-NLP/opus-mt-en-ar",
                "Japanese": "Helsinki-NLP/opus-mt-en-jap",
            }
            model_name = model_map.get(language)
            if not model_name:
                return f"Unsupported language: {language}"
            print(f"Loading translation model for {language}: {model_name}")
            if language == "Telugu":
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                translation_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
                tgt_lang = "tel_Telu"  # NLLB-200 language code for Telugu
                print(f"Translating to Telugu using NLLB-200 Distilled...")
                for segment in result["segments"]:
                    inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
                    # NLLB selects the output language via the forced BOS token.
                    translated_tokens = translation_model.generate(
                        **inputs, forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang)
                    )
                    translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
                    segments.append({"text": translated_text, "start": segment["start"], "end": segment["end"]})
            else:
                tokenizer = MarianTokenizer.from_pretrained(model_name)
                translation_model = MarianMTModel.from_pretrained(model_name)
                print(f"Translating to {language}...")
                for segment in result["segments"]:
                    inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
                    translated = translation_model.generate(**inputs)
                    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
                    segments.append({"text": translated_text, "start": segment["start"], "end": segment["end"]})

        # Create SRT file. Timestamps must be full HH:MM:SS,mmm — the previous
        # hard-coded "00:00:SS,mmm" broke for any segment past 60 seconds.
        srt_path = os.path.join(temp_dir, "subtitles.srt")
        print(f"Creating SRT file at {srt_path}")
        with open(srt_path, "w", encoding="utf-8") as f:
            for i, segment in enumerate(segments, 1):
                start = _format_srt_timestamp(segment["start"])
                end = _format_srt_timestamp(segment["end"])
                text = segment["text"].strip()
                f.write(f"{i}\n{start} --> {end}\n{text}\n\n")
        print("SRT file created successfully!")
        return srt_path
    except subprocess.CalledProcessError as e:
        return f"FFmpeg Error: {str(e)}"
    except Exception as e:
        return f"Unexpected Error: {str(e)}"
    finally:
        # Clean up temporary files regardless of success or failure.
        print("Cleaning up temporary files...")
        if os.path.exists(video_path):
            os.remove(video_path)
        if os.path.exists(output_video_path):
            os.remove(output_video_path)