import whisper
from transformers import MarianMTModel, MarianTokenizer, AutoTokenizer, AutoModelForSeq2SeqLM
import os
import tempfile
import subprocess

# Load Whisper model
model = whisper.load_model("base")

def process_video(video_path, language):
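    """Transcribe a video with Whisper and write subtitles to an SRT file.

    The video is first converted to MP4 with ffmpeg, then transcribed (English
    speech assumed); for non-English targets each segment is translated with a
    MarianMT or NLLB model before the SRT file is written.

    Args:
        video_path: Path to the input video; it is deleted once processing finishes.
        language: Target subtitle language, e.g. "English", "Hindi", "Telugu".

    Returns:
        Path to the generated .srt file, or None / an error message on failure.
    """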
    # Create a temporary directory
    temp_dir = tempfile.gettempdir()
    output_video_path = os.path.join(temp_dir, "converted_video.mp4")
    srt_path = os.path.join(temp_dir, "subtitles.srt")

    try:
        # Convert video to MP4 using ffmpeg
        print(f"Converting video: {video_path} to MP4...")
        # "-y" overwrites any leftover output file so ffmpeg cannot stall on a prompt
        subprocess.run(
            ["ffmpeg", "-y", "-i", video_path, "-c:v", "libx264", "-preset", "fast", output_video_path],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        print("Video converted successfully!")

        # Transcribe the video (speech is assumed to be English; the translation models below are all en -> target)
        print("Transcribing video...")
        result = model.transcribe(output_video_path, language="en")
        print("Transcription completed!")

        # Translation logic
        segments = []
        if language == "English":
            segments = result["segments"]
        else:
            model_map = {
                "Hindi": "Helsinki-NLP/opus-mt-en-hi",
                "Spanish": "Helsinki-NLP/opus-mt-en-es",
                "French": "Helsinki-NLP/opus-mt-en-fr",
                "German": "Helsinki-NLP/opus-mt-en-de",
                "Telugu": "facebook/nllb-200-distilled-600M",
                "Portuguese": "Helsinki-NLP/opus-mt-en-pt",
                "Russian": "Helsinki-NLP/opus-mt-en-ru",
                "Chinese": "Helsinki-NLP/opus-mt-en-zh",
                "Arabic": "Helsinki-NLP/opus-mt-en-ar",
                "Japanese": "Helsinki-NLP/opus-mt-en-jap"
            }
            model_name = model_map.get(language)
            if not model_name:
                return f"Unsupported language: {language}"

            # Load translation model
            print(f"Loading translation model: {model_name}")
            if language == "Telugu":
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                translation_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
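                # "tel_Telu" is the NLLB-200 code for Telugu; the NLLB tokenizer's
                # source language defaults to English ("eng_Latn"), matching the
                # English Whisper transcript.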
                tgt_lang = "tel_Telu"
                for segment in result["segments"]:
                    inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
                    translated_tokens = translation_model.generate(
                        **inputs, forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang)
                    )
                    translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
                    segments.append({"text": translated_text, "start": segment["start"], "end": segment["end"]})
            else:
                tokenizer = MarianTokenizer.from_pretrained(model_name)
                translation_model = MarianMTModel.from_pretrained(model_name)
                for segment in result["segments"]:
                    inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
                    translated = translation_model.generate(**inputs)
                    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
                    segments.append({"text": translated_text, "start": segment["start"], "end": segment["end"]})

        # Create SRT file (SRT timestamps must use the HH:MM:SS,mmm format)
        def format_timestamp(seconds):
            millis = int(round(seconds * 1000))
            hours, millis = divmod(millis, 3_600_000)
            minutes, millis = divmod(millis, 60_000)
            secs, millis = divmod(millis, 1000)
            return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

        with open(srt_path, "w", encoding="utf-8") as f:
            for i, segment in enumerate(segments, 1):
                start = format_timestamp(segment["start"])
                end = format_timestamp(segment["end"])
                text = segment["text"].strip()
                f.write(f"{i}\n{start} --> {end}\n{text}\n\n")
        print(f"SRT file saved to {srt_path}")
        return srt_path

    except subprocess.CalledProcessError as e:
        print(f"FFmpeg Error: {e.stderr.decode()}")
        return None
    except Exception as e:
        print(f"Unexpected Error: {str(e)}")
        return None
    finally:
        # Clean up the intermediate MP4 and the input video (the input is assumed to be a disposable temporary upload)
        if os.path.exists(output_video_path):
            os.remove(output_video_path)
        if os.path.exists(video_path):
            os.remove(video_path)
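

# Example usage (illustrative sketch only: "sample_video.mp4" is a placeholder
# path, and any UI/CLI wiring that would normally call process_video is omitted).
if __name__ == "__main__":
    result = process_video("sample_video.mp4", "Spanish")
    if result and os.path.exists(result):
        print(f"Subtitles written to {result}")
    else:
        print(f"Processing failed: {result}")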