File size: 4,713 Bytes
9c94c47
ed41184
 
9c94c47
91b987a
ed41184
 
9c94c47
ed41184
 
91b987a
 
 
 
ed41184
 
91b987a
67a18a1
91b987a
 
67a18a1
91b987a
 
 
 
67a18a1
91b987a
 
ed41184
91b987a
67a18a1
ed41184
91b987a
ed41184
 
 
 
91b987a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67a18a1
91b987a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed41184
 
9c94c47
67a18a1
ed41184
 
 
 
 
 
67a18a1
ed41184
 
67a18a1
 
ed41184
67a18a1
91b987a
 
67a18a1
91b987a
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import whisper
from transformers import MarianMTModel, MarianTokenizer, AutoTokenizer, AutoModelForSeq2SeqLM
import os
import tempfile
import subprocess

# Load the Whisper speech-to-text model once at module import time.
# "base" is the small/fast checkpoint; larger ones ("small", "medium")
# trade speed for accuracy. NOTE(review): this downloads/loads weights at
# import, which blocks startup — confirm that is acceptable for the app.
model = whisper.load_model("base")

def _format_srt_timestamp(seconds):
    """Convert a float second offset into an SRT timestamp ``HH:MM:SS,mmm``."""
    millis = int(round(seconds * 1000))
    hours, rem = divmod(millis, 3_600_000)
    minutes, rem = divmod(rem, 60_000)
    secs, millis = divmod(rem, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def process_video(video_file, language):
    """Transcribe an uploaded video and return the path to an SRT subtitle file.

    The uploaded file is saved to a temp location, re-encoded to MP4 with
    ffmpeg, transcribed to English with Whisper, then (optionally) each
    segment is machine-translated into ``language``.

    Args:
        video_file: File-like object with a ``.read()`` method (e.g. an
            upload handle) containing the raw video bytes.
        language: Target subtitle language name, e.g. "English", "Hindi".

    Returns:
        Path to the generated ``.srt`` file on success, or an error-message
        string on failure (callers must distinguish by checking the value).
    """
    temp_dir = tempfile.gettempdir()
    video_path = os.path.join(temp_dir, "input_video")  # No extension
    output_video_path = os.path.join(temp_dir, "converted_video.mp4")  # Convert to MP4 for compatibility

    try:
        # Persist the upload so ffmpeg can read it from disk.
        print("Saving uploaded video...")
        with open(video_path, "wb") as f:
            f.write(video_file.read())
        print(f"Video saved to {video_path}")

        # Re-encode to H.264 MP4 so downstream tools accept the container.
        print("Converting video to MP4...")
        subprocess.run(["ffmpeg", "-i", video_path, "-c:v", "libx264", "-preset", "fast", output_video_path], check=True)
        print(f"Video converted and saved to {output_video_path}")

        # Whisper transcribes the audio track; source speech is assumed English.
        print("Transcribing video to English...")
        result = model.transcribe(output_video_path, language="en")
        print("Transcription completed!")

        # Translation: English segments pass through untouched; otherwise
        # run each segment through the appropriate seq2seq model.
        segments = []
        if language == "English":
            segments = result["segments"]
        else:
            # Hugging Face model ids per target language. Telugu uses NLLB-200
            # because no dedicated Helsinki en-te MarianMT model is available.
            model_map = {
                "Hindi": "Helsinki-NLP/opus-mt-en-hi",
                "Spanish": "Helsinki-NLP/opus-mt-en-es",
                "French": "Helsinki-NLP/opus-mt-en-fr",
                "German": "Helsinki-NLP/opus-mt-en-de",
                "Telugu": "facebook/nllb-200-distilled-600M",
                "Portuguese": "Helsinki-NLP/opus-mt-en-pt",
                "Russian": "Helsinki-NLP/opus-mt-en-ru",
                "Chinese": "Helsinki-NLP/opus-mt-en-zh",
                "Arabic": "Helsinki-NLP/opus-mt-en-ar",
                "Japanese": "Helsinki-NLP/opus-mt-en-jap"
            }
            model_name = model_map.get(language)
            if not model_name:
                return f"Unsupported language: {language}"

            print(f"Loading translation model for {language}: {model_name}")
            if language == "Telugu":
                # NLLB needs the target language forced via its BOS token id.
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                translation_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
                tgt_lang = "tel_Telu"
                print(f"Translating to Telugu using NLLB-200 Distilled...")
                for segment in result["segments"]:
                    inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
                    translated_tokens = translation_model.generate(
                        **inputs, forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang)
                    )
                    translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
                    segments.append({"text": translated_text, "start": segment["start"], "end": segment["end"]})
            else:
                tokenizer = MarianTokenizer.from_pretrained(model_name)
                translation_model = MarianMTModel.from_pretrained(model_name)
                print(f"Translating to {language}...")
                for segment in result["segments"]:
                    inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
                    translated = translation_model.generate(**inputs)
                    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
                    segments.append({"text": translated_text, "start": segment["start"], "end": segment["end"]})

        # Write the SRT file. BUGFIX: the old code emitted "00:00:SS,mmm"
        # with hours/minutes hard-coded to zero, producing invalid timestamps
        # (seconds field > 59) for any segment past one minute; use a proper
        # HH:MM:SS,mmm conversion instead.
        srt_path = os.path.join(tempfile.gettempdir(), "subtitles.srt")
        print(f"Creating SRT file at {srt_path}")
        with open(srt_path, "w", encoding="utf-8") as f:
            for i, segment in enumerate(segments, 1):
                start = _format_srt_timestamp(segment["start"])
                end = _format_srt_timestamp(segment["end"])
                text = segment["text"].strip()
                f.write(f"{i}\n{start} --> {end}\n{text}\n\n")
        print("SRT file created successfully!")
        return srt_path

    except subprocess.CalledProcessError as e:
        return f"FFmpeg Error: {str(e)}"
    except Exception as e:
        return f"Unexpected Error: {str(e)}"
    finally:
        # Always remove the intermediate video files; the SRT is the
        # deliverable and is intentionally left in place.
        print("Cleaning up temporary files...")
        if os.path.exists(video_path):
            os.remove(video_path)
        if os.path.exists(output_video_path):
            os.remove(output_video_path)