Pushkar0655g committed
Commit ed41184 · 1 Parent(s): d6a8bc7

Add Gradio app for video subtitling

Files changed (3)
  1. app.py +52 -0
  2. requirements.txt +4 -0
  3. utils.py +66 -0
app.py ADDED
@@ -0,0 +1,52 @@
+ import gradio as gr
+ from utils import process_video  # Import the backend transcription/translation logic
+
+ # Map display names to translation models (None = keep the English transcript)
+ language_map = {
+     "English": None,
+     "Hindi": "Helsinki-NLP/opus-mt-en-hi",
+     "Spanish": "Helsinki-NLP/opus-mt-en-es",
+     "French": "Helsinki-NLP/opus-mt-en-fr",
+     "German": "Helsinki-NLP/opus-mt-en-de",
+     "Telugu": "facebook/nllb-200-distilled-600M",
+     "Portuguese": "Helsinki-NLP/opus-mt-en-pt",
+     "Russian": "Helsinki-NLP/opus-mt-en-ru",
+     "Chinese": "Helsinki-NLP/opus-mt-en-zh",
+     "Arabic": "Helsinki-NLP/opus-mt-en-ar",
+     "Japanese": "Helsinki-NLP/opus-mt-en-jap"
+ }
+
+ def generate_subtitles(video_file, language):
+     """
+     Process the uploaded video and generate subtitles.
+     """
+     try:
+         srt_path = process_video(video_file, language)
+         return srt_path  # Path to the generated SRT file
+     except Exception as e:
+         raise gr.Error(f"Subtitle generation failed: {e}")  # surfaced in the UI instead of a broken file output
+
+ # Define the Gradio interface
+ with gr.Blocks() as demo:
+     gr.Markdown("# AI-Powered Video Subtitling")
+     gr.Markdown("Upload a video and select a language to generate subtitles.")
+
+     with gr.Row():
+         video_input = gr.File(label="Upload Video File", file_types=[".mp4", ".mkv", ".avi"])
+         language_dropdown = gr.Dropdown(
+             choices=list(language_map.keys()),
+             label="Select Subtitle Language",
+             value="English"
+         )
+
+     generate_button = gr.Button("Generate Subtitles")
+     output_srt = gr.File(label="Download Subtitles")
+
+     generate_button.click(
+         generate_subtitles,
+         inputs=[video_input, language_dropdown],
+         outputs=output_srt
+     )
+
+ # Launch the Gradio app
+ demo.launch()
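
Transcription plus per-segment translation can take several minutes for a longer video. Not part of this commit, but one hedged option is to enable Gradio's request queue before launching so long jobs are not cut off by HTTP timeouts; the value below is illustrative:

# Optional alternative to the plain launch above: queue long-running jobs
# instead of handling them inside a single HTTP request (max_size is illustrative).
demo.queue(max_size=4).launch()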
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ gradio==3.41.2
+ transformers==4.35.2
+ openai-whisper==20230314
+ torch==2.0.1
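
Two runtime notes the pinned packages do not capture: the `import whisper` in utils.py comes from the openai-whisper distribution, which also needs ffmpeg installed on the system, and the Helsinki-NLP Marian tokenizers generally require sentencepiece. A minimal, hypothetical sanity check (not part of this commit) could look like this:

# Hypothetical environment check: verifies the Python packages import and that
# ffmpeg (a system dependency of Whisper) is available on PATH.
import importlib
import shutil

for pkg in ("gradio", "transformers", "whisper", "torch", "sentencepiece"):
    importlib.import_module(pkg)
    print(f"{pkg}: importable")

print("ffmpeg on PATH:", shutil.which("ffmpeg") is not None)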
utils.py ADDED
@@ -0,0 +1,66 @@
+ import whisper
+ from transformers import MarianMTModel, MarianTokenizer, AutoTokenizer, AutoModelForSeq2SeqLM
+
+ # Load the Whisper speech-recognition model once at import time
+ model = whisper.load_model("base")
+
+ def format_timestamp(seconds):
+     """Convert a time in seconds to the SRT timestamp format HH:MM:SS,mmm."""
+     millis = int(round(seconds * 1000))
+     hours, millis = divmod(millis, 3_600_000)
+     minutes, millis = divmod(millis, 60_000)
+     secs, millis = divmod(millis, 1_000)
+     return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
+
+ def process_video(video_file, language):
+     # Gradio's File component passes a temp file object; use its path directly
+     video_path = video_file if isinstance(video_file, str) else video_file.name
+
+     try:
+         print("Transcribing video (English audio)...")
+         result = model.transcribe(video_path, language="en")
+
+         segments = []
+         if language == "English":
+             segments = result["segments"]
+         elif language == "Telugu":
+             # Telugu is not covered by the Opus-MT pairs below, so use NLLB-200
+             model_name = "facebook/nllb-200-distilled-600M"
+             tokenizer = AutoTokenizer.from_pretrained(model_name)
+             translation_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+             tgt_lang = "tel_Telu"  # NLLB-200 language code for Telugu
+             print("Translating to Telugu using NLLB-200 Distilled...")
+             for segment in result["segments"]:
+                 inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
+                 translated_tokens = translation_model.generate(**inputs, forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang))
+                 translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
+                 segments.append({"text": translated_text, "start": segment["start"], "end": segment["end"]})
+         else:
+             model_map = {
+                 "Hindi": "Helsinki-NLP/opus-mt-en-hi",
+                 "Spanish": "Helsinki-NLP/opus-mt-en-es",
+                 "French": "Helsinki-NLP/opus-mt-en-fr",
+                 "German": "Helsinki-NLP/opus-mt-en-de",
+                 "Portuguese": "Helsinki-NLP/opus-mt-en-pt",
+                 "Russian": "Helsinki-NLP/opus-mt-en-ru",
+                 "Chinese": "Helsinki-NLP/opus-mt-en-zh",
+                 "Arabic": "Helsinki-NLP/opus-mt-en-ar",
+                 "Japanese": "Helsinki-NLP/opus-mt-en-jap"
+             }
+             model_name = model_map[language]
+             tokenizer = MarianTokenizer.from_pretrained(model_name)
+             translation_model = MarianMTModel.from_pretrained(model_name)
+             print(f"Translating to {language}...")
+             for segment in result["segments"]:
+                 inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
+                 translated = translation_model.generate(**inputs)
+                 translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
+                 segments.append({"text": translated_text, "start": segment["start"], "end": segment["end"]})
+
+         # Write the segments to an SRT file (timestamps must be HH:MM:SS,mmm)
+         srt_path = "/tmp/subtitles.srt"
+         with open(srt_path, "w", encoding="utf-8") as f:
+             for i, segment in enumerate(segments, 1):
+                 start = format_timestamp(segment["start"])
+                 end = format_timestamp(segment["end"])
+                 text = segment["text"].strip()
+                 f.write(f"{i}\n{start} --> {end}\n{text}\n\n")
+         return srt_path
+
+     except Exception:
+         # Propagate the failure so the Gradio handler in app.py can report it
+         raise
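
Because process_video accepts a plain file path as well as Gradio's temp file object, the backend can be smoke-tested without launching the UI. A minimal sketch, assuming a local sample video at ./sample.mp4 (a hypothetical path):

# Hypothetical local smoke test of the backend; "sample.mp4" is a placeholder path.
from utils import process_video

srt_path = process_video("sample.mp4", "Hindi")
print("Subtitles written to:", srt_path)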