Mohitrai11 committed
Commit 64cfcbf · verified · 1 parent: 8bfcfb1

Create app.py

Files changed (1)
  1. app.py +566 -0
app.py ADDED
@@ -0,0 +1,566 @@
+ import os
+ import shutil
+ import tempfile
+ import subprocess
+ from pathlib import Path
+ import numpy as np
+ import soundfile as sf
+ from pydub import AudioSegment
+ from faster_whisper import WhisperModel
+ from openai import OpenAI
+ import httpx
+ import asyncio
+ import gradio as gr
+ import requests
+
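+ # Runtime assumptions (hedged): the `ffmpeg` binary must be on PATH for the
+ # subprocess calls below, and the `demucs` and `edge-tts` packages must be
+ # installed for the lazy imports inside separate_vocals() and
+ # synthesize_tts_to_wav(). The OpenRouter key is read from the
+ # OPENROUTER_API_KEY environment variable (see AudioProcessor.__init__).
+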
+ # --- Demucs-based vocal separation ---
+ def separate_vocals(input_path, progress=gr.Progress()):
+     """Use Demucs to separate vocals and background music"""
+     progress(0.1, desc="Separating vocals and music (Demucs)")
+     temp_dir = tempfile.mkdtemp()
+     try:
+         output_dir = os.path.join(temp_dir, "separated")
+         os.makedirs(output_dir, exist_ok=True)
+         from demucs.separate import main as demucs_main
+         import sys
+         # demucs.separate.main() parses sys.argv, so patch it temporarily
+         # and restore it afterwards.
+         original_argv = sys.argv
+         sys.argv = [
+             "demucs",
+             "--two-stems", "vocals",
+             "-o", output_dir,
+             input_path
+         ]
+         try:
+             demucs_main()
+         finally:
+             sys.argv = original_argv
+         base_name = Path(input_path).stem
+         # "htdemucs" is the default Demucs model; its name becomes the output subfolder.
+         vocals_path = os.path.join(output_dir, "htdemucs", base_name, "vocals.wav")
+         noise_path = os.path.join(output_dir, "htdemucs", base_name, "no_vocals.wav")
+         if not os.path.exists(vocals_path) or not os.path.exists(noise_path):
+             raise FileNotFoundError("Demucs output missing")
+         progress(0.3, desc="Vocals separated")
+         return vocals_path, noise_path, temp_dir
+     except Exception as e:
+         print(f"Demucs error: {e}")
+         shutil.rmtree(temp_dir, ignore_errors=True)
+         return None, None, None
+
+ # --- AudioProcessor class ---
+ class AudioProcessor:
+     def __init__(self, device="cpu"):
+         self.whisper_model = WhisperModel("small", device=device)
+         # Read the key from the environment rather than hardcoding it in source.
+         self.openrouter_api_key = os.environ.get("OPENROUTER_API_KEY")
+         self.client = OpenAI(
+             base_url="https://openrouter.ai/api/v1",
+             api_key=self.openrouter_api_key,
+             http_client=httpx.Client(headers={
+                 # OpenRouter attribution headers; the Authorization header
+                 # duplicates what the OpenAI client already sends.
+                 "Authorization": f"Bearer {self.openrouter_api_key}",
+                 "HTTP-Referer": "https://github.com",
+                 "X-Title": "Audio Translation App"
+             })
+         )
+
+     def transcribe_audio_with_pauses(self, audio_path, progress):
+         progress(0.35, desc="Transcribing audio (Whisper)")
+         segments, _ = self.whisper_model.transcribe(audio_path, word_timestamps=True)
+         previous_end = 0.0
+         results = []
+         for segment in segments:
+             # Gaps longer than 0.5 s become explicit pause entries with text=None.
+             if segment.start > previous_end + 0.5:
+                 results.append((previous_end, segment.start, None))
+             results.append((segment.start, segment.end, segment.text.strip()))
+             previous_end = segment.end
+         audio_duration = get_audio_duration(audio_path)
+         if audio_duration and audio_duration > previous_end + 0.5:
+             results.append((previous_end, audio_duration, None))
+         progress(0.5, desc="Transcription complete")
+         return results
+
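+     # For illustration: transcribe_audio_with_pauses() returns a list of
+     # (start_s, end_s, text) tuples where text is None for detected pauses,
+     # e.g. [(0.0, 2.4, "Hello world"), (2.4, 3.1, None), (3.1, 5.0, "How are you?")].
+     # (Hypothetical values, shown only to document the shape of the data.)
+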
+     def translate_segments_batch(self, segments, target_language, progress):
+         """Translate all text segments in a single batch request"""
+         progress(0.55, desc="Translating segments")
+         try:
+             # Filter out None segments (pauses)
+             text_segments = [seg for seg in segments if seg is not None]
+             if not text_segments:
+                 return segments  # Return original if no text to translate
+             print(f"Translating {len(text_segments)} segments in batch...")
+             # Prepare the prompt with clear formatting instructions
+             prompt = f"""Translate the following text segments to {target_language} while maintaining EXACTLY the same format and order:
+ {chr(10).join(text_segments)}
+ IMPORTANT INSTRUCTIONS:
+ 1. Maintain the EXACT same order and number of segments
+ 2. Each line must be a separate translation
+ 3. Use natural conversational {target_language}
+ 4. Preserve meaning/context
+ 5. Leave proper nouns unchanged
+ 6. Make sure each translated sentence is meaningful on its own
+ 7. Match original word count where possible
+ 8. Output ONLY the translations, one per line, no numbers or bullet points
+ 9. Do not add any additional text or explanations
+ Example Input:
+ Hello world
+ How are you?
+ Example Output:
+ नमस्ते दुनिया
+ आप कैसे हैं?
+ """
+             completion = self.client.chat.completions.create(
+                 model="openai/gpt-3.5-turbo",  # OpenRouter expects provider-prefixed model ids
+                 messages=[
+                     {
+                         "role": "system",
+                         "content": f"You are a professional translator from English to {target_language}. Translate exactly as requested."
+                     },
+                     {
+                         "role": "user",
+                         "content": prompt
+                     }
+                 ],
+                 temperature=0.1,  # Lower temperature for more consistent results
+                 max_tokens=2000
+             )
+             translated_text = completion.choices[0].message.content.strip()
+             # Drop blank lines the model may insert so translations stay aligned with segments.
+             translations = [line for line in translated_text.split('\n') if line.strip()]
+             # Reconstruct the segments with translations
+             translated_segments = []
+             translation_idx = 0
+             for seg in segments:
+                 if seg is None:
+                     translated_segments.append(None)
+                 else:
+                     if translation_idx < len(translations):
+                         translated_segments.append(translations[translation_idx])
+                         translation_idx += 1
+                     else:
+                         translated_segments.append(seg)  # Fallback to original if missing translation
+             progress(0.7, desc="Translation complete")
+             return translated_segments
+         except Exception as e:
+             print(f"Batch translation error: {e}")
+             return segments  # Return original segments if translation fails
+
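+ # Note: the batch translation relies on the model returning exactly one line
+ # per input segment. If the counts ever diverge, the reconstruction loop above
+ # falls back to the untranslated text for the leftover segments; a stricter
+ # variant could retry per segment instead (not implemented here).
+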
+ # --- Helper functions ---
+ def get_audio_duration(audio_path):
+     try:
+         with sf.SoundFile(audio_path) as f:
+             return len(f) / f.samplerate
+     except Exception as e:
+         print(f"Duration error: {e}")
+         return None
+
+ async def synthesize_tts_to_wav(text, voice, target_language):
+     import edge_tts
+     # edge-tts emits MP3; convert to mono 22.05 kHz WAV for the later mixing steps.
+     # (target_language is currently unused; the voice name already encodes the language.)
+     temp_mp3 = "temp_tts.mp3"
+     communicate = edge_tts.Communicate(text, voice)
+     await communicate.save(temp_mp3)
+     audio = AudioSegment.from_file(temp_mp3)
+     audio = audio.set_channels(1).set_frame_rate(22050)
+     output_wav = "temp_tts.wav"
+     audio.export(output_wav, format="wav")
+     os.remove(temp_mp3)
+     return output_wav
+
+ def stretch_audio(input_wav, target_duration, api_url="https://sox-api.onrender.com/stretch"):
+     # Read the input audio file and post it to the remote SoX stretching service
+     with open(input_wav, "rb") as f:
+         files = {"file": f}
+         data = {"target_duration": str(target_duration)}
+         response = requests.post(api_url, files=files, data=data)
+     # Check if the request was successful
+     if response.status_code != 200:
+         raise RuntimeError(f"API error: {response.status_code} - {response.text}")
+     # Save the response content to a temporary file
+     fd, output_wav = tempfile.mkstemp(suffix=".wav")
+     os.close(fd)  # mkstemp returns an open descriptor; close it before rewriting the path
+     with open(output_wav, "wb") as out:
+         out.write(response.content)
+     return output_wav
+
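+ # Hedged local alternative: if the hosted endpoint above is unavailable,
+ # roughly the same result can be had with a local `sox` install (sketch, not
+ # part of the original app; the function name is an assumption):
+ #
+ #   def stretch_audio_local(input_wav, target_duration):
+ #       current = get_audio_duration(input_wav)
+ #       fd, output_wav = tempfile.mkstemp(suffix=".wav")
+ #       os.close(fd)
+ #       # sox's tempo factor > 1 shortens audio, so current/target hits the target length
+ #       subprocess.run(["sox", input_wav, output_wav,
+ #                       "tempo", str(current / target_duration)], check=True)
+ #       return output_wav
+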
+ def generate_silence_wav(duration_s, output_path, sample_rate=22050):
+     samples = np.zeros(int(duration_s * sample_rate), dtype=np.float32)
+     sf.write(output_path, samples, sample_rate)
+
+ def cleanup_files(file_list):
+     for file in file_list:
+         if os.path.exists(file):
+             os.remove(file)
+
+ # --- Main Process Function ---
+ async def process_audio_chunks(input_audio_path, voice, target_language, progress):
+     audio_processor = AudioProcessor()
+     print("🔎 Separating vocals and music using Demucs...")
+     vocals_path, background_path, temp_dir = separate_vocals(input_audio_path, progress)
+     if not vocals_path:
+         return None, None
+
+     print("🔎 Transcribing vocals...")
+     segments = audio_processor.transcribe_audio_with_pauses(vocals_path, progress)
+     print(f"Transcribed {len(segments)} segments.")
+
+     # Extract text segments for batch processing
+     segment_texts = [seg[2] if seg[2] is not None else None for seg in segments]
+
+     # Batch translate all segments at once
+     translated_texts = audio_processor.translate_segments_batch(segment_texts, target_language, progress)
+
+     chunk_files = []
+     chunk_idx = 0
+     total_segments = len(segments)
+     for (start, end, _), translated in zip(segments, translated_texts):
+         duration = end - start
+         chunk_idx += 1
+         progress(0.7 + (chunk_idx / total_segments) * 0.15, desc=f"Processing chunk {chunk_idx}/{total_segments}")
+         if translated is None:
+             # Pauses are rendered as silence of the original length.
+             filename = f"chunk_{chunk_idx:03d}_pause.wav"
+             generate_silence_wav(duration, filename)
+             chunk_files.append(filename)
+         else:
+             print(f"🔤 {chunk_idx}: Translated: {translated}")
+             # Synthesize TTS audio
+             raw_tts = await synthesize_tts_to_wav(translated, voice, target_language)
+             # Stretch the TTS audio to match the original segment duration
+             stretched = stretch_audio(raw_tts, duration)
+             chunk_files.append(stretched)
+             os.remove(raw_tts)
+
+     combined_tts = AudioSegment.empty()
+     for f in chunk_files:
+         combined_tts += AudioSegment.from_wav(f)
+
+     print("🎼 Adding original background music...")
+     background_music = AudioSegment.from_wav(background_path)
+     background_music = background_music[:len(combined_tts)]
+     final_mix = combined_tts.overlay(background_music)
+     output_path = "final_translated_with_music.wav"
+     final_mix.export(output_path, format="wav")
+     print(f"✅ Output saved as: {output_path}")
+
+     final_audio_path = output_path
+     # background_path lives inside temp_dir, which is removed below, so this
+     # return value only signals that separation succeeded.
+     final_background_path = background_path
+
+     cleanup_files(chunk_files)
+     shutil.rmtree(temp_dir, ignore_errors=True)
+     progress(0.9, desc="Audio processing complete")
+     return final_audio_path, final_background_path
+
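+ # Design note: time alignment is kept deliberately simple here. Each translated
+ # chunk is stretched to the exact duration of its source segment and pauses are
+ # re-inserted as silence, so the concatenated dub lines up with the original
+ # background track without any further synchronisation step.
+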
+ # --- Gradio Interface ---
+ def gradio_interface(video_file, voice, target_language, progress=gr.Progress()):
+     temp_dir = None  # Defined up front so the finally block can reference it safely
+     try:
+         progress(0.05, desc="Starting video dubbing process")
+         # Create temporary directory for processing
+         temp_dir = Path(tempfile.mkdtemp())
+         input_video_path = temp_dir / "input_video.mp4"
+         # Check if file is a video
+         if os.path.splitext(video_file.name)[1].lower() not in ['.mp4', '.mov', '.avi', '.mkv']:
+             raise ValueError("Invalid file type. Please upload a video file.")
+         # Save the uploaded file to the temporary directory
+         shutil.copyfile(video_file.name, input_video_path)
+
+         # Extract audio from video
+         progress(0.1, desc="Extracting audio from video")
+         audio_path, audio_temp_dir = extract_audio_from_video(str(input_video_path))
+         if not audio_path:
+             return None
+
+         # Process audio chunks
+         audio_output_path, background_path = asyncio.run(process_audio_chunks(audio_path, voice, target_language, progress))
+         shutil.rmtree(audio_temp_dir, ignore_errors=True)  # Extracted audio is no longer needed
+         if audio_output_path is None or background_path is None:
+             return None
+
+         # Combine with original video
+         progress(0.95, desc="Combining video and new audio")
+         output_video_path = temp_dir / "translated_video.mp4"
+         success = combine_video_audio(str(input_video_path), audio_output_path, str(output_video_path))
+         if success:
+             progress(1.0, desc="Dubbing complete!")
+             # Return the path to the output video
+             return str(output_video_path)
+         else:
+             return None
+     except Exception as e:
+         print(f"Error processing video: {e}")
+         return None
+     finally:
+         # Cleanup is deliberately skipped: the returned video lives inside
+         # temp_dir, so removing it here would break the Gradio output.
+         # shutil.rmtree(temp_dir, ignore_errors=True)
+         pass
+
+ def extract_audio_from_video(video_path):
+     """Extract audio from video file using ffmpeg"""
+     temp_dir = tempfile.mkdtemp()
+     audio_path = os.path.join(temp_dir, "extracted_audio.wav")
+     try:
+         subprocess.run([
+             "ffmpeg", "-y", "-i", video_path,
+             "-vn", "-acodec", "pcm_s16le", "-ar", "44100", "-ac", "2",
+             audio_path
+         ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+         if not os.path.exists(audio_path):
+             raise FileNotFoundError("Audio extraction failed")
+         return audio_path, temp_dir
+     except Exception as e:
+         print(f"Audio extraction error: {e}")
+         shutil.rmtree(temp_dir, ignore_errors=True)
+         return None, None
+
+ def combine_video_audio(video_path, audio_path, output_path):
+     """Combine original video with new audio track"""
+     try:
+         # Copy the video stream untouched; map the new audio in as the only track.
+         subprocess.run([
+             "ffmpeg", "-y", "-i", video_path,
+             "-i", audio_path,
+             "-c:v", "copy", "-map", "0:v:0", "-map", "1:a:0",
+             "-shortest", output_path
+         ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+         return True
+     except Exception as e:
+         print(f"Video combining error: {e}")
+         return False
+
+ # Voice options for each language
+ voice_options = {
+     "Hindi": [
+         "hi-IN-MadhurNeural",   # Male
+         "hi-IN-SwaraNeural"     # Female
+     ],
+     "English": [
+         "en-US-GuyNeural",          # Male
+         "en-US-ChristopherNeural",  # Male
+         "en-US-AriaNeural",         # Female
+         "en-US-JessaNeural",        # Female
+         "en-US-JennyNeural"         # Female
+     ],
+     "Spanish": [
+         "es-ES-AlvaroNeural",   # Male
+         "es-MX-JorgeNeural",    # Male
+         "es-US-AlonsoNeural",   # Male
+         "es-MX-DaliaNeural",    # Female
+         "es-US-PalomaNeural"    # Female
+     ],
+     "French": [
+         "fr-FR-HenriNeural",                # Male
+         "fr-FR-RemyMultilingualNeural",     # Male
+         "fr-CA-AntoineNeural",              # Male
+         "fr-FR-DeniseNeural",               # Female
+         "fr-FR-VivienneMultilingualNeural"  # Female
+     ],
+     "Japanese": [
+         "ja-JP-KeitaNeural",    # Male
+         "ja-JP-NanamiNeural"    # Female
+     ],
+     "Korean": [
+         "ko-KR-InJoonNeural",   # Male
+         "ko-KR-SunHiNeural"     # Female
+     ]
+ }
+
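+ # The names above are Microsoft Edge neural TTS voices as consumed by the
+ # edge-tts package; running `edge-tts --list-voices` on the command line
+ # should print the full catalogue if you want to extend this mapping.
+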
+ custom_css = """
+ /* Overall Body Background - Deep & Vibrant Gradient */
+ body {
+     background: linear-gradient(135deg, #1A202C, #2D3748, #4A5568) !important; /* Dark blue-grey gradient */
+     font-family: 'Inter', sans-serif; /* Modern font; falls back to sans-serif if unavailable */
+     color: #E2E8F0; /* Light text color for contrast */
+     overflow-x: hidden;
+ }
+ /* --- Core Gradio Block Blending --- */
+ /* Make Gradio's main container transparent to show body background */
+ .gradio-container {
+     background: transparent !important;
+     box-shadow: none !important;
+     border: none !important;
+     padding: 0 !important;
+ }
+ /* Specific Gradio block elements - subtle transparency */
+ .block {
+     background-color: hsla(210, 20%, 25%, 0.5) !important; /* Semi-transparent dark blue-grey */
+     backdrop-filter: blur(8px); /* Frosted glass effect */
+     border: 1px solid hsla(210, 20%, 35%, 0.6) !important; /* Subtle border */
+     border-radius: 20px !important; /* Rounded corners for the block */
+     box-shadow: 0 8px 30px hsla(0, 0%, 0%, 0.3) !important; /* Stronger shadow for depth */
+     margin-bottom: 25px !important;
+     padding: 25px !important; /* Add internal padding to blocks */
+ }
+ /* Remove default Gradio layout wrappers' backgrounds */
+ .main-wrapper, .panel-container {
+     background: transparent !important;
+     box-shadow: none !important;
+     border: none !important;
+ }
+ /* --- Application Title and Description --- */
+ .gradio-header h1 {
+     color: #8D5BFC !important; /* Vibrant purple for main title */
+     font-size: 3em !important;
+     text-shadow: 0 0 15px hsla(260, 90%, 70%, 0.5); /* Glowing effect */
+     margin-bottom: 10px !important;
+     font-weight: 700 !important;
+     text-align: center;
+ }
+ .gradio-markdown p {
+     color: #CBD5E0 !important; /* Lighter text for description */
+     font-size: 1.25em !important;
+     text-align: center;
+     margin-bottom: 40px !important;
+     font-weight: 300;
+ }
+ /* --- Input Components (File, Dropdowns) --- */
+ .gradio-file, .gradio-dropdown {
+     background-color: hsla(210, 20%, 18%, 0.7) !important; /* Darker, slightly transparent */
+     border: 1px solid hsla(240, 60%, 70%, 0.4) !important; /* Subtle blue border */
+     border-radius: 15px !important;
+     padding: 12px 18px !important;
+     color: #E2E8F0 !important; /* Light text for input */
+     font-size: 1.1em !important;
+     transition: all 0.3s ease;
+     box-shadow: 0 4px 15px hsla(0, 0%, 0%, 0.2);
+ }
+ .gradio-file input[type="file"] {
+     color: #E2E8F0 !important;
+ }
+ .gradio-file:hover, .gradio-dropdown:hover {
+     border-color: #A78BFA !important; /* Lighter purple on hover */
+     box-shadow: 0 6px 20px hsla(0, 0%, 0%, 0.3);
+ }
+ /* Focus state for inputs */
+ .gradio-dropdown.gr-text-input:focus,
+ .gradio-file input:focus {
+     border-color: #8D5BFC !important; /* Vibrant purple on focus */
+     box-shadow: 0 0 20px hsla(260, 90%, 70%, 0.5);
+     background-color: hsla(210, 20%, 20%, 0.9) !important; /* Slightly less transparent */
+ }
+ /* Labels for inputs */
+ .gradio-label {
+     color: #A78BFA !important; /* Soft purple for labels */
+     font-weight: 600 !important;
+     font-size: 1.15em !important;
+     margin-bottom: 8px !important;
+     text-align: left;
+     width: 100%;
+ }
+ /* --- Submit Button --- */
+ .gradio-button {
+     background: linear-gradient(90deg, #FF6B8B, #FF8E53) !important; /* Vibrant pink-to-orange gradient */
+     color: white !important;
+     border: none !important;
+     border-radius: 30px !important;
+     padding: 15px 35px !important;
+     font-size: 1.3em !important;
+     font-weight: bold !important;
+     cursor: pointer !important;
+     transition: all 0.3s ease !important;
+     box-shadow: 0 8px 25px hsla(0, 0%, 0%, 0.4) !important;
+     margin-top: 35px !important;
+     min-width: 220px;
+     align-self: center;
+     text-transform: uppercase; /* Make button text uppercase */
+     letter-spacing: 1px;
+ }
+ .gradio-button:hover {
+     background: linear-gradient(90deg, #FF4B7B, #FF7E43) !important;
+     box-shadow: 0 10px 30px hsla(0, 0%, 0%, 0.5) !important;
+     transform: translateY(-3px) !important;
+ }
+ /* --- Output Video Player --- */
+ .gradio-video {
+     background-color: hsla(210, 20%, 15%, 0.8) !important; /* Darker, more opaque background for video */
+     border: 2px solid #8D5BFC !important; /* Vibrant purple border for the video player */
+     border-radius: 20px !important;
+     padding: 15px !important;
+     box-shadow: 0 10px 40px hsla(0, 0%, 0%, 0.5) !important; /* Stronger shadow */
+     margin-top: 40px !important;
+ }
+ /* --- Translated Text Output --- */
+ .gradio-markdown-output, .gradio-textbox {
+     background-color: hsla(210, 20%, 18%, 0.7) !important;
+     border: 1px solid hsla(240, 60%, 70%, 0.4) !important;
+     border-radius: 15px !important;
+     padding: 20px !important;
+     color: #E2E8F0 !important;
+     font-size: 1.0em !important;
+     min-height: 200px; /* Give it some height */
+     overflow-y: auto; /* Enable scrolling for long text */
+     white-space: pre-wrap; /* Preserve line breaks */
+     box-shadow: 0 4px 15px hsla(0, 0%, 0%, 0.2);
+ }
+ /* Flexbox for the Row to control spacing and alignment */
+ .gradio-row {
+     display: flex;
+     justify-content: space-around; /* Distribute items with space around */
+     align-items: flex-start; /* Align items to the start of the cross-axis */
+     gap: 20px; /* Space between items in the row */
+     flex-wrap: wrap; /* Allow items to wrap on smaller screens */
+ }
+ /* Ensure individual components in a row take up appropriate space */
+ .gradio-row > .gradio-component {
+     flex: 1; /* Allow components to grow and shrink */
+     min-width: 250px; /* Minimum width for components in a row */
+ }
+ /* Adjust padding for gr.Blocks content */
+ .gr-box {
+     padding: 0 !important; /* Remove internal padding so elements can breathe */
+     background: transparent !important;
+     box-shadow: none !important;
+ }
+ """
+ # Create Gradio interface with radio buttons for both language and voice selection
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(
+     primary_hue=gr.themes.Color(
+         c50='#e6e9ff', c100='#c2c9ff', c200='#9faaff', c300='#7c8bff', c400='#5a6bff',
+         c500='#384aff', c600='#2c38cc', c700='#202b99', c800='#141d66', c900='#080e33',
+         c950='#04071a'
+     ),
+     secondary_hue=gr.themes.Color(
+         c50='#fff0e6', c100='#ffe0cc', c200='#ffb380', c300='#ff8533', c400='#ff5700',
+         c500='#cc4600', c600='#993400', c700='#662200', c800='#331100', c900='#1a0900',
+         c950='#0d0500'
+     ),
+     neutral_hue=gr.themes.Color(
+         c50='#f8f8fa', c100='#f1f5f9', c200='#e2e8f0', c300='#cbd5e1', c400='#94a3b8',
+         c500='#64748b', c600='#475569', c700='#334155', c800='#1e293b', c900='#0f172a',
+         c950='#020617'
+     )
+ )) as demo:
+     gr.Markdown("# DeepDub: A Video Dubbing Application")
+     gr.Markdown("Upload a video and get a dubbed version with translated audio.")
+
+     with gr.Row():
+         video_input = gr.File(label="Upload Video", file_types=[".mp4", ".mov", ".avi", ".mkv"])
+
+         # Use radio buttons for language selection
+         language_radio = gr.Radio(
+             list(voice_options.keys()),
+             label="Target Language",
+             value="Hindi",
+             interactive=True
+         )
+
+         # Use radio buttons for voice selection
+         voice_radio = gr.Radio(
+             voice_options["Hindi"],
+             label="Select Voice",
+             value=voice_options["Hindi"][0],
+             interactive=True
+         )
+     gr.Markdown("Note: if a queue is shown, someone else is using the app; please wait.")
+     output_video = gr.Video(label="Dubbed Video")
+     submit_btn = gr.Button("Start Dubbing")
+
+     def update_voice_options(language):
+         # Update voice radio buttons based on selected language
+         return gr.update(choices=voice_options[language], value=voice_options[language][0])
+
+     # Update voice options when language changes
+     language_radio.change(
+         update_voice_options,
+         inputs=[language_radio],
+         outputs=[voice_radio]
+     )
+
+     submit_btn.click(
+         gradio_interface,
+         inputs=[video_input, voice_radio, language_radio],
+         outputs=output_video,
+         api_name="dub_video"
+     )
+
+ demo.queue().launch(server_name="0.0.0.0", debug=True, share=True)
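+ # Running locally (assumption, standard Gradio behaviour): `python app.py`
+ # starts the server, and the UI is served on port 7860 by default; on Hugging
+ # Face Spaces the share=True flag is typically ignored in favour of the Space URL.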