|
|
|
|
|
import gradio as gr |
|
|
|
|
|
def dub_video(video_url):
    """Placeholder entry point for the Hindi dubbing pipeline.

    Args:
        video_url: URL of the source video to be dubbed.

    Returns:
        A status string for now; replace with the path of the dubbed
        video file once the real processing pipeline is wired in.
    """
    placeholder_result = "Processed video path will be returned here (replace with actual function call)"
    return placeholder_result
|
|
|
|
|
# Build the UI components up front, then assemble the interface.
url_input = gr.Textbox(label="Enter video URL")
dubbed_output = gr.Video(label="Hindi Dubbed Video")

demo = gr.Interface(
    fn=dub_video,
    inputs=url_input,
    outputs=dubbed_output,
    title="Video Dubbing AI (Hindi)",
    description="Enter a video URL to get it dubbed in Hindi.",
)

# NOTE(review): launch() normally blocks until the UI is shut down, so the
# statements further down this script only run afterwards — confirm intended.
demo.launch()
|
|
|
|
|
|
|
|
from pytube import YouTube

# TODO: replace the placeholder with a real video URL before running.
video_url = "https://www.youtube.com/watch?v=YOUR_VIDEO_ID"

yt = YouTube(video_url)
# Pick an audio-only stream: despite the .mp4 name below, the downloaded
# file carries no video track.
stream = yt.streams.filter(only_audio=True).first()
# .first() returns None when no matching stream exists; fail loudly instead
# of crashing later with an opaque AttributeError on .download().
if stream is None:
    raise RuntimeError(f"No audio-only stream available for {video_url}")
stream.download(filename="video_audio.mp4")
|
|
|
|
|
from moviepy.editor import AudioFileClip

# BUG FIX: the file downloaded above is an audio-only stream
# (filter(only_audio=True)), so VideoFileClip would fail on it — there is
# no video track to open. AudioFileClip reads it directly.
audio = AudioFileClip("video_audio.mp4")
# Re-encode to WAV for downstream speech processing.
audio.write_audiofile("output_audio.wav")
# Release the underlying ffmpeg reader process/file handle.
audio.close()
|
|
|
|
|
|
|
|
|
|
|
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import torch
from datasets import load_dataset

# Load the Whisper ASR checkpoint together with its feature/token processor.
processor = WhisperProcessor.from_pretrained("openai/whisper-small")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")

# Run on GPU when one is available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Small dummy speech dataset used here purely as demo input.
dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
audio_sample = dataset[0]["audio"]

# Turn the raw waveform into log-mel input features and move them to the
# same device as the model.
features = processor(
    audio_sample["array"],
    sampling_rate=audio_sample["sampling_rate"],
    return_tensors="pt",
).input_features.to(device)

# Autoregressively generate token ids for the utterance.
generated_ids = model.generate(features)

# Decode the ids back into text, dropping special tokens.
transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)
print(transcription)
|
|
|
|
|
|
|
|
|