|
|
|
import gradio as gr |
|
|
|
def dub_video(video_url):
    """Placeholder pipeline: turn *video_url* into a Hindi-dubbed video.

    Parameters
    ----------
    video_url : str | None
        URL of the source video as entered in the UI textbox.

    Returns
    -------
    str | None
        Path of the dubbed video once the real pipeline is wired in.
        Returns ``None`` for a blank/missing URL so the ``gr.Video``
        output component stays empty instead of receiving an invalid
        placeholder value.
    """
    # Guard against empty or whitespace-only input from the textbox.
    if not video_url or not video_url.strip():
        return None

    # TODO: replace with the real pipeline (download -> transcribe ->
    # translate -> TTS -> mux). The string below is only a stub.
    return "Processed video path will be returned here (replace with actual function call)"
|
|
|
# Build the web UI: one URL textbox in, one video player out.
url_input = gr.Textbox(label="Enter video URL")
dubbed_output = gr.Video(label="Hindi Dubbed Video")

demo = gr.Interface(
    fn=dub_video,
    inputs=url_input,
    outputs=dubbed_output,
    title="Video Dubbing AI (Hindi)",
    description="Enter a video URL to get it dubbed in Hindi.",
)
|
|
|
# Start the Gradio web server.
# NOTE(review): launch() blocks by default, so the Whisper code further down
# this file only runs after the server is shut down — consider moving that
# code above this call or behind an `if __name__ == "__main__":` guard.
demo.launch()
|
|
|
|
|
from transformers import WhisperProcessor, WhisperForConditionalGeneration |
|
import torch |
|
from datasets import load_dataset |
|
|
|
|
|
# Load the Whisper speech-to-text checkpoint: the processor handles audio
# feature extraction and token decoding, the model does the transcription.
processor = WhisperProcessor.from_pretrained("openai/whisper-small")

model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")

# Run on GPU when available; fall back to CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = model.to(device)

# Smoke-test audio: a tiny LibriSpeech dummy split hosted for testing.
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")

# First example's audio record — a dict carrying the raw waveform under
# "array" and its sample rate under "sampling_rate" (used below).
sample = ds[0]["audio"]

# Convert the raw waveform into model input features, then move them to the
# same device as the model so generate() doesn't hit a device mismatch.
input_features = processor(sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt").input_features

input_features = input_features.to(device)

# Autoregressively decode token ids for the transcription.
predicted_ids = model.generate(input_features)

# batch_decode returns a list of strings, one per input example;
# skip_special_tokens drops markers like start/end-of-transcript.
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)

print(transcription)
|
|
|
|