Bagda committed on
Commit
f7d38d2
·
verified ·
1 Parent(s): 905dfdb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -16
app.py CHANGED
@@ -1,23 +1,64 @@
1
  import gradio as gr
2
- from transformers import pipeline
 
3
  import scipy
 
4
 
5
# Load the Suno Bark text-to-speech pipeline once, at import time.
bark_pipe = pipeline(task="text-to-speech", model="suno/bark")
7
 
8
def bark_tts(text):
    """Synthesize speech for ``text`` with Bark and return the WAV file path.

    Args:
        text: The text to speak; passed unchanged to ``bark_pipe``.

    Returns:
        str: Path of the WAV file written from the pipeline's ``"audio"``
        and ``"sampling_rate"`` outputs.
    """
    # Function-scope import so this block does not depend on a file-level
    # import being added elsewhere.
    import tempfile

    result = bark_pipe(text)

    # The function returns a path on disk rather than raw samples. A unique
    # temporary file (instead of a shared "output.wav") keeps concurrent
    # requests from overwriting each other's audio. delete=False because the
    # file has to outlive this function so the caller can read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        scipy.io.wavfile.write(wav_file, result["sampling_rate"], result["audio"])
    return wav_file.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
# Gradio UI for the Bark demo: one text box in, one generated audio clip out.
iface = gr.Interface(
    title="Suno Bark Text-to-Speech Demo",
    fn=bark_tts,
    inputs=gr.Textbox(label="Enter text (Hindi/English)"),
    outputs=gr.Audio(label="Generated Speech", type="filepath"),
)

# Start the server only when this file is executed as a script; it listens
# on all interfaces, port 7860.
if __name__ == "__main__":
    iface.launch(server_port=7860, server_name="0.0.0.0")
23
-
 
1
  import gradio as gr
2
+ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
3
+ import librosa
4
  import scipy
5
+ import os
6
 
7
# Speech-to-text: the Whisper-Small processor and generation model are both
# loaded from the same checkpoint.
WHISPER_CHECKPOINT = "openai/whisper-small"
processor = WhisperProcessor.from_pretrained(WHISPER_CHECKPOINT)
model = WhisperForConditionalGeneration.from_pretrained(WHISPER_CHECKPOINT)

# Text-to-speech: Suno Bark wrapped in a transformers pipeline.
bark_pipe = pipeline(task="text-to-speech", model="suno/bark")
13
 
14
def process_audio(video_file):
    """Transcribe an uploaded video's speech and re-synthesize it with Bark.

    Args:
        video_file: Path of the uploaded video, as delivered by the
            ``gr.Video`` input wired to this callback.

    Returns:
        tuple: ``(transcription, output_file)`` where ``transcription`` is
        the text decoded by Whisper and ``output_file`` is the path of the
        WAV file generated by Bark.

    Raises:
        gr.Error: If no file was uploaded, the upload has no audio track,
            or no text could be transcribed from it.
    """
    # Function-scope imports, matching how merge_audio_to_video pulls in
    # moviepy, so this block stays self-contained.
    import tempfile

    import moviepy.editor as mp

    if not video_file:
        raise gr.Error("Please upload a video file first.")

    # A per-request directory keeps concurrent requests from overwriting
    # each other's intermediate and output files.
    work_dir = tempfile.mkdtemp(prefix="dubbing_")
    output_audio = os.path.join(work_dir, "output_audio.wav")
    output_file = os.path.join(work_dir, "output_dubbed.wav")

    # Step 1: extract the audio track from the video. gr.Video is a UI
    # component, not a media reader, so the clip is opened with moviepy.
    # NOTE(review): audio-only uploads are not handled here - confirm
    # whether they need to be supported.
    clip = mp.VideoFileClip(video_file)
    try:
        if clip.audio is None:
            raise gr.Error("The uploaded video has no audio track.")
        clip.audio.write_audiofile(output_audio)
    finally:
        # Release the ffmpeg reader even when extraction fails.
        clip.close()

    # Step 2: speech-to-text. The audio is resampled to 16 kHz on load and
    # the same rate is passed on to the Whisper processor.
    # NOTE(review): Whisper works on fixed-length windows; confirm how clips
    # longer than one window should be handled.
    waveform, sr = librosa.load(output_audio, sr=16000)
    input_features = processor(
        waveform, sampling_rate=sr, return_tensors="pt"
    ).input_features
    predicted_ids = model.generate(input_features)
    transcription = processor.batch_decode(
        predicted_ids, skip_special_tokens=True
    )[0]
    if not transcription.strip():
        raise gr.Error("No speech could be transcribed from the uploaded file.")

    # Step 3: text-to-speech.
    speech = bark_pipe(transcription)
    scipy.io.wavfile.write(output_file, speech["sampling_rate"], speech["audio"])

    # Step 4: only the dubbed audio file is returned, for the user to
    # download. To produce a dubbed video instead, merge this audio back
    # into the video with moviepy and return the resulting video file.
    return transcription, output_file
36
+
37
# Optional helper: merge an audio file into a video with moviepy (use it when
# a video output is wanted).
def merge_audio_to_video(video_file, audio_file, output_video="output_dubbed.mp4"):
    """Replace the audio track of ``video_file`` with ``audio_file``.

    Args:
        video_file: Path of the source video.
        audio_file: Path of the audio to attach to the video.
        output_video: Path the merged video is written to.

    Returns:
        The ``output_video`` path that was written.
    """
    from contextlib import closing

    import moviepy.editor as mp

    # Both clips hold open readers on their source files; closing() releases
    # them even if writing the output fails.
    with closing(mp.VideoFileClip(video_file)) as video, \
            closing(mp.AudioFileClip(audio_file)) as audio:
        dubbed = video.set_audio(audio)
        dubbed.write_videofile(output_video)
    return output_video
45
+
46
+ # NOTE: The Gradio Audio component only accepts audio uploads; use the Gradio Video component for video files.
47
+ # However, the Gradio Video component is not used here to return a file path as output, so only the audio file is returned.
48
 
49
# Gradio UI: upload a video, then show the transcription and the dubbed audio.
# NOTE(review): the nesting below was reconstructed from a flattened diff -
# confirm which components belong inside the row.
with gr.Blocks() as demo:
    gr.Markdown("# Imagine: AI Video/Audio Dubbing")
    with gr.Row():
        file_in = gr.Video(label="Upload Video/Audio File")
        btn = gr.Button("Generate Dubbed Audio")
    transcription_out = gr.Textbox(label="Transcription")
    audio_out = gr.Audio(label="Download Dubbed Audio", type="filepath")
    btn.click(
        fn=process_audio,
        inputs=file_in,
        outputs=[transcription_out, audio_out],
    )
    # Only the dubbed audio is exposed here. Returning a dubbed video would
    # need extra wiring (a video output component fed by
    # merge_audio_to_video).

# Start the server only when this file is executed as a script, so importing
# the module (for reuse or testing) does not launch it. It listens on all
# interfaces, port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)