IZERE HIRWA Roger committed on
Commit
3184c99
·
1 Parent(s): b1b4f3e
Files changed (3) hide show
  1. app.py +50 -0
  2. requirements.txt +9 -0
  3. setup.sh +2 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import subprocess
4
+ from gtts import gTTS
5
+ from pydub import AudioSegment
6
+
7
def generate_video(image, text):
    """Create a lip-synced talking video from a still image and a line of text.

    The image is rendered into a 5-second silent 512x512 clip with ffmpeg,
    the text is synthesised to speech with gTTS and converted to WAV, and
    both are handed to the Wav2Lip ``inference.py`` script. That script is
    looked up relative to the current working directory, as are the weights
    at ``checkpoints/wav2lip.pth``.

    Each call works inside its own temporary directory so that simultaneous
    requests cannot overwrite one another's input, audio or output files.

    Args:
        image: PIL image to animate.
        text: English text to be spoken in the video.

    Returns:
        Path to the generated MP4 file. The file (and the temporary
        directory holding it) is left in place for the caller to consume.

    Raises:
        ValueError: If ``image`` is None or ``text`` is empty or whitespace.
        subprocess.CalledProcessError: If ffmpeg or the Wav2Lip script exits
            with a non-zero status.
    """
    import sys
    import tempfile

    # Fail early with a readable message instead of an AttributeError on
    # ``None.save`` or an error from deep inside the TTS library.
    if image is None:
        raise ValueError("Please upload an image.")
    if not text or not text.strip():
        raise ValueError("Please enter some text to speak.")

    # A private scratch directory per call: fixed names in the working
    # directory would be clobbered when two requests run at the same time.
    work_dir = tempfile.mkdtemp(prefix="wav2lip_")
    image_path = os.path.join(work_dir, "input.jpg")
    silent_video_path = os.path.join(work_dir, "input.mp4")
    mp3_path = os.path.join(work_dir, "tts.mp3")
    wav_path = os.path.join(work_dir, "tts.wav")
    output_path = os.path.join(work_dir, "output.mp4")

    try:
        # JPEG cannot store alpha or palette images, so normalise the mode
        # before saving (matters when called with an image that is not RGB).
        if image.mode != "RGB":
            image = image.convert("RGB")
        image.save(image_path)

        # Loop the still image into a 5-second, 25 fps, 512x512 H.264 clip.
        ffmpeg_cmd = [
            "ffmpeg", "-y", "-loop", "1", "-i", image_path,
            "-t", "5", "-vf", "fps=25,scale=512:512",
            "-c:v", "libx264", "-pix_fmt", "yuv420p", silent_video_path,
        ]
        subprocess.run(ffmpeg_cmd, check=True)

        # Synthesise speech as MP3, then convert it to the WAV file that is
        # passed to the inference script.
        gTTS(text, lang="en").save(mp3_path)
        AudioSegment.from_mp3(mp3_path).export(wav_path, format="wav")

        # Run Wav2Lip with the interpreter that is running this app, so the
        # child process sees the same installed packages.
        wav2lip_cmd = [
            sys.executable, "inference.py",
            "--checkpoint_path", "checkpoints/wav2lip.pth",
            "--face", silent_video_path,
            "--audio", wav_path,
            "--outfile", output_path,
        ]
        subprocess.run(wav2lip_cmd, check=True)
    finally:
        # Intermediate files are no longer needed whether or not the run
        # succeeded; only the final video is kept.
        for scratch_file in (image_path, silent_video_path, mp3_path, wav_path):
            if os.path.exists(scratch_file):
                os.remove(scratch_file)

    return output_path
36
+
37
# Assemble the Gradio UI: an image upload and a text box go in, a video comes out.
_image_input = gr.Image(type="pil", label="Upload Cartoon Image")
_text_input = gr.Textbox(
    lines=2,
    placeholder="Enter text for speech...",
    label="Text to Speak",
)
_video_output = gr.Video(label="Talking Cartoon Video")

iface = gr.Interface(
    fn=generate_video,
    inputs=[_image_input, _text_input],
    outputs=_video_output,
    title="Cartoon Talking Video Generator",
    description="Upload a cartoon image and enter text to generate a talking video using Wav2Lip.",
)

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ gTTS
3
+ pydub
4
+ ffmpeg-python
5
+ opencv-python-headless
6
+ numpy==1.21.6
7
+ scipy==1.7.3
8
+ librosa==0.8.1
9
+ torch
setup.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
# Download the pretrained Wav2Lip weights into ./checkpoints.
# Stop at the first failing command instead of carrying on after an error.
set -e

mkdir -p checkpoints

# -c resumes a partially downloaded file rather than starting over.
wget -c https://huggingface.co/numz/wav2lip_studio/resolve/main/Wav2lip/wav2lip.pth -O checkpoints/wav2lip.pth