File size: 1,478 Bytes
b5f2882
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from huggingface_hub import snapshot_download
import gradio as gr
import subprocess
import os
import uuid

# Download SadTalker model from Hugging Face (one-time setup)
def setup_models():
    """Fetch the SadTalker snapshot into ``checkpoints`` unless that path exists.

    The download is skipped whenever something is already present at
    ``checkpoints``; the contents of that path are not inspected.
    """
    target_dir = "checkpoints"
    if os.path.exists(target_dir):
        # Already set up on a previous run; nothing to do.
        return
    snapshot_download(repo_id="OpenTalker/SadTalker", local_dir=target_dir)

# Runs at import time, so the checkpoint check/download happens before the
# interface below is built and launched.
setup_models()

# Main function to generate video
def generate(text, image):
    """Turn a script and a face image into a talking-avatar video.

    Each call works in its own ``results/<session>`` directory: the image is
    saved as ``avatar.jpg``, the ``tts`` command-line tool writes the speech to
    ``audio.wav``, and ``inference.py`` is run on both to render the video.

    Args:
        text: Script to be spoken. Must contain non-whitespace characters.
        image: Avatar image object providing ``save(path)`` (the interface
            passes a PIL image).

    Returns:
        Path of the generated ``.mp4`` file inside the session directory.

    Raises:
        ValueError: If ``text`` is empty/blank or ``image`` is missing.
        subprocess.CalledProcessError: If the TTS or inference command exits
            with a non-zero status.
        FileNotFoundError: If the commands succeed but no ``.mp4`` is found.
    """
    # Validate before touching the filesystem so bad requests leave no debris.
    if not text or not text.strip():
        raise ValueError("Please enter a script to speak.")
    if image is None:
        raise ValueError("Please upload an avatar image.")

    session = uuid.uuid4().hex[:8]
    session_dir = f"results/{session}"
    os.makedirs(session_dir, exist_ok=True)

    # Save uploaded image
    image_path = f"{session_dir}/avatar.jpg"
    # JPEG cannot store alpha or palette data, so normalise such modes first.
    if getattr(image, "mode", "RGB") not in ("RGB", "L"):
        image = image.convert("RGB")
    image.save(image_path)

    # Generate audio using Coqui TTS
    audio_path = f"{session_dir}/audio.wav"
    # Argument list and no shell: the script is user input and must never be
    # interpreted by a shell. check=True surfaces a failed synthesis at once.
    subprocess.run(
        ["tts", "--text", text, "--out_path", audio_path],
        check=True,
    )

    # Run SadTalker
    subprocess.run(
        [
            "python",
            "inference.py",
            "--driven_audio",
            audio_path,
            "--source_image",
            image_path,
            "--result_dir",
            session_dir,
        ],
        check=True,
    )

    video_path = f"{session_dir}/video.mp4"
    if os.path.exists(video_path):
        return video_path

    # NOTE(review): upstream SadTalker appears to name its output after a
    # timestamp rather than "video.mp4" - confirm against inference.py. Until
    # then, fall back to the newest .mp4 written into the session directory.
    produced = [
        f"{session_dir}/{name}"
        for name in os.listdir(session_dir)
        if name.lower().endswith(".mp4")
    ]
    if not produced:
        raise FileNotFoundError(f"No video was produced in {session_dir}")
    return max(produced, key=os.path.getmtime)

# Gradio interface
# The script textbox and the avatar image (delivered as a PIL image) are passed
# to generate(); the file path it returns is shown in the video component.
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(label="📝 Script", placeholder="Enter your script here..."),
        gr.Image(label="🖼️ Avatar Image", type="pil"),
    ],
    outputs=gr.Video(label="🎬 Generated Video"),
    title="🆓 Faceless Video Generator",
    description="Upload a face photo + script, and get a talking avatar video powered by SadTalker + Coqui TTS.",
)
demo.launch()