File size: 4,024 Bytes
7ae35c9
5a834fc
ac8612f
 
 
0b60483
 
 
 
6125413
ac8612f
 
 
 
 
 
 
0b60483
 
 
aac5874
 
0b60483
859e5f2
ac8612f
0b60483
 
ac8612f
 
 
 
 
 
 
0b60483
 
 
 
 
 
 
 
ac8612f
0b60483
 
 
 
6125413
 
859e5f2
0b60483
 
 
ac8612f
0b60483
 
 
 
 
 
 
 
 
6125413
3638d85
 
0b60483
 
 
 
 
 
 
 
 
 
 
 
 
 
6125413
0b60483
 
 
 
 
 
859e5f2
0b60483
 
 
 
 
9bea5a2
3638d85
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import gradio as gr
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from TTS.api import TTS
import numpy as np
from PIL import Image
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from torchvision.io import write_video
import os

# Initialize text generation model (GPT-2)
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Initialize TTS model
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")

# Initialize Stable Diffusion pipeline.
# Device and dtype are chosen together: half precision is only used when a
# CUDA device is available, because float16 inference on CPU is not reliably
# supported. On CPU-only hosts the pipeline runs in float32 instead.
SD_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
SD_DTYPE = torch.float16 if SD_DEVICE == "cuda" else torch.float32
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=SD_DTYPE)
# Swap the default scheduler for DPM-Solver++ while keeping the original scheduler config.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to(SD_DEVICE)


def generate_text(prompt, max_length=200):
    """Generate text from *prompt* with the module-level GPT-2 model.

    Args:
        prompt: Text that is tokenized and handed to ``model.generate``.
        max_length: Forwarded unchanged as the ``max_length`` generation option.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    # NOTE(review): for a causal LM the generated sequence normally starts with
    # the prompt tokens, so the returned string likely includes the prompt
    # itself - confirm whether callers want that.
    prompt_ids = tokenizer.encode(prompt, return_tensors="pt")
    generation_options = {
        # A single, unpadded sequence: every position is marked as attended.
        "attention_mask": torch.ones_like(prompt_ids),
        "max_length": max_length,
        "num_return_sequences": 1,
        "no_repeat_ngram_size": 2,
        # The EOS token id doubles as the padding id.
        "pad_token_id": tokenizer.eos_token_id,
    }
    sequences = model.generate(prompt_ids, **generation_options)
    first_sequence = sequences[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

def generate_speech(text):
    """Synthesize *text* to a WAV file using the module-level TTS model.

    Every call targets the same relative file name, ``generated_speech.wav``.

    Args:
        text: The text to be spoken.

    Returns:
        The path of the written audio file.
    """
    wav_path = "generated_speech.wav"
    tts.tts_to_file(file_path=wav_path, text=text)
    return wav_path

def generate_video_frames(prompt, num_frames=30):
    """Render ``num_frames`` images with the module-level diffusion pipeline.

    Each frame is produced by an independent pipeline call whose prompt is
    *prompt* followed by a ", frame K of N" suffix (K is 1-based).

    Args:
        prompt: Base text prompt shared by all frames.
        num_frames: Number of images to generate.

    Returns:
        A list of NumPy arrays, one per generated image, in frame order.
    """

    def render(index):
        # Vary the prompt slightly per frame by appending its position.
        frame_prompt = f"{prompt}, frame {index + 1} of {num_frames}"
        first_image = pipe(frame_prompt).images[0]
        return np.array(first_image)

    return [render(index) for index in range(num_frames)]

def create_video_from_frames(frames, output_path="output_video.mp4", fps=10):
    """Encode a sequence of image frames into a video file.

    Args:
        frames: Non-empty sequence of equally sized frames. Each frame is
            expected to be a channels-last ``(H, W, C)`` array, e.g. the result
            of ``np.array`` on a PIL image.
            NOTE(review): ``write_video`` documents a uint8 tensor; frames made
            from 8-bit PIL images should already satisfy that - confirm for
            other sources.
        output_path: Destination file path for the encoded video.
        fps: Frames per second of the output video.

    Returns:
        ``output_path``, for convenient chaining.

    Raises:
        ValueError: If ``frames`` is empty or does not stack into a 4-D array.
    """
    if len(frames) == 0:
        raise ValueError("frames must contain at least one image")

    stacked = np.asarray(frames)
    if stacked.ndim != 4:
        raise ValueError(
            f"frames must stack into a 4-D (T, H, W, C) array, got shape {stacked.shape}"
        )

    # torchvision's write_video takes frames as [T, H, W, C]. The stacked
    # channels-last frames already have that layout, so no permute to
    # channels-first is applied (that layout would be rejected by the encoder).
    frames_tensor = torch.from_numpy(stacked)
    write_video(output_path, frames_tensor, fps=fps)
    return output_path

def generate_comedy_animation(prompt):
    """Produce a comedy script, a matching video and a spoken version of the script.

    Args:
        prompt: Topic of the comedy scene.

    Returns:
        A ``(script, video_path, speech_path)`` tuple: the generated text, the
        path of the rendered video and the path of the synthesized audio.
    """
    script_request = f"Write a short comedy script with two characters about {prompt}. Use the format 'Character: Action' for each line of dialogue or action."
    scene_description = f"A comedic scene with two characters: {prompt}"

    script = generate_text(script_request)
    # Render the frames first, then encode them into a single video file.
    video_path = create_video_from_frames(generate_video_frames(scene_description))
    # The narration is synthesized from the generated script text.
    speech_path = generate_speech(script)
    return script, video_path, speech_path

def generate_kids_music_animation(theme):
    """Produce children's song lyrics, a matching video and a spoken version of the lyrics.

    Args:
        theme: Subject of the children's song.

    Returns:
        A ``(lyrics, video_path, speech_path)`` tuple: the generated text, the
        path of the rendered video and the path of the synthesized audio.
    """
    lyrics_request = f"Write short and simple lyrics for a children's song about {theme}. Each line should be on a new line. Don't include 'Verse' or 'Chorus' labels."
    scene_description = f"A colorful, animated music video for children about {theme}"

    lyrics = generate_text(lyrics_request)
    # Render the frames first, then encode them into a single video file.
    video_path = create_video_from_frames(generate_video_frames(scene_description))
    # The audio track is speech synthesized from the generated lyrics.
    speech_path = generate_speech(lyrics)
    return lyrics, video_path, speech_path

# Gradio Interface
# Two tabs, one per generator function. Each tab has a text input, a trigger
# button, and three outputs (text, video, audio).
with gr.Blocks() as app:
    gr.Markdown("## AI-Generated Video and Audio Content")
    
    with gr.Tab("Comedy Animation"):
        comedy_prompt = gr.Textbox(label="Enter comedy prompt")
        comedy_generate_btn = gr.Button("Generate Comedy Animation")
        comedy_script = gr.Textbox(label="Generated Comedy Script")
        comedy_animation = gr.Video(label="Comedy Animation")
        comedy_audio = gr.Audio(label="Comedy Speech")

        # Output order matches the (script, video_path, speech_path) tuple
        # returned by generate_comedy_animation.
        comedy_generate_btn.click(
            generate_comedy_animation,
            inputs=comedy_prompt,
            outputs=[comedy_script, comedy_animation, comedy_audio]
        )
    
    with gr.Tab("Kids Music Animation"):
        music_theme = gr.Textbox(label="Enter music theme for kids")
        music_generate_btn = gr.Button("Generate Kids Music Animation")
        music_lyrics = gr.Textbox(label="Generated Lyrics")
        music_animation = gr.Video(label="Music Animation")
        music_audio = gr.Audio(label="Music Audio")

        # Output order matches the (lyrics, video_path, speech_path) tuple
        # returned by generate_kids_music_animation.
        music_generate_btn.click(
            generate_kids_music_animation,
            inputs=music_theme,
            outputs=[music_lyrics, music_animation, music_audio]
        )

# Called at module level (no __main__ guard), so the app launches whenever
# this file is executed or imported.
app.launch()