File size: 4,526 Bytes
7ae35c9
5a834fc
d5bd16f
ac8612f
 
0b60483
 
 
 
d5bd16f
6125413
d5bd16f
 
c897f2c
d5bd16f
ac8612f
 
 
 
3b97054
 
0b60483
aac5874
 
d5bd16f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac8612f
 
 
 
 
 
3b97054
0b60483
 
 
3b97054
 
0b60483
 
ac8612f
3b97054
0b60483
 
 
6125413
 
d5bd16f
 
 
 
 
 
0b60483
 
 
ac8612f
0b60483
 
 
d5bd16f
 
 
 
 
0b60483
 
 
 
 
6125413
3638d85
 
d5bd16f
0b60483
 
 
 
 
 
 
 
 
 
 
 
 
6125413
0b60483
 
 
 
 
 
859e5f2
0b60483
 
 
 
 
9bea5a2
3638d85
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import gradio as gr
import torch
from transformers import AutoTokenizer
from TTS.api import TTS
import numpy as np
from PIL import Image
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from torchvision.io import write_video
import os
import groq

# Initialize Groq client
# The key is read from the environment first and handed to the client at
# construction time.
API_KEY = os.getenv("GROQ_API_KEY")
groq_client = groq.Groq(api_key=API_KEY)

# Initialize TTS model
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")

# Initialize Stable Diffusion pipeline for CPU
# float32 weights are requested explicitly and the pipeline is placed on the
# CPU; the default scheduler is replaced by DPMSolverMultistepScheduler built
# from the same scheduler config.
pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    torch_dtype=torch.float32,
)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to("cpu")

def generate_text_with_groq(
    prompt,
    max_tokens=200,
    *,
    model="mixtral-8x7b-32768",
    temperature=0.7,
    system_prompt="You are a professional comedy writer skilled in creating short, witty scripts.",
):
    """Request a chat completion from Groq and return the reply text.

    The model id, sampling temperature and system message used to be fixed
    inside the function; they are now keyword-only parameters whose defaults
    reproduce the previous behaviour, so existing callers are unaffected
    while callers that need a different persona or model can override them.

    Args:
        prompt: Text sent as the user message.
        max_tokens: Upper bound on generated tokens, forwarded to the API.
        model: Groq model identifier forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        system_prompt: Text sent as the system message. The default is a
            comedy-writer persona.

    Returns:
        The ``content`` of the first choice's message in the completion.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    chat_completion = groq_client.chat.completions.create(
        messages=messages,
        model=model,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    # Only the first choice is used; no other choices are inspected.
    return chat_completion.choices[0].message.content

def generate_speech(text, output_path="generated_speech.wav"):
    """Synthesize *text* to an audio file using the module-level TTS model.

    The destination used to be hard-coded; it is now a parameter (mirroring
    ``create_video_from_frames``) whose default keeps the previous file name,
    so callers can choose a different target without editing this function.

    Args:
        text: Text to be spoken.
        output_path: File the audio is written to. Defaults to
            ``"generated_speech.wav"`` relative to the working directory.

    Returns:
        ``output_path``, unchanged, for convenient chaining into a UI
        component.
    """
    tts.tts_to_file(text=text, file_path=output_path)
    return output_path

def generate_video_frames(prompt, num_frames=10):
    """Render ``num_frames`` images with the module-level diffusion pipeline.

    Every frame comes from its own pipeline call (20 inference steps) whose
    prompt is the base prompt followed by ``", frame <i> of <num_frames>"``
    with ``i`` counting from 1.

    Args:
        prompt: Base text prompt shared by all frames.
        num_frames: Number of images to generate; values below 1 yield an
            empty list.

    Returns:
        A list of NumPy arrays, one per generated image, in frame order.
    """
    frame_prompts = [
        f"{prompt}, frame {number} of {num_frames}"
        for number in range(1, num_frames + 1)
    ]
    # A single no-grad scope covers all pipeline calls.
    with torch.no_grad():
        return [
            np.array(pipe(text, num_inference_steps=20).images[0])
            for text in frame_prompts
        ]

def create_video_from_frames(frames, output_path="output_video.mp4", fps=5):
    """Encode a sequence of image arrays into a video file.

    Args:
        frames: Non-empty sequence of equally shaped image arrays.
            Presumably each is (height, width, channels) uint8, as produced
            by ``np.array`` on a PIL image - confirm against the caller.
        output_path: Destination video file.
        fps: Frames per second passed to the encoder.

    Returns:
        ``output_path``, unchanged.

    Raises:
        ValueError: If ``frames`` is empty.
    """
    # len() rather than truthiness: frames may be a NumPy array, whose truth
    # value is ambiguous.
    if len(frames) == 0:
        raise ValueError("frames must contain at least one image")

    # torchvision's write_video expects a (T, H, W, C) tensor. Stacking the
    # per-frame arrays already yields that layout, so the frames must NOT be
    # permuted to channels-first before writing.
    frames_tensor = torch.from_numpy(np.stack(frames))
    write_video(output_path, frames_tensor, fps=fps)
    return output_path

def generate_comedy_animation(prompt):
    """Build a comedy script, a short video and spoken audio for a topic.

    The script is requested from the text model first, then the video frames
    are rendered and encoded, and finally the script is synthesized to
    speech.

    Args:
        prompt: Topic of the comedy scene.

    Returns:
        A ``(script, video_path, speech_path)`` tuple.
    """
    # Line breaks, indentation and trailing spaces are written out explicitly
    # so the exact text sent to the model is visible.
    script_request = (
        f"Write a short, witty comedy script with two characters about {prompt}. \n"
        "    Use the format 'Character: Dialogue or Action' for each line. \n"
        "    Include clever wordplay, unexpected twists, and snappy dialogue. \n"
        "    Keep it concise, around 5-8 exchanges. Make it genuinely funny!"
    )
    script = generate_text_with_groq(script_request)

    scene_frames = generate_video_frames(
        f"A comedic scene with two characters: {prompt}"
    )
    video_path = create_video_from_frames(scene_frames)

    # Speech is produced last, after the video has been written.
    return script, video_path, generate_speech(script)

def generate_kids_music_animation(theme):
    """Build children's song lyrics, a short video and spoken audio.

    Lyrics are requested from the text model, frames for a themed video are
    rendered and encoded, and the lyrics are then synthesized to speech.

    Args:
        theme: Subject of the children's song.

    Returns:
        A ``(lyrics, video_path, speech_path)`` tuple.
    """
    # The request is assembled line by line; the leading indentation and
    # trailing spaces are part of the text sent to the model.
    lyrics_request = "\n".join(
        (
            f"Write short, catchy, and simple lyrics for a children's song about {theme}. ",
            "    Each line should be on a new line. Don't include 'Verse' or 'Chorus' labels. ",
            "    Make it educational, fun, and easy to remember. Include a repeating chorus.",
        )
    )
    lyrics = generate_text_with_groq(lyrics_request)

    clip_frames = generate_video_frames(
        f"A colorful, animated music video for children about {theme}"
    )
    video_path = create_video_from_frames(clip_frames)

    # Speech is produced last, after the video has been written.
    return lyrics, video_path, generate_speech(lyrics)

# Gradio Interface
# Two tabs with the same layout: a text input, a trigger button, and three
# outputs (generated text, video, audio) filled by the matching generator.
with gr.Blocks() as app:
    gr.Markdown("## AI-Generated Video and Audio Content (Optimized CPU Version with Groq API)")

    # --- Comedy tab ---------------------------------------------------------
    with gr.Tab("Comedy Animation"):
        comedy_prompt = gr.Textbox(label="Enter comedy prompt")
        comedy_generate_btn = gr.Button("Generate Comedy Animation")
        comedy_script = gr.Textbox(label="Generated Comedy Script")
        comedy_animation = gr.Video(label="Comedy Animation")
        comedy_audio = gr.Audio(label="Comedy Speech")

        # Output order matches the (script, video_path, speech_path) tuple.
        comedy_generate_btn.click(
            fn=generate_comedy_animation,
            inputs=comedy_prompt,
            outputs=[comedy_script, comedy_animation, comedy_audio],
        )

    # --- Kids music tab -----------------------------------------------------
    with gr.Tab("Kids Music Animation"):
        music_theme = gr.Textbox(label="Enter music theme for kids")
        music_generate_btn = gr.Button("Generate Kids Music Animation")
        music_lyrics = gr.Textbox(label="Generated Lyrics")
        music_animation = gr.Video(label="Music Animation")
        music_audio = gr.Audio(label="Music Audio")

        # Output order matches the (lyrics, video_path, speech_path) tuple.
        music_generate_btn.click(
            fn=generate_kids_music_animation,
            inputs=music_theme,
            outputs=[music_lyrics, music_animation, music_audio],
        )

# Start the web server as soon as the module is executed.
app.launch()