"""Gradio demo that turns a text prompt into a script/lyrics, a short video and speech.

Pipeline per request:
  1. Groq chat completion writes the text (comedy script or children's lyrics).
  2. Stable Diffusion (CPU) renders a handful of still frames from the prompt.
  3. torchvision encodes the frames into an MP4.
  4. Coqui TTS reads the generated text into a WAV file.

Models are loaded at import time, so importing this module is slow and
downloads weights on first run.
"""

import os

import gradio as gr
import torch
from transformers import AutoTokenizer
from TTS.api import TTS
import numpy as np
from PIL import Image
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from torchvision.io import write_video
import groq

# Initialize Groq client. The key is handed to the constructor instead of
# being patched onto the client after construction.
API_KEY = os.getenv("GROQ_API_KEY")
groq_client = groq.Groq(api_key=API_KEY)

# Initialize TTS model
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")

# Initialize Stable Diffusion pipeline for CPU
pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    torch_dtype=torch.float32,
)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to("cpu")


def generate_text_with_groq(
    prompt: str,
    max_tokens: int = 200,
    model: str = "mixtral-8x7b-32768",
    temperature: float = 0.7,
):
    """Ask the Groq chat API to write text for ``prompt``.

    Args:
        prompt: User message sent to the model.
        max_tokens: Upper bound on the number of generated tokens.
        model: Groq model identifier.
        temperature: Sampling temperature passed to the API.

    Returns:
        The ``content`` of the first returned choice's message.
    """
    # NOTE(review): confirm the default model id is still served by Groq;
    # hosted model names get retired over time.
    chat_completion = groq_client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "You are a professional comedy writer skilled in creating short, witty scripts.",
            },
            {
                "role": "user",
                "content": prompt,
            },
        ],
        model=model,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    return chat_completion.choices[0].message.content


def generate_speech(text: str) -> str:
    """Synthesize ``text`` to a WAV file and return the file's path.

    The output path is fixed, so each call overwrites the previous result.
    """
    output_path = "generated_speech.wav"
    tts.tts_to_file(text=text, file_path=output_path)
    return output_path


def generate_video_frames(prompt: str, num_frames: int = 10) -> list:
    """Render ``num_frames`` Stable Diffusion images for ``prompt``.

    Each frame gets its own prompt suffix (``frame i of n``). Frames are
    generated independently of one another.

    Returns:
        A list of NumPy arrays, one per generated image.
    """
    frames = []
    # Inference only: no gradients are needed for any frame.
    with torch.no_grad():
        for i in range(num_frames):
            frame_prompt = f"{prompt}, frame {i+1} of {num_frames}"
            image = pipe(frame_prompt, num_inference_steps=20).images[0]
            frames.append(np.array(image))
    return frames


def create_video_from_frames(frames, output_path: str = "output_video.mp4", fps: int = 5) -> str:
    """Encode ``frames`` into a video file and return its path.

    Args:
        frames: Non-empty sequence of equally sized image arrays, as produced
            by ``generate_video_frames``.
        output_path: Destination file; overwritten if it exists.
        fps: Frames per second of the encoded video.

    Raises:
        ValueError: If ``frames`` is empty.
    """
    if len(frames) == 0:
        raise ValueError("frames must contain at least one image")

    # write_video expects a uint8 tensor laid out as (T, H, W, C). Stacking
    # the per-frame (H, W, C) arrays already yields that layout, so the
    # frames must NOT be permuted to channels-first.
    frames_tensor = torch.from_numpy(np.stack(frames))
    write_video(output_path, frames_tensor, fps=fps)
    return output_path


def generate_comedy_animation(prompt: str) -> tuple:
    """Generate a comedy script plus matching video and narration.

    Returns:
        ``(script, video_path, speech_path)`` in the order the Gradio
        outputs are wired.
    """
    script_prompt = (
        f"Write a short, witty comedy script with two characters about {prompt}. "
        "Use the format 'Character: Dialogue or Action' for each line. \n"
        "Include clever wordplay, unexpected twists, and snappy dialogue. "
        "Keep it concise, around 5-8 exchanges. Make it genuinely funny!"
    )
    script = generate_text_with_groq(script_prompt)

    video_prompt = f"A comedic scene with two characters: {prompt}"
    frames = generate_video_frames(video_prompt)
    video_path = create_video_from_frames(frames)

    speech_path = generate_speech(script)
    return script, video_path, speech_path


def generate_kids_music_animation(theme: str) -> tuple:
    """Generate children's song lyrics plus matching video and narration.

    Returns:
        ``(lyrics, video_path, speech_path)`` in the order the Gradio
        outputs are wired.
    """
    lyrics_prompt = (
        f"Write short, catchy, and simple lyrics for a children's song about {theme}. "
        "Each line should be on a new line. "
        "Don't include 'Verse' or 'Chorus' labels. "
        "Make it educational, fun, and easy to remember. "
        "Include a repeating chorus."
    )
    lyrics = generate_text_with_groq(lyrics_prompt)

    video_prompt = f"A colorful, animated music video for children about {theme}"
    frames = generate_video_frames(video_prompt)
    video_path = create_video_from_frames(frames)

    speech_path = generate_speech(lyrics)
    return lyrics, video_path, speech_path


# Gradio Interface
with gr.Blocks() as app:
    gr.Markdown("## AI-Generated Video and Audio Content (Optimized CPU Version with Groq API)")

    with gr.Tab("Comedy Animation"):
        comedy_prompt = gr.Textbox(label="Enter comedy prompt")
        comedy_generate_btn = gr.Button("Generate Comedy Animation")
        comedy_script = gr.Textbox(label="Generated Comedy Script")
        comedy_animation = gr.Video(label="Comedy Animation")
        comedy_audio = gr.Audio(label="Comedy Speech")

        comedy_generate_btn.click(
            generate_comedy_animation,
            inputs=comedy_prompt,
            outputs=[comedy_script, comedy_animation, comedy_audio],
        )

    with gr.Tab("Kids Music Animation"):
        music_theme = gr.Textbox(label="Enter music theme for kids")
        music_generate_btn = gr.Button("Generate Kids Music Animation")
        music_lyrics = gr.Textbox(label="Generated Lyrics")
        music_animation = gr.Video(label="Music Animation")
        music_audio = gr.Audio(label="Music Audio")

        music_generate_btn.click(
            generate_kids_music_animation,
            inputs=music_theme,
            outputs=[music_lyrics, music_animation, music_audio],
        )

app.launch()