"""AI Comedy and Kids Content Generator.

Gradio app that chains several generative models:
  * DistilGPT-2  -> comedy script text
  * Coqui TTS    -> narration audio
  * a text-to-audio pipeline -> background music
  * Stable Diffusion -> one image per script sentence
  * moviepy      -> assembles images + audio into an MP4
"""

import os

import gradio as gr
import scipy.io.wavfile
import torch
from deepgram import Deepgram
from diffusers import StableDiffusionPipeline
from groq import Groq
from moviepy.editor import (
    AudioFileClip,
    CompositeVideoClip,
    ImageClip,
    VideoFileClip,  # FIX: was missing from the import list but used below
    concatenate_videoclips,
)
from PIL import Image, ImageDraw, ImageFont
from transformers import pipeline
from TTS.api import TTS

# --- API clients -----------------------------------------------------------
# NOTE(review): neither client is referenced anywhere in this file — kept so
# any external code importing them from this module keeps working.
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# FIX: the env var was misspelled "DEEGRAM_API_KEY". Read the correct name
# first, falling back to the old misspelling so existing deployments that set
# the typo'd variable still work.
deepgram_client = Deepgram(
    api_key=os.environ.get("DEEPGRAM_API_KEY", os.environ.get("DEEGRAM_API_KEY"))
)

# --- Models (all forced to CPU) --------------------------------------------
# DistilGPT-2 for short comedy-script generation.
script_generator = pipeline(
    "text-generation", model="distilgpt2", truncation=True, max_length=100
)

# NOTE(review): "whisper" is a speech-*recognition* model family, not a Coqui
# TTS model id — confirm the intended TTS model name (e.g. a tts_models/... id).
tts = TTS(model_name="whisper", progress_bar=False, gpu=False)

# NOTE(review): "musiclm" is not a published Hugging Face model id — confirm
# the intended text-to-audio checkpoint (e.g. facebook/musicgen-small).
music_generator = pipeline("text-to-audio", model="musiclm", device="cpu")

# Stable Diffusion 2.1 base for per-sentence illustration images.
image_generator = StableDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-1-base", torch_dtype=torch.float32
).to("cpu")


def generate_comedy_script(prompt):
    """Return a short comedy script continuing *prompt* via DistilGPT-2."""
    script = script_generator(prompt)[0]["generated_text"]
    return script


def text_to_speech(script):
    """Synthesize *script* to 'output.wav' and return that path."""
    output_audio = "output.wav"
    tts.tts_to_file(text=script, file_path=output_audio)
    return output_audio


def create_images_from_script(script):
    """Generate one Stable Diffusion image per sentence of *script*.

    Returns the list of saved image paths (/tmp/image_<i>.png).
    """
    # Naive sentence split on '. '; good enough for short generated scripts.
    lines = script.split(". ")
    image_paths = []
    for i, line in enumerate(lines):
        img = image_generator(line).images[0]
        img_path = f"/tmp/image_{i}.png"
        img.save(img_path)
        image_paths.append(img_path)
    return image_paths


def generate_fun_music(prompt, output_music_file="fun_music.wav"):
    """Generate a music track for *prompt* and save it as a WAV file.

    Returns the output file path.
    """
    response = music_generator(prompt)
    # The text-to-audio pipeline returns raw samples plus their sample rate.
    audio_data = response["audio"]
    sampling_rate = response["sampling_rate"]
    scipy.io.wavfile.write(output_music_file, rate=sampling_rate, data=audio_data)
    return output_music_file


def generate_text_video(script):
    """Build a silent slideshow video (3 s per image) from *script*.

    Returns the path of the written MP4.
    """
    image_paths = create_images_from_script(script)
    clips = []
    for img_path in image_paths:
        image_clip = ImageClip(img_path).set_duration(3).set_position(("center", "center"))
        clips.append(image_clip)
    final_video = concatenate_videoclips(clips, method="compose")
    final_video.write_videofile("/tmp/final_video.mp4", fps=24)
    return "/tmp/final_video.mp4"


def combine_audio_video(video_path, audio_path):
    """Mux *audio_path* onto *video_path*; return the combined MP4 path."""
    # FIX: VideoFileClip was previously not imported -> NameError here.
    video = VideoFileClip(video_path)
    audio = AudioFileClip(audio_path)
    final_video = video.set_audio(audio)
    final_video.write_videofile("/tmp/final_comedy_video.mp4", fps=24)
    return "/tmp/final_comedy_video.mp4"


def generate_comedy_and_animation(prompt):
    """Full comedy pipeline: script -> narration -> slideshow -> music mux.

    Returns (script_text, narration_wav_path, final_video_path).
    """
    script = generate_comedy_script(prompt)
    audio_file = text_to_speech(script)
    video_file = generate_text_video(script)
    fun_music = generate_fun_music(prompt)
    final_video = combine_audio_video(video_file, fun_music)
    return script, audio_file, final_video


def generate_kids_content(theme):
    """Generate kids music for *theme* plus a simple captioned animation.

    Returns (music_wav_path, animation_mp4_path).
    """
    music_file = generate_fun_music(theme, output_music_file="kids_music.wav")
    clips = []
    # Five identical 1-second caption frames on a blue background.
    for i in range(5):
        img = Image.new("RGB", (800, 400), color=(0, 0, 255))
        d = ImageDraw.Draw(img)
        fnt = ImageFont.load_default()
        d.text((10, 180), f"Kids Music: \n{theme}", font=fnt, fill=(255, 255, 0))
        frame_path = f"/tmp/kids_temp_{i}.png"
        img.save(frame_path)
        clips.append(ImageClip(frame_path).set_duration(1).set_position(("center", "center")))
    final_video = CompositeVideoClip(clips, size=(800, 400))
    final_video = final_video.set_audio(AudioFileClip(music_file))
    final_video.write_videofile("/tmp/kids_animation.mp4", fps=24)
    return music_file, "/tmp/kids_animation.mp4"


# --- Gradio interface ------------------------------------------------------
with gr.Blocks() as app:
    gr.Markdown("## AI Comedy and Kids Content Generator")

    # Comedy Animation Tab
    with gr.Tab("Generate Comedy Animation"):
        prompt_input = gr.Textbox(label="Comedy Prompt")
        generate_btn = gr.Button("Generate Comedy Script and Animation")
        comedy_script = gr.Textbox(label="Generated Script")
        comedy_audio = gr.Audio(label="Generated Audio")
        comedy_video = gr.Video(label="Generated Animation")
        generate_btn.click(
            generate_comedy_and_animation,
            inputs=prompt_input,
            outputs=[comedy_script, comedy_audio, comedy_video],
        )

    # Kids Music Animation Tab
    with gr.Tab("Generate Kids Music Animation"):
        theme_input = gr.Textbox(label="Kids Music Theme")
        generate_music_btn = gr.Button("Generate Kids Music and Animation")
        kids_music_audio = gr.Audio(label="Generated Music")
        kids_music_video = gr.Video(label="Generated Kids Animation")
        generate_music_btn.click(
            generate_kids_content,
            inputs=theme_input,
            outputs=[kids_music_audio, kids_music_video],
        )

app.launch()