"""Gradio app that generates a comedy animation or a kids music animation.

The comedy tab turns a prompt into a generated script, a spoken rendition of
that script, and a video built from one generated anime image per sentence
with a generated music track.  The kids tab produces a music track and a
simple title-card video for a theme.

All models are loaded once at import time and run on the CPU.
"""

import os
import subprocess

import gradio as gr
import scipy.io.wavfile
import torch
from diffusers import StableDiffusionPipeline
from moviepy.editor import (
    AudioFileClip,
    CompositeVideoClip,
    ImageClip,
    ImageSequenceClip,
    VideoFileClip,
    concatenate_videoclips,
)
from PIL import Image, ImageDraw, ImageFont
from transformers import pipeline
from TTS.api import TTS  # Coqui TTS (open source)

# Initialize Clients
# Replace with your actual API keys or methods of getting them
# groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# deepgram_client = Deepgram(api_key=os.environ.get("DEEPGRAM_API_KEY"))

# Use DistilGPT-2 for text generation
script_generator = pipeline(
    "text-generation", model="distilgpt2", truncation=True, max_length=100
)

# Use Coqui TTS for text-to-speech
tts = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
    gpu=False,
)

# Use MusicGen for music generation
music_generator = pipeline(
    "text-to-audio", model="facebook/musicgen-small", device="cpu"
)

# Use Fluently Anime (Stable Diffusion) for anime image generation
model_id = "fluently/Fluently-anime"
anime_image_generator = StableDiffusionPipeline.from_pretrained(model_id).to("cpu")


def generate_comedy_script(prompt):
    """Generate a comedy script from *prompt* using DistilGPT-2.

    Returns the ``generated_text`` of the first result produced by the
    text-generation pipeline.
    """
    script = script_generator(prompt)[0]['generated_text']
    return script


def text_to_speech(script):
    """Synthesize *script* to ``output.wav`` with Coqui TTS.

    Returns the path of the written audio file.
    """
    output_audio = 'output.wav'
    tts.tts_to_file(text=script, file_path=output_audio)
    return output_audio


def create_images_from_script(script):
    """Generate one anime image per sentence of *script*.

    The script is split on ``'. '``; each piece is stripped of surrounding
    whitespace and blank pieces are skipped so the image model is never
    called with an empty prompt.  Images are saved as
    ``/tmp/anime_image_<i>.png``.

    Returns the list of saved image paths, in sentence order.
    """
    sentences = [part.strip() for part in script.split('. ')]
    sentences = [sentence for sentence in sentences if sentence]

    image_paths = []
    for i, line in enumerate(sentences):
        img = anime_image_generator(line).images[0]
        img_path = f'/tmp/anime_image_{i}.png'
        img.save(img_path)
        image_paths.append(img_path)
    return image_paths


def generate_fun_music(prompt, output_music_file="fun_music.wav"):
    """Generate a music track for *prompt* with MusicGen.

    The audio returned by the pipeline is written as a WAV file to
    *output_music_file*.

    Returns the path of the written file.
    """
    response = music_generator(prompt)
    audio_data = response["audio"]
    sampling_rate = response["sampling_rate"]
    scipy.io.wavfile.write(output_music_file, rate=sampling_rate, data=audio_data)
    return output_music_file


def generate_text_video(script):
    """Render the images generated for *script* into an MP4 video.

    Returns the path of the written video (``/tmp/final_video.mp4``).

    Raises:
        ValueError: if *script* contains no non-empty sentence, so there is
            nothing to build a video from.
    """
    image_paths = create_images_from_script(script)
    if not image_paths:
        raise ValueError("Cannot build a video: the script has no sentences.")

    # Generate video using moviepy from the sequence of images.
    # NOTE(review): at fps=24 each image is one frame (1/24 s), so the clip
    # is very short -- confirm whether a longer per-image duration is wanted.
    video_clip = ImageSequenceClip(image_paths, fps=24)
    video_path = "/tmp/final_video.mp4"
    video_clip.write_videofile(video_path, codec='libx264')
    return video_path


def combine_audio_video(video_file, audio_file):
    """Attach the audio in *audio_file* to the video in *video_file*.

    Returns the resulting moviepy clip.  The clip is not written to disk
    here; the caller is responsible for rendering and closing it.
    """
    video = VideoFileClip(video_file)
    audio = AudioFileClip(audio_file)
    final_video = video.set_audio(audio)
    return final_video


def generate_comedy_and_animation(prompt):
    """Run the full comedy pipeline for *prompt*.

    Returns a tuple ``(script, audio_file, video_path)`` where *script* is
    the generated text, *audio_file* is the path of the spoken script and
    *video_path* is the path of the rendered animation with music.
    """
    script = generate_comedy_script(prompt)
    audio_file = text_to_speech(script)
    video_file = generate_text_video(script)
    fun_music = generate_fun_music(prompt)

    # gr.Video needs a file path, not a moviepy clip object, so render the
    # combined clip to disk and hand the path back to Gradio.
    final_video = combine_audio_video(video_file, fun_music)
    final_video_path = "/tmp/comedy_animation.mp4"
    try:
        final_video.write_videofile(final_video_path, codec='libx264')
    finally:
        # Release the ffmpeg readers held by the clip.
        final_video.close()
    return script, audio_file, final_video_path


def generate_kids_content(theme):
    """Generate a music track and a title-card animation for *theme*.

    Five one-second frames showing ``Kids Music: <theme>`` on a blue
    background are concatenated and combined with the generated music.

    Returns a tuple ``(music_file, video_path)``.
    """
    music_file = generate_fun_music(theme, output_music_file="kids_music.wav")
    fnt = ImageFont.load_default()  # same font for every frame

    clips = []
    for i in range(5):
        img = Image.new('RGB', (800, 400), color=(0, 0, 255))
        d = ImageDraw.Draw(img)
        d.text((10, 180), f"Kids Music: {theme}", font=fnt, fill=(255, 255, 0))
        frame_path = f'/tmp/kids_temp_{i}.png'
        img.save(frame_path)
        clips.append(
            ImageClip(frame_path).set_duration(1).set_position(('center', 'center'))
        )

    final_video = concatenate_videoclips(clips, method="compose").set_audio(
        AudioFileClip(music_file)
    )
    final_video.write_videofile("/tmp/kids_animation.mp4", fps=24)
    return music_file, "/tmp/kids_animation.mp4"


# Gradio Interface
with gr.Blocks() as app:
    gr.Markdown("## AI Comedy and Kids Content Generator")

    # Comedy Animation Tab
    with gr.Tab("Generate Comedy Animation"):
        prompt_input = gr.Textbox(label="Comedy Prompt")
        generate_btn = gr.Button("Generate Comedy Script and Animation")
        comedy_script = gr.Textbox(label="Generated Script")
        comedy_audio = gr.Audio(label="Generated Audio")
        comedy_video = gr.Video(label="Generated Animation")
        generate_btn.click(
            generate_comedy_and_animation,
            inputs=prompt_input,
            outputs=[comedy_script, comedy_audio, comedy_video],
        )

    # Kids Music Animation Tab
    with gr.Tab("Generate Kids Music Animation"):
        theme_input = gr.Textbox(label="Kids Music Theme")
        generate_music_btn = gr.Button("Generate Kids Music and Animation")
        kids_music_audio = gr.Audio(label="Generated Music")
        kids_music_video = gr.Video(label="Generated Kids Animation")
        generate_music_btn.click(
            generate_kids_content,
            inputs=theme_input,
            outputs=[kids_music_audio, kids_music_video],
        )

app.launch()