Spaces:

Manasa1
/

AI_Comedy_Show

Runtime error

App Files Files Community

Manasa1 committed on Oct 26, 2024

Commit

c13a478

verified ·

1 Parent(s): 514dd74

Update app.py

Browse files

Files changed (1) hide show

app.py +187 -153

app.py CHANGED Viewed

@@ -1,212 +1,246 @@
 import gradio as gr
 import torch
 from transformers import AutoTokenizer
 from TTS.api import TTS
-import numpy as np
-from PIL import Image
 from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
 from torchvision.io import write_video
 import os
 import groq
 import logging
 from pathlib import Path
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# Environment setup and validation
-if not (API_KEY := os.getenv("GROQ_API_KEY")):
-    raise ValueError("GROQ_API_KEY not found in environment variables")
-# Initialize clients and models with error handling
-try:
-    groq_client = groq.Groq(api_key=API_KEY)
-    # Initialize TTS model
-    tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")
-    # Initialize Stable Diffusion with optimizations
-    pipe = StableDiffusionPipeline.from_pretrained(
-        "CompVis/stable-diffusion-v1-4",
-        torch_dtype=torch.float32
-    )
-    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-    pipe = pipe.to("cpu")
-    pipe.enable_attention_slicing()  # Memory optimization
-except Exception as e:
-    logger.error(f"Error initializing models: {str(e)}")
-    raise
-class ContentGenerator:
     def __init__(self):
         self.output_dir = Path("generated_content")
         self.output_dir.mkdir(exist_ok=True)
-    def cleanup_old_files(self):
-        """Clean up previously generated files"""
-        for file in self.output_dir.glob("*"):
-            try:
-                file.unlink()
-            except Exception as e:
-                logger.warning(f"Could not delete {file}: {e}")
-    def generate_text_with_groq(self, prompt, max_tokens=200):
-        """Generate text with error handling"""
-        try:
-            chat_completion = groq_client.chat.completions.create(
-                messages=[
-                    {
-                        "role": "system",
-                        "content": "You are a professional comedy writer skilled in creating short, witty scripts."
-                    },
-                    {
-                        "role": "user",
-                        "content": prompt
-                    }
-                ],
-                model="mixtral-8x7b-32768",
-                max_tokens=max_tokens,
-                temperature=0.7,
-            )
-            return chat_completion.choices[0].message.content
-        except Exception as e:
-            logger.error(f"Error generating text: {str(e)}")
-            raise
-    def generate_speech(self, text):
-        """Generate speech with unique filenames"""
-        try:
-            output_path = self.output_dir / f"speech_{hash(text)}.wav"
-            tts.tts_to_file(text=text, file_path=str(output_path))
-            return str(output_path)
-        except Exception as e:
-            logger.error(f"Error generating speech: {str(e)}")
-            raise
-    def generate_video_frames(self, prompt, num_frames=15):
-        """Generate video frames with progress tracking"""
         frames = []
-        try:
-            for i in range(num_frames):
-                frame_prompt = f"{prompt}, frame {i+1} of {num_frames}"
-                with torch.no_grad():
-                    image = pipe(
-                        frame_prompt,
-                        num_inference_steps=20,
-                        guidance_scale=7.5
-                    ).images[0]
-                frames.append(np.array(image))
-                logger.info(f"Generated frame {i+1}/{num_frames}")
-        except Exception as e:
-            logger.error(f"Error generating frames: {str(e)}")
-            raise
         return frames
-    def create_video_from_frames(self, frames, prompt):
-        """Create video with unique filenames"""
         try:
-            output_path = self.output_dir / f"video_{hash(prompt)}.mp4"
-            frames_tensor = torch.from_numpy(np.array(frames)).permute(0, 3, 1, 2)
-            write_video(str(output_path), frames_tensor, fps=8)
-            return str(output_path)
         except Exception as e:
-            logger.error(f"Error creating video: {str(e)}")
-            raise
     def generate_comedy_animation(self, prompt):
-        """Generate comedy animation with error handling"""
         try:
-            self.cleanup_old_files()
-            script_prompt = f"""Write a short, witty comedy script with two characters about {prompt}.
-            Use the format 'Character: Dialogue or Action' for each line.
-            Include clever wordplay, unexpected twists, and snappy dialogue.
-            Keep it concise, around 5-8 exchanges. Make it genuinely funny!"""
-            script = self.generate_text_with_groq(script_prompt)
-            video_prompt = f"A comedic scene with two characters: {prompt}"
-            frames = self.generate_video_frames(video_prompt)
-            video_path = self.create_video_from_frames(frames, video_prompt)
-            speech_path = self.generate_speech(script)
-            return script, video_path, speech_path
         except Exception as e:
             logger.error(f"Error in comedy animation generation: {str(e)}")
             return "Error generating content", None, None
     def generate_kids_music_animation(self, theme):
-        """Generate kids music animation with error handling"""
         try:
-            self.cleanup_old_files()
-            lyrics_prompt = f"""Write short, catchy, and simple lyrics for a children's song about {theme}.
-            Each line should be on a new line. Don't include 'Verse' or 'Chorus' labels.
-            Make it educational, fun, and easy to remember. Include a repeating chorus."""
-            lyrics = self.generate_text_with_groq(lyrics_prompt)
-            video_prompt = f"A colorful, animated music video for children about {theme}"
-            frames = self.generate_video_frames(video_prompt)
-            video_path = self.create_video_from_frames(frames, video_prompt)
-            speech_path = self.generate_speech(lyrics)
-            return lyrics, video_path, speech_path
         except Exception as e:
             logger.error(f"Error in kids music animation generation: {str(e)}")
             return "Error generating content", None, None
-# Initialize content generator
-generator = ContentGenerator()
 # Gradio Interface
-with gr.Blocks(theme='ysharma/steampunk') as app:
-    gr.Markdown("## AI-Generated Video and Audio Content")
-    # Status message for errors
-    status_msg = gr.Textbox(label="Status", visible=False)
-    with gr.Tab("Comedy Animation"):
-        comedy_prompt = gr.Textbox(label="Enter comedy prompt")
-        comedy_generate_btn = gr.Button("Generate Comedy Animation")
-        comedy_script = gr.Textbox(label="Generated Comedy Script")
-        comedy_animation = gr.Video(label="Comedy Animation")
-        comedy_audio = gr.Audio(label="Comedy Speech")
-        def comedy_wrapper(prompt):
-            status_msg.visible = True
-            try:
-                return generator.generate_comedy_animation(prompt)
-            except Exception as e:
-                status_msg.value = f"Error: {str(e)}"
-                return None, None, None
         comedy_generate_btn.click(
-            comedy_wrapper,
             inputs=comedy_prompt,
             outputs=[comedy_script, comedy_animation, comedy_audio]
         )
-    with gr.Tab("Kids Music Animation"):
-        music_theme = gr.Textbox(label="Enter music theme for kids")
-        music_generate_btn = gr.Button("Generate Kids Music Animation")
-        music_lyrics = gr.Textbox(label="Generated Lyrics")
-        music_animation = gr.Video(label="Music Animation")
-        music_audio = gr.Audio(label="Music Audio")
-        def music_wrapper(theme):
-            status_msg.visible = True
-            try:
-                return generator.generate_kids_music_animation(theme)
-            except Exception as e:
-                status_msg.value = f"Error: {str(e)}"
-                return None, None, None
         music_generate_btn.click(
-            music_wrapper,
             inputs=music_theme,
             outputs=[music_lyrics, music_animation, music_audio]
         )
 if __name__ == "__main__":
     app.launch()

 import gradio as gr
 import torch
+import numpy as np
+from PIL import Image, ImageDraw
 from transformers import AutoTokenizer
 from TTS.api import TTS
 from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
 from torchvision.io import write_video
 import os
 import groq
 import logging
 from pathlib import Path
+import cv2
+from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip
 # Set up logging
+# (root logger at INFO; the module-level `logger` is what the code below reports errors through)
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+class EnhancedContentGenerator:
+    """Generate cartoon clips: an LLM-written script, a frame sequence, and narration.
+
+    Text comes from the Groq chat API, frames from a Stable Diffusion pipeline
+    moved to the CPU, and narration from a Coqui TTS model. Generated files
+    are written under ``self.output_dir``.
+    """
     def __init__(self):
+        """Create the output directory, the Groq client, and the TTS/diffusion models.
+
+        Raises:
+            ValueError: if ``GROQ_API_KEY`` is not set in the environment.
+        """
         self.output_dir = Path("generated_content")
         self.output_dir.mkdir(exist_ok=True)
+        # Validate the API key first so a misconfigured deployment fails
+        # before any model is instantiated.
+        if not (self.api_key := os.getenv("GROQ_API_KEY")):
+            raise ValueError("GROQ_API_KEY not found in environment variables")
+        self.groq_client = groq.Groq(api_key=self.api_key)
+        # Initialize TTS with a more cartoon-appropriate voice
+        self.tts = TTS(model_name="tts_models/en/vctk/vits")
+        # VCTK/VITS is a multi-speaker model and tts_to_file() rejects calls
+        # that name no speaker, so settle on one voice here.
+        available_speakers = self.tts.speakers
+        self.speaker = available_speakers[0] if available_speakers else None
+        # Initialize Stable Diffusion with cartoon-specific model
+        self.pipe = StableDiffusionPipeline.from_pretrained(
+            "nitrosocke/Ghibli-Diffusion",  # Using anime/cartoon style model
+            torch_dtype=torch.float32
+        )
+        self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(self.pipe.scheduler.config)
+        self.pipe = self.pipe.to("cpu")
+        self.pipe.enable_attention_slicing()
+    def _chat(self, system_prompt, user_prompt):
+        """Return the message text of one Groq chat completion."""
+        completion = self.groq_client.chat.completions.create(
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt}
+            ],
+            model="mixtral-8x7b-32768",
+            temperature=0.7
+        )
+        return completion.choices[0].message.content
+    def _synthesize_speech(self, text, stem):
+        """Speak ``text`` into ``<output_dir>/<stem>.wav`` and return that path as a string."""
+        speech_path = str(self.output_dir / f"{stem}.wav")
+        # Only pass `speaker` when the loaded model actually has speakers.
+        speaker_kwargs = {"speaker": self.speaker} if self.speaker is not None else {}
+        self.tts.tts_to_file(text=text, file_path=speech_path, **speaker_kwargs)
+        return speech_path
+    def _write_video(self, frames, stem, fps):
+        """Encode ``frames`` into ``<output_dir>/<stem>.mp4`` and return that path as a string.
+
+        Raises:
+            ValueError: if ``frames`` is empty.
+        """
+        if not frames:
+            raise ValueError("No frames were generated for the video")
+        video_path = str(self.output_dir / f"{stem}.mp4")
+        # torchvision's write_video takes frames laid out as (T, H, W, C).
+        # The frames are already H x W x C arrays, so stack them as they are;
+        # permuting to (T, C, H, W) hands the encoder a scrambled layout.
+        frames_tensor = torch.from_numpy(np.stack(frames))
+        write_video(video_path, frames_tensor, fps=fps)
+        return video_path
+    def generate_cartoon_frame(self, prompt, style="cartoon"):
+        """Generate a single cartoon frame with specified style
+
+        ``style`` selects a phrase appended to ``prompt`` ("cartoon", "anime"
+        or "kids"); an unknown style falls back to the "cartoon" phrase.
+        Returns the first image produced by the pipeline as a NumPy array.
+        """
+        style_prompts = {
+            "cartoon": "in the style of a western cartoon, vibrant colors, simple shapes",
+            "anime": "in the style of Studio Ghibli anime, detailed backgrounds",
+            "kids": "in the style of a children's book illustration, cute and colorful"
+        }
+        enhanced_prompt = f"{prompt}, {style_prompts.get(style, style_prompts['cartoon'])}"
+        with torch.no_grad():
+            image = self.pipe(
+                enhanced_prompt,
+                num_inference_steps=30,
+                guidance_scale=7.5
+            ).images[0]
+        return np.array(image)
+    def add_cartoon_effects(self, frame):
+        """Add cartoon-style effects to a frame
+
+        Smooths the colours with a bilateral filter and keeps only the pixels
+        selected by an adaptive-threshold edge mask, so masked-out pixels come
+        back as zeros. A 2-D (grayscale) input is expanded to three channels first.
+        """
+        # Convert to RGB if necessary
+        if len(frame.shape) == 2:
+            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
+        # Apply cartoon effect
+        gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
+        gray = cv2.medianBlur(gray, 5)
+        edges = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 9)
+        color = cv2.bilateralFilter(frame, 9, 300, 300)
+        # Combine edges with color
+        cartoon = cv2.bitwise_and(color, color, mask=edges)
+        return cartoon
+    def generate_video_sequence(self, script, style="cartoon", num_frames=24):
+        """Generate a sequence of frames based on the script
+
+        The script is split into scenes on blank lines. Each non-blank scene
+        gets one generated image, run through ``add_cartoon_effects`` and
+        repeated ``max(num_frames // scene_count, 4)`` times. Returns a list
+        of frame arrays, which is empty when the script has no non-blank scene.
+        """
+        # Drop blank chunks before dividing so they do not shrink the
+        # per-scene frame budget of the scenes that are actually rendered.
+        scenes = [scene for scene in script.split('\n\n') if scene.strip()]
         frames = []
+        if not scenes:
+            return frames
+        frames_per_scene = max(num_frames // len(scenes), 4)
+        for scene in scenes:
+            # Generate base frame for the scene
+            scene_prompt = f"cartoon scene showing: {scene}"
+            base_frame = self.generate_cartoon_frame(scene_prompt, style)
+            # The effect gives the same result for the same input, so run the
+            # filters once per scene and repeat the styled frame.
+            styled_frame = self.add_cartoon_effects(base_frame)
+            frames.extend(styled_frame.copy() for _ in range(frames_per_scene))
         return frames
+    def enhance_audio(self, audio_path, style="cartoon"):
+        """Add effects to the audio based on style
+
+        ``"cartoon"`` plays the audio 1.1x faster, ``"kids"`` mixes in a
+        quieter copy delayed by 0.1 s as an echo, and any other style
+        re-encodes the audio unchanged. The result is written next to the
+        input as ``<stem>_enhanced.wav`` and its path is returned. If anything
+        fails, the error is logged and the original ``audio_path`` is returned.
+        """
+        source = None
         try:
+            # Function-scope import: these moviepy names are only needed here.
+            from moviepy.editor import CompositeAudioClip, vfx
+            source = AudioFileClip(audio_path)
+            audio = source
+            if style == "cartoon":
+                # Speed up slightly for cartoon effect. speedx is an fx
+                # function, not an AudioClip method, so apply it through fx().
+                audio = source.fx(vfx.speedx, 1.1)
+            elif style == "kids":
+                # Add echo effect for kids music. Audio clips are mixed with
+                # CompositeAudioClip; CompositeVideoClip is for video clips.
+                echo = source.set_start(0.1).volumex(0.3)
+                audio = CompositeAudioClip([source, echo])
+            wav = Path(audio_path)
+            enhanced_path = str(wav.with_name(f"{wav.stem}_enhanced.wav"))
+            # Pass the sample rate explicitly; a composite clip may not carry one.
+            audio.write_audiofile(enhanced_path, fps=source.fps)
+            return enhanced_path
         except Exception as e:
+            logger.error(f"Error enhancing audio: {str(e)}")
+            return audio_path
+        finally:
+            # Release the file reader held by the source clip.
+            if source is not None:
+                source.close()
     def generate_comedy_animation(self, prompt):
+        """Generate enhanced comedy animation
+
+        Returns ``(script, video_path, audio_path)``. On any failure the
+        error is logged and ``("Error generating content", None, None)`` is
+        returned instead of raising.
+        """
         try:
+            # Generate a more structured comedy script
+            script_prompt = f"""Write a funny cartoon script about {prompt}.
+            Include:
+            - Two distinct character voices
+            - Physical comedy moments
+            - Sound effects in [brackets]
+            - Scene descriptions in (parentheses)
+            Keep it family-friendly and around 3-4 scenes."""
+            script = self._chat("You are a professional cartoon comedy writer.", script_prompt)
+            # Generate frames with cartoon style
+            frames = self.generate_video_sequence(script, style="cartoon")
+            # Generate and enhance audio
+            speech_path = self._synthesize_speech(script, f"speech_{hash(script)}")
+            enhanced_speech = self.enhance_audio(speech_path, "cartoon")
+            # Create video with enhanced frames
+            video_path = self._write_video(frames, f"video_{hash(prompt)}", fps=12)  # Higher FPS for smoother animation
+            return script, video_path, enhanced_speech
         except Exception as e:
             logger.error(f"Error in comedy animation generation: {str(e)}")
             return "Error generating content", None, None
     def generate_kids_music_animation(self, theme):
+        """Generate enhanced kids music animation
+
+        Returns ``(lyrics, video_path, audio_path)``. On any failure the
+        error is logged and ``("Error generating content", None, None)`` is
+        returned instead of raising.
+        """
         try:
+            # Generate kid-friendly lyrics with music directions
+            lyrics_prompt = f"""Write lyrics for a children's educational song about {theme}.
+            Include:
+            - Simple, repetitive chorus
+            - Educational facts
+            - [Music notes] for melody changes
+            - (Action descriptions) for animation
+            Make it upbeat and memorable!"""
+            lyrics = self._chat("You are a children's music composer.", lyrics_prompt)
+            # Generate frames with kids' style
+            frames = self.generate_video_sequence(lyrics, style="kids", num_frames=36)
+            # Generate and enhance audio
+            speech_path = self._synthesize_speech(lyrics, f"music_{hash(lyrics)}")
+            enhanced_speech = self.enhance_audio(speech_path, "kids")
+            # Create video with enhanced frames
+            video_path = self._write_video(frames, f"video_{hash(theme)}", fps=15)  # Smooth animation for kids
+            return lyrics, video_path, enhanced_speech
         except Exception as e:
             logger.error(f"Error in kids music animation generation: {str(e)}")
             return "Error generating content", None, None
 # Gradio Interface
+def create_interface():
+    """Build the two-tab Gradio Blocks UI and return it without launching.
+
+    One ``EnhancedContentGenerator`` is created here and shared by both tabs;
+    each tab's button is bound to the matching generator method, whose three
+    return values fill that tab's text, video and audio components.
+    """
+    content_generator = EnhancedContentGenerator()
+    with gr.Blocks(theme='ysharma/steampunk') as demo:
+        gr.Markdown("# AI Cartoon Generator")
+        gr.Markdown("Generate cartoon comedy clips and kids music videos!")
+        with gr.Tab("Cartoon Comedy"):
+            comedy_topic = gr.Textbox(
+                label="What should the cartoon be about?",
+                placeholder="E.g., 'a penguin learning to fly'"
+            )
+            comedy_button = gr.Button("Generate Cartoon Comedy", variant="primary")
+            comedy_outputs = [
+                gr.Textbox(label="Generated Script"),
+                gr.Video(label="Cartoon Animation"),
+                gr.Audio(label="Cartoon Audio"),
+            ]
+        with gr.Tab("Kids Music Video"):
+            song_topic = gr.Textbox(
+                label="What should the song teach about?",
+                placeholder="E.g., 'the water cycle'"
+            )
+            song_button = gr.Button("Generate Music Video", variant="primary")
+            song_outputs = [
+                gr.Textbox(label="Song Lyrics"),
+                gr.Video(label="Music Video"),
+                gr.Audio(label="Song Audio"),
+            ]
+        # Event handlers
+        comedy_button.click(
+            fn=content_generator.generate_comedy_animation,
+            inputs=comedy_topic,
+            outputs=comedy_outputs
+        )
+        song_button.click(
+            fn=content_generator.generate_kids_music_animation,
+            inputs=song_topic,
+            outputs=song_outputs
+        )
+    return demo
 if __name__ == "__main__":
+    # Script entry point: building the interface also constructs the content
+    # generator (and therefore its models) before the Gradio server starts.
+    app = create_interface()
     app.launch()