Update app.py
app.py
CHANGED
@@ -1,16 +1,69 @@
 import gradio as gr
 import time
-import random
-
-
+import random
+import torch
+import numpy as np
+from PIL import Image
+import imageio   # For saving video
+import tempfile  # For creating temporary files
+import os
+
+# --- Hugging Face Model Imports ---
+from transformers import T5ForConditionalGeneration, T5Tokenizer
+from diffusers import StableDiffusionPipeline, AnimateDiffPipeline, DDIMScheduler, MotionAdapter
+
+# --- Model Loading (Load outside the function for better performance) ---
+# Check for CUDA availability
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+
+# Load Prompt Enhancement Model
+print("Loading Prompt Enhancement Model (T5)...")
+tokenizer_t5 = T5Tokenizer.from_pretrained("t5-small")
+model_t5 = T5ForConditionalGeneration.from_pretrained("t5-small").to(device)
+print("T5 model loaded.")
+
+# Load Image Generation Model
+print("Loading Image Generation Model (Stable Diffusion 1.5)...")
+pipe_sd = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16 if device == "cuda" else torch.float32).to(device)
+# Optional: Enable optimizations if using CUDA
+if device == "cuda":
+    pipe_sd.enable_xformers_memory_efficient_attention()
+    pipe_sd.enable_vae_slicing()
+    # For larger models (e.g. SDXL), enable_model_cpu_offload() can also help
+
+print("Stable Diffusion 1.5 model loaded.")
+
+# Load Animation Model (AnimateDiff)
+print("Loading Animation Model (AnimateDiff)...")
+# Load motion module
+adapter = MotionAdapter.from_pretrained("emperorxi/animatediff-motion-module-v1", torch_dtype=torch.float16 if device == "cuda" else torch.float32)
+# Build the animation pipeline on the base SD checkpoint
+# (AnimateDiffPipeline, not StableDiffusionPipeline, accepts a motion_adapter)
+pipe_anim = AnimateDiffPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", motion_adapter=adapter, torch_dtype=torch.float16 if device == "cuda" else torch.float32).to(device)
+# Configure scheduler
+pipe_anim.scheduler = DDIMScheduler.from_config(pipe_anim.scheduler.config, clip_sample=False, timestep_spacing="uniform")
+# Optional: Enable optimizations if using CUDA
+if device == "cuda":
+    pipe_anim.enable_xformers_memory_efficient_attention()
+    pipe_anim.enable_vae_slicing()
+    # pipe_anim.enable_model_cpu_offload()  # Can be useful for memory, but slower if components are moved back and forth
+
+print("AnimateDiff model loaded.")
+
+# --- Function to run the pipeline ---
 def process_prompt_and_generate(user_prompt, image_resolution, guidance_scale, seed, animation_frames, animation_style):
     """
-
+    Runs the AI pipeline using Hugging Face models.
     It yields updates for the status and logs.
     """
     logs = []
     status = "Starting processing..."
-
+    # Yield initial state - Gradio expects all outputs to be present, even if empty
+    yield user_prompt, "", None, None, "", "", "", "", "", "\n".join(logs), status

     if not user_prompt:
         logs.append("Error: No prompt provided.")
@@ -18,64 +71,160 @@ def process_prompt_and_generate(user_prompt, image_resolution, guidance_scale, seed, animation_frames, animation_style):
         yield user_prompt, "", None, None, "", "", "", "", "", "\n".join(logs), status
         return

-    #
-
+    # Ensure seed is a positive integer, use random if -1
+    current_seed = seed if seed != -1 else random.randint(0, 100000000)
+    generator = torch.Generator(device=device).manual_seed(current_seed)
+    np.random.seed(current_seed)  # Seed NumPy too, for any incidental NumPy randomness
+
+    # --- Step 1: Prompt Enhancement (using T5) ---
+    status = "Enhancing prompt (T5)..."
     logs.append(f"User Prompt: '{user_prompt}'")
-    logs.append(f"Parameters: Resolution={image_resolution}, Guidance Scale={guidance_scale}, Seed={
-    yield user_prompt, "", None, None,
-    time.
-
-
-
-
-
-
-
-
-
-
-
-
-
-    logs.append(f"
-
-
-
-
-
-
-
-
-
-
-
-
-
+    logs.append(f"Parameters: Resolution={image_resolution}, Guidance Scale={guidance_scale}, Seed={current_seed}, Frames={animation_frames}, Style={animation_style}")
+    yield user_prompt, "", None, None, str(image_resolution), str(guidance_scale), str(current_seed), str(animation_frames), animation_style, "\n".join(logs), status  # Update parameters display early
+    start_time = time.time()
+
+    try:
+        input_text = f"enhance prompt: {user_prompt}"  # Instruction-style prefix; note t5-small was not trained on this task, so results vary
+        input_ids = tokenizer_t5(input_text, return_tensors="pt").input_ids.to(device)
+        outputs = model_t5.generate(input_ids, max_length=64, num_beams=4, early_stopping=True)  # Keep enhancement concise
+        enhanced_prompt = tokenizer_t5.decode(outputs[0], skip_special_tokens=True)
+        logs.append(f"Enhanced Prompt: '{enhanced_prompt}'")
+        yield user_prompt, enhanced_prompt, None, None, str(image_resolution), str(guidance_scale), str(current_seed), str(animation_frames), animation_style, "\n".join(logs), status
+    except Exception as e:
+        logs.append(f"Error during prompt enhancement: {e}")
+        status = "Error during prompt enhancement."
+        yield user_prompt, "", None, None, str(image_resolution), str(guidance_scale), str(current_seed), str(animation_frames), animation_style, "\n".join(logs), status
+        return
+    end_time = time.time()
+    logs.append(f"Prompt enhancement took {end_time - start_time:.2f} seconds.")
+
+    # --- Step 2: Image Generation (using Stable Diffusion) ---
+    status = "Generating image (Stable Diffusion)..."
+    logs.append(f"Generating initial image ({image_resolution}x{image_resolution}px)...")
+    yield user_prompt, enhanced_prompt, None, None, str(image_resolution), str(guidance_scale), str(current_seed), str(animation_frames), animation_style, "\n".join(logs), status
+    start_time = time.time()
+
+    try:
+        # Generate the image
+        with torch.no_grad():
+            image = pipe_sd(
+                prompt=enhanced_prompt,
+                height=image_resolution,
+                width=image_resolution,
+                guidance_scale=guidance_scale,
+                generator=generator
+            ).images[0]
+
+        # Save the image temporarily. Gradio can handle PIL images directly,
+        # but saving to a temp file is also common; tempfile gives a robust path.
+        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
+            temp_image_path = tmpfile.name
+        image.save(temp_image_path)  # Save after the handle is closed, for Windows compatibility
+
+        logs.append(f"Image generated successfully: {temp_image_path}")
+        yield user_prompt, enhanced_prompt, temp_image_path, None, str(image_resolution), str(guidance_scale), str(current_seed), str(animation_frames), animation_style, "\n".join(logs), status
+    except Exception as e:
+        logs.append(f"Error during image generation: {e}")
+        status = "Error during image generation."
+        yield user_prompt, enhanced_prompt, None, None, str(image_resolution), str(guidance_scale), str(current_seed), str(animation_frames), animation_style, "\n".join(logs), status
+        # Clean up temp file if it exists from a partial save
+        if 'temp_image_path' in locals() and os.path.exists(temp_image_path):
+            os.remove(temp_image_path)
+        return
+    end_time = time.time()
+    logs.append(f"Image generation took {end_time - start_time:.2f} seconds.")
+
+    # --- Step 3: Animation (using AnimateDiff) ---
+    status = "Generating animation (AnimateDiff)..."
+    logs.append(f"Generating animation ({animation_frames} frames, style: {animation_style}). Note: the 'Style' parameter does not directly control the AnimateDiff output.")
+    yield user_prompt, enhanced_prompt, temp_image_path, None, str(image_resolution), str(guidance_scale), str(current_seed), str(animation_frames), animation_style, "\n".join(logs), status
+    start_time = time.time()
+
+    try:
+        # AnimateDiff is primarily text-to-video: it takes the prompt and generates a frame
+        # sequence. The UI's 'style' parameter has no direct AnimateDiff equivalent, so only
+        # the enhanced prompt, frame count, guidance scale, and generator are passed.
+        # Width/height are left at the pipeline default (512x512 for SD 1.5), since the
+        # motion adapter expects the resolutions it was trained on.
+        with torch.no_grad():
+            animation_frames_list = pipe_anim(
+                prompt=enhanced_prompt,
+                negative_prompt=None,  # Could add a negative prompt if needed
+                num_frames=animation_frames,
+                guidance_scale=guidance_scale,
+                generator=generator,
+            ).frames[0]  # .frames is batched per prompt; take the first (only) sequence
+
+        # Compile frames into a video
+        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
+            temp_video_path = tmpfile.name
+        # Use imageio to write the video - requires an ffmpeg backend (e.g. imageio-ffmpeg)
+        try:
+            imageio.mimwrite(temp_video_path, [np.asarray(frame) for frame in animation_frames_list], fps=8, quality=8)  # Adjust fps and quality as needed
+        except Exception as ffmpeg_error:
+            logs.append(f"Error saving video with imageio/ffmpeg: {ffmpeg_error}")
+            logs.append("Ensure ffmpeg is installed and in your PATH, or use imageio.get_writer with a specific backend.")
+            status = "Error saving video."
+            # Attempt cleanup
+            if os.path.exists(temp_video_path):
+                os.remove(temp_video_path)
+            yield user_prompt, enhanced_prompt, temp_image_path, None, str(image_resolution), str(guidance_scale), str(current_seed), str(animation_frames), animation_style, "\n".join(logs), status
+            # Clean up temp image
+            if 'temp_image_path' in locals() and os.path.exists(temp_image_path):
+                os.remove(temp_image_path)
+            return
+
+        logs.append(f"Animation generated successfully: {temp_video_path}")
+        yield user_prompt, enhanced_prompt, temp_image_path, temp_video_path, str(image_resolution), str(guidance_scale), str(current_seed), str(animation_frames), animation_style, "\n".join(logs), status
+    except Exception as e:
+        logs.append(f"Error during animation generation: {e}")
+        status = "Error during animation generation."
+        yield user_prompt, enhanced_prompt, temp_image_path, None, str(image_resolution), str(guidance_scale), str(current_seed), str(animation_frames), animation_style, "\n".join(logs), status
+        # Clean up temp files
+        if 'temp_image_path' in locals() and os.path.exists(temp_image_path):
+            os.remove(temp_image_path)
+        if 'temp_video_path' in locals() and os.path.exists(temp_video_path):
+            os.remove(temp_video_path)
+        return
+    end_time = time.time()
+    logs.append(f"Animation generation took {end_time - start_time:.2f} seconds.")

     # --- Finalizing Outputs ---
     status = "Process complete!"
     logs.append("All steps finished.")

-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # The last yield of a generator provides the final values for Gradio,
+    # so make it explicitly contain every output
+    yield user_prompt, enhanced_prompt, temp_image_path, temp_video_path, \
+          str(image_resolution), str(guidance_scale), str(current_seed), str(animation_frames), animation_style, \
+          "\n".join(logs), status
+
+# --- Function to update the parameters display (chained after the main function) ---
+def update_parameters_display(res, gs, seed, frames, style):
+    # Formats the parameter strings passed through from the main function
+    if not res:  # Check if results exist (e.g., first yield is empty)
+        return ""
+    metadata = f"Resolution: {res}px\nGuidance Scale: {gs}\nSeed: {seed}\nFrames: {frames}\nStyle: {style}\n(Note: Animation Style may not directly control model output)"
+    return metadata
+
+# --- Function to randomize seed ---
+def randomize():
+    return random.randint(1, 100000000)  # Generate a random seed

 # --- Gradio UI Definition ---

@@ -89,6 +238,13 @@ theme = gr.themes.Monochrome().set(
     # spacing_size_lg="2rem" # Example spacing adjustment
 )

+# Use tempfile for a base temp directory managed by the app
+temp_dir = tempfile.mkdtemp()
+print(f"Using temporary directory: {temp_dir}")
+
+# Set Gradio's temp dir if needed (often handled automatically)
+# gr.processing_utils.TEMP_DIR = temp_dir  # May be needed in older Gradio versions or specific setups
+

 with gr.Blocks(theme=theme, title="AI Creative Studio") as demo:

@@ -109,6 +265,8 @@ with gr.Blocks(theme=theme, title="AI Creative Studio") as demo:
     # 🎨 Multi-Step AI Creative Pipeline
     Unleash your imagination! Input a prompt, and our AI orchestrates a sequence:
     Prompt Enhancement → Image Generation → Animation.
+    **Using free models from Hugging Face (T5, Stable Diffusion 1.5, AnimateDiff).**
+    *Note: the 'Animation Style' parameter might not directly control the AnimateDiff model output.*
     """
     )
     gr.Markdown("---")  # Separator
@@ -169,8 +327,8 @@ with gr.Blocks(theme=theme, title="AI Creative Studio") as demo:

     # Row for media
     with gr.Row():
-        generated_image_output = gr.Image(label="Generated Image", interactive=False, height=450, show_share_button=True)
-        generated_animation_output = gr.Video(label="Generated Animation", interactive=False, height=450, show_share_button=True)
+        generated_image_output = gr.Image(label="Generated Image", interactive=False, height=450, show_share_button=True, type="filepath")  # type="filepath", since the function returns a file path
+        generated_animation_output = gr.Video(label="Generated Animation", interactive=False, height=450, show_share_button=True)  # gr.Video accepts file paths directly; it has no type parameter


     # Display Parameters Used (Collapsed or in a smaller section)
@@ -178,16 +336,17 @@ with gr.Blocks(theme=theme, title="AI Creative Studio") as demo:
     parameters_used_output = gr.Textbox(
         label="Generation Parameters",
         interactive=False,
-        lines=
+        lines=6,  # Increased lines slightly to fit the note
+        max_lines=30,
         show_copy_button=True
     )
     # Dummy output components to catch the individual parameters;
     # they are combined for the Textbox above by update_parameters_display
     res_out = gr.Textbox(visible=False)
     gs_out = gr.Textbox(visible=False)
     seed_out = gr.Textbox(visible=False)
     frames_out = gr.Textbox(visible=False)
     style_out = gr.Textbox(visible=False)


     # Download Buttons (Placeholder)
@@ -195,8 +354,9 @@ with gr.Blocks(theme=theme, title="AI Creative Studio") as demo:
     with gr.Row():
         # These buttons are just placeholders for now.
         # Real download logic needs separate functions.
-
-
+        # Making them interactive=False, as they don't have click events linked
+        download_image_button = gr.Button("⬇️ Download Image", interactive=False)
+        download_video_button = gr.Button("⬇️ Download Video", interactive=False)

     gr.Markdown("---")  # Separator

@@ -216,6 +376,8 @@ with gr.Blocks(theme=theme, title="AI Creative Studio") as demo:

     # Button click triggers the main processing function
     # The outputs list maps the function's return values to UI components
+    # Because process_prompt_and_generate is a generator, Gradio updates the outputs
+    # with each yielded value. The final yield provides the final state.
     generate_button.click(
         fn=process_prompt_and_generate,
         inputs=[
@@ -238,32 +400,16 @@ with gr.Blocks(theme=theme, title="AI Creative Studio") as demo:
             style_out,
             logs_output,    # Logs are updated incrementally/finally
             status_display  # Status is updated incrementally/finally
-        ]
-    )
-
-    # Update the combined parameters display after the main function runs
-    # This uses the individual output components as inputs for a new function
-    def update_parameters_display(res, gs, seed, frames, style):
-        if not res: # Check if results exist
-            return ""
-        metadata = f"Resolution: {res}px\nGuidance Scale: {gs}\nSeed: {seed}\nFrames: {frames}\nStyle: {style}"
-        return metadata
-
-    # Add a state component to hold the output parameters temporarily
-    # Or simply chain the update after the main click, using the same outputs
-    # Let's chain it for simplicity here. The order matters.
-    # This click event fires AFTER the main click event completes and updates its outputs.
-    # It takes the updated outputs as its inputs.
-    generate_button.click(
+        ],
+        api_name="generate"  # Optional: add an API name for easy calling
+    ).success(  # Chain the parameter update after the main process finishes successfully
         fn=update_parameters_display,
         inputs=[res_out, gs_out, seed_out, frames_out, style_out],
         outputs=[parameters_used_output]
     )

-    # Randomize Seed Button Interaction
-    def randomize():
-        return random.randint(1, 1000000) # Generate a random seed
+    # Randomize Seed Button Interaction

     randomize_seed_button.click(
         fn=randomize,
         inputs=[],
@@ -274,5 +420,13 @@ with gr.Blocks(theme=theme, title="AI Creative Studio") as demo:
 # --- Launch the App ---
 if __name__ == "__main__":
     print("Gradio AI Creative Studio is starting...")
-
-
+    # Use share=True to make it accessible over the internet (for testing)
+    # Use inbrowser=True to auto-open the browser
+    demo.launch(inbrowser=True)
+    print("App launched!")
+
+    # Optional: clean up the temporary directory when the app stops.
+    # This is not automatically called on Ctrl+C, but is useful in some deployment scenarios.
+    # import shutil
+    # shutil.rmtree(temp_dir)
+    # print(f"Cleaned up temporary directory: {temp_dir}")
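Notes on the new code:

On the CUDA-only optimizations: enable_xformers_memory_efficient_attention() raises if the xformers package is not installed, so the module-level loading can fail on machines that have CUDA but no xformers. A minimal guard sketch, using only the diffusers pipeline methods shown in the diff; the helper name try_enable_memory_optimizations is illustrative, not part of the commit:

    def try_enable_memory_optimizations(pipe):
        """Best-effort memory optimizations for a diffusers pipeline."""
        try:
            pipe.enable_xformers_memory_efficient_attention()  # Requires the xformers package
        except Exception as err:
            print(f"xformers unavailable, using default attention: {err}")
        pipe.enable_vae_slicing()  # Decode the VAE in slices to lower peak memory

    # Usage, mirroring the loading code above:
    # if device == "cuda":
    #     try_enable_memory_optimizations(pipe_sd)
    #     try_enable_memory_optimizations(pipe_anim)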
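On the T5 step: "enhance prompt:" is not one of the task prefixes t5-small was trained on (those are tasks like "translate English to German:" or "summarize:"), so the model may echo or truncate the input rather than enrich it. A quick standalone check of what the prefix actually produces, assuming the same t5-small checkpoint as the commit:

    from transformers import T5ForConditionalGeneration, T5Tokenizer

    tokenizer = T5Tokenizer.from_pretrained("t5-small")
    model = T5ForConditionalGeneration.from_pretrained("t5-small")

    ids = tokenizer("enhance prompt: a cat in a spacesuit", return_tensors="pt").input_ids
    out = model.generate(ids, max_length=64, num_beams=4, early_stopping=True)
    print(tokenizer.decode(out[0], skip_special_tokens=True))  # Inspect the actual behavior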
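On the image step: writing to a NamedTemporaryFile with delete=False works with gr.Image(type="filepath"), but those files accumulate until something removes them. Gradio output components also accept PIL images directly, in which case Gradio copies results into its own managed cache. A minimal sketch of that alternative; gr.Interface and the placeholder image are for illustration only:

    import gradio as gr
    from PIL import Image

    def generate(prompt: str) -> Image.Image:
        # ... run pipe_sd here; a solid placeholder stands in for the result ...
        return Image.new("RGB", (512, 512), "gray")

    demo = gr.Interface(fn=generate, inputs="text", outputs=gr.Image())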
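On the AnimateDiff setup: the motion-module repo id used in the commit ("emperorxi/animatediff-motion-module-v1") may not resolve on the Hub; the diffusers documentation pairs AnimateDiffPipeline with the "guoyww/animatediff-motion-adapter-v1-5-2" adapter. A minimal sketch following that documented pattern, with an arbitrary prompt and seed:

    import torch
    from diffusers import AnimateDiffPipeline, DDIMScheduler, MotionAdapter
    from diffusers.utils import export_to_gif

    adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2")
    pipe = AnimateDiffPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", motion_adapter=adapter)
    pipe.scheduler = DDIMScheduler.from_config(
        pipe.scheduler.config, clip_sample=False, timestep_spacing="linspace", beta_schedule="linear"
    )

    output = pipe(
        prompt="a rocket lifting off at sunset, cinematic",
        num_frames=16,
        guidance_scale=7.5,
        num_inference_steps=25,
        generator=torch.Generator("cpu").manual_seed(42),
    )
    export_to_gif(output.frames[0], "animation.gif")  # .frames is batched per prompt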
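On writing the MP4: imageio.mimwrite expects array-like frames and routes .mp4 files to an ffmpeg backend (the imageio-ffmpeg package provides one). Pipeline frames come back as PIL images, so converting them explicitly avoids backend surprises. A small sketch under those assumptions; write_mp4 is an illustrative helper:

    import imageio
    import numpy as np
    from PIL import Image

    def write_mp4(frames: list, path: str, fps: int = 8) -> None:
        # Convert PIL frames to uint8 RGB arrays before handing them to the ffmpeg plugin
        arrays = [np.asarray(frame.convert("RGB")) for frame in frames]
        imageio.mimwrite(path, arrays, fps=fps, quality=8)

    # Example with dummy frames:
    write_mp4([Image.new("RGB", (256, 256), (i * 16 % 256, 0, 0)) for i in range(16)], "out.mp4")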
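On the event wiring: a generator bound to click() streams one UI update per yield, and chaining .success() runs a follow-up only after the generator finishes without raising; the follow-up can then read components the first event just populated (here, the hidden res_out-style textboxes). A stripped-down sketch of the same pattern, assuming a Gradio version where event listeners expose .success (it ships alongside .then):

    import time
    import gradio as gr

    def run_steps(prompt):
        for step in ("enhancing", "generating", "animating"):
            yield f"{step}: {prompt}"  # Each yield updates the status box in place
            time.sleep(0.2)
        yield "done"

    with gr.Blocks() as demo:
        prompt = gr.Textbox(label="Prompt")
        status = gr.Textbox(label="Status")
        summary = gr.Textbox(label="Summary")
        run = gr.Button("Run")
        run.click(run_steps, inputs=prompt, outputs=status).success(
            fn=lambda s: f"finished with status: {s}",
            inputs=status,
            outputs=summary,
        )

    # demo.queue() may be required for generators on older Gradio releases
    # demo.launch()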
|