Update app.py
app.py
CHANGED
@@ -1,17 +1,59 @@
 import gradio as gr
 import torch
-from diffusers import StableDiffusionPipeline, DDIMScheduler
+from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderTiny
 from PIL import Image
 
+# 1. Force CPU usage
+device = "cpu"
 
+# 2. Choose a smaller/distilled Stable Diffusion model
+# 'nota-ai/bk-sdm-small' is a good example of a distilled model that's faster.
+# Another option is 'segmind/SSD-1B' (though still relatively large, it's optimized).
+# For truly tiny models, you might look for "TinySD" variations.
+# Let's start with a well-known distilled model for better CPU performance.
+model_id = "nota-ai/bk-sdm-small"  # Smaller and faster than SD 2.1
+# model_id = "segmind/SSD-1B"  # Another optimized, but still larger, option.
+
+# Load the pipeline. For CPU, use torch_dtype=torch.float32.
+# Disable safe_serialization if you encounter issues with some older models.
+print(f"Loading model: {model_id} on {device}...")
+try:
+    pipe = StableDiffusionPipeline.from_pretrained(
+        model_id,
+        torch_dtype=torch.float32,  # CPU usually prefers float32 for stability/speed unless specialized kernels are used
+        low_cpu_mem_usage=True  # Helps with memory on CPU
+    )
+except Exception as e:
+    print(f"Error loading model {model_id}: {e}. Trying without low_cpu_mem_usage.")
+    pipe = StableDiffusionPipeline.from_pretrained(
+        model_id,
+        torch_dtype=torch.float32,
+    )
+
+# Optimize VAE (Very Important for Speed and Memory on CPU)
+# The VAE (Variational AutoEncoder) is a bottleneck. Using a tiny VAE helps a lot.
+# 'sayakpaul/taesd-diffusers' is a known tiny VAE.
+print("Loading Tiny VAE...")
+try:
+    pipe.vae = AutoencoderTiny.from_pretrained("sayakpaul/taesd-diffusers", torch_dtype=torch.float32)
+except Exception as e:
+    print(f"Could not load Tiny VAE: {e}. Model might be slower.")
+    # Fallback: if Tiny VAE fails, ensure the default VAE is on CPU
+    pipe.vae.to(device)
+
+
+# Move pipeline components to CPU explicitly
+pipe.to(device)
+
+# Set up the scheduler. DDIMScheduler is fine.
 pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
-pipe = pipe.to(device)
 
+# Enable CPU offload for even lower memory (can make it slower, but might be necessary for very limited RAM)
+# pipe.enable_sequential_cpu_offload()  # Use if you hit OOM errors, but it will be much slower.
+
+print("Model loaded and configured.")
+
+# Preset styles (same as before)
 styles = {
     "Pixar": "pixar style portrait of",
     "Anime": "anime style portrait of",
@@ -23,21 +65,43 @@ styles = {
 
 def generate_avatar(image, style):
     if image is None:
+        # You might want to generate a default image or throw an error via Gradio
+        # For a more robust app, consider a placeholder image or a clear error message in the UI.
+        gr.Warning("Please upload an image to generate an avatar.")
         return None
+
+    # Although the original intent was image-to-image, your current logic
+    # converts the image input into a text-only prompt.
+    # To truly use the image as input, you would need an img2img pipeline or a specific
+    # controlnet/adapter for Stable Diffusion.
+    # For now, let's keep it as a text-to-image generation based on the style and a generic prompt.
+
     base_prompt = styles[style]
+    # For CPU, fewer steps and lower guidance scale can yield faster (but potentially lower quality) results.
+    num_inference_steps = 20  # Reduced for speed
+    guidance_scale = 7.0  # Slightly reduced guidance
 
+    prompt = f"{base_prompt} a person, high quality, detailed, professional"  # Enhance prompt
+    negative_prompt = "low resolution, blurry, distorted, bad quality, ugly, cartoon, sketch"  # Add negative prompt for better results
 
+    # Generate image
+    print(f"Generating for style: {style} with prompt: {prompt}")
+    with torch.no_grad():  # Disable gradient calculations for inference
+        generated_image = pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale
+        ).images[0]
+
+    return generated_image
 
+with gr.Blocks() as demo:
+    gr.Markdown("## 🎨 Stable Diffusion Avatar Generator with Preset Styles (CPU Optimized)")
+    gr.Markdown("This demo uses a smaller, distilled Stable Diffusion model and is optimized for CPU inference. Generation will still take time on CPU, but should be faster than larger models.")
     with gr.Row():
         with gr.Column():
-            image_input = gr.Image(label="Upload your photo", type="pil", sources=["upload", "webcam"])
+            image_input = gr.Image(label="Upload your photo (Note: Image currently used only to trigger generation, not as direct input)", type="pil", sources=["upload", "webcam"])
             style_selector = gr.Radio(choices=list(styles.keys()), label="Choose a style", value="Anime")
             generate_btn = gr.Button("Generate Avatar")
         with gr.Column():
@@ -45,4 +109,4 @@
 
     generate_btn.click(fn=generate_avatar, inputs=[image_input, style_selector], outputs=output_image)
 
-demo.launch()
+demo.launch()
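
The comments inside generate_avatar point out that the uploaded photo is never actually fed to the model; that would require an img2img pipeline. A minimal sketch of what that could look like, assuming the same model_id, device, and styles defined in the diff above and that this distilled checkpoint loads into diffusers' StableDiffusionImg2ImgPipeline (this is not part of the committed app.py):

from diffusers import StableDiffusionImg2ImgPipeline

# Hypothetical img2img variant -- reuses model_id, device, and styles from app.py above.
img2img_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
).to(device)

def generate_avatar_img2img(image, style):
    if image is None:
        gr.Warning("Please upload an image to generate an avatar.")
        return None
    # SD 1.x-style checkpoints expect roughly 512x512 inputs.
    init_image = image.convert("RGB").resize((512, 512))
    prompt = f"{styles[style]} a person, high quality, detailed, professional"
    with torch.no_grad():
        result = img2img_pipe(
            prompt=prompt,
            image=init_image,          # the uploaded photo now guides generation
            strength=0.6,              # how strongly the photo is repainted (lower = closer to the original)
            num_inference_steps=20,
            guidance_scale=7.0,
        ).images[0]
    return result

Wiring this in would also mean replacing (or loading alongside) the text-to-image StableDiffusionPipeline above and updating the gr.Image label accordingly.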