Update app.py
app.py
CHANGED
@@ -1,17 +1,59 @@
 import gradio as gr
 import torch
-from diffusers import StableDiffusionPipeline, DDIMScheduler
+from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderTiny
 from PIL import Image
 
+# 1. Force CPU usage
+device = "cpu"
 
+# 2. Choose a smaller/distilled Stable Diffusion model
+# 'nota-ai/bk-sdm-small' is a good example of a distilled model that's faster.
+# Another option is 'segmind/SSD-1B' (though still relatively large, it's optimized).
+# For truly tiny models, you might look for "TinySD" variations.
+# Let's start with a well-known distilled model for better CPU performance.
+model_id = "nota-ai/bk-sdm-small"  # Smaller and faster than SD 2.1
+# model_id = "segmind/SSD-1B"  # Another optimized, but still larger, option.
+
+# Load the pipeline. For CPU, use torch_dtype=torch.float32.
+# Disable safe_serialization if you encounter issues with some older models.
+print(f"Loading model: {model_id} on {device}...")
+try:
+    pipe = StableDiffusionPipeline.from_pretrained(
+        model_id,
+        torch_dtype=torch.float32,  # CPU usually prefers float32 for stability/speed unless specialized kernels are used
+        low_cpu_mem_usage=True  # Helps with memory on CPU
+    )
+except Exception as e:
+    print(f"Error loading model {model_id}: {e}. Trying without low_cpu_mem_usage.")
+    pipe = StableDiffusionPipeline.from_pretrained(
+        model_id,
+        torch_dtype=torch.float32,
+    )
+
+# Optimize VAE (Very Important for Speed and Memory on CPU)
+# The VAE (Variational AutoEncoder) is a bottleneck. Using a tiny VAE helps a lot.
+# 'sayakpaul/taesd-diffusers' is a known tiny VAE.
+print("Loading Tiny VAE...")
+try:
+    pipe.vae = AutoencoderTiny.from_pretrained("sayakpaul/taesd-diffusers", torch_dtype=torch.float32)
+except Exception as e:
+    print(f"Could not load Tiny VAE: {e}. Model might be slower.")
+    # Fallback: if Tiny VAE fails, ensure the default VAE is on CPU
+    pipe.vae.to(device)
+
+
+# Move pipeline components to CPU explicitly
+pipe.to(device)
+
+# Set up the scheduler. DDIMScheduler is fine.
 pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
-pipe = pipe.to(device)
 
+# Enable CPU offload for even lower memory (can make it slower, but might be necessary for very limited RAM)
+# pipe.enable_sequential_cpu_offload()  # Use if you hit OOM errors, but it will be much slower.
+
+print("Model loaded and configured.")
+
+# Preset styles (same as before)
 styles = {
     "Pixar": "pixar style portrait of",
     "Anime": "anime style portrait of",
@@ -23,21 +65,43 @@ styles = {
 
 def generate_avatar(image, style):
     if image is None:
+        # You might want to generate a default image or throw an error via Gradio
+        # For a more robust app, consider a placeholder image or a clear error message in the UI.
+        gr.Warning("Please upload an image to generate an avatar.")
         return None
+
+    # Although the original intent was image-to-image, your current logic
+    # converts the image input into a text-only prompt.
+    # To truly use the image as input, you would need an img2img pipeline or a specific
+    # controlnet/adapter for Stable Diffusion.
+    # For now, let's keep it as a text-to-image generation based on the style and a generic prompt.
+
     base_prompt = styles[style]
+    # For CPU, fewer steps and lower guidance scale can yield faster (but potentially lower quality) results.
+    num_inference_steps = 20  # Reduced for speed
+    guidance_scale = 7.0  # Slightly reduced guidance
 
+    prompt = f"{base_prompt} a person, high quality, detailed, professional"  # Enhance prompt
+    negative_prompt = "low resolution, blurry, distorted, bad quality, ugly, cartoon, sketch"  # Add negative prompt for better results
 
+    # Generate image
+    print(f"Generating for style: {style} with prompt: {prompt}")
+    with torch.no_grad():  # Disable gradient calculations for inference
+        generated_image = pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale
+        ).images[0]
+
+    return generated_image
 
+with gr.Blocks() as demo:
+    gr.Markdown("## 🎨 Stable Diffusion Avatar Generator with Preset Styles (CPU Optimized)")
+    gr.Markdown("This demo uses a smaller, distilled Stable Diffusion model and is optimized for CPU inference. Generation will still take time on CPU, but should be faster than larger models.")
     with gr.Row():
         with gr.Column():
-            image_input = gr.Image(label="Upload your photo", type="pil", sources=["upload", "webcam"])
+            image_input = gr.Image(label="Upload your photo (Note: Image currently used only to trigger generation, not as direct input)", type="pil", sources=["upload", "webcam"])
             style_selector = gr.Radio(choices=list(styles.keys()), label="Choose a style", value="Anime")
             generate_btn = gr.Button("Generate Avatar")
         with gr.Column():
@@ -45,4 +109,4 @@
 
     generate_btn.click(fn=generate_avatar, inputs=[image_input, style_selector], outputs=output_image)
 
-demo.launch()
+demo.launch()
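
The comments inside generate_avatar point out that the uploaded photo is never actually fed to the model; that would require an img2img pipeline. A minimal sketch of what that could look like, assuming the same model_id, device, and styles defined in the diff above and that this distilled checkpoint loads into diffusers' StableDiffusionImg2ImgPipeline (this is not part of the committed app.py):

from diffusers import StableDiffusionImg2ImgPipeline

# Hypothetical img2img variant -- reuses model_id, device, and styles from app.py above.
img2img_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
).to(device)

def generate_avatar_img2img(image, style):
    if image is None:
        gr.Warning("Please upload an image to generate an avatar.")
        return None
    # SD 1.x-style checkpoints expect roughly 512x512 inputs.
    init_image = image.convert("RGB").resize((512, 512))
    prompt = f"{styles[style]} a person, high quality, detailed, professional"
    with torch.no_grad():
        result = img2img_pipe(
            prompt=prompt,
            image=init_image,          # the uploaded photo now guides generation
            strength=0.6,              # how strongly the photo is repainted (lower = closer to the original)
            num_inference_steps=20,
            guidance_scale=7.0,
        ).images[0]
    return result

Wiring this in would also mean replacing (or loading alongside) the text-to-image StableDiffusionPipeline above and updating the gr.Image label accordingly.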