b2bomber committed
Commit abf7663 · verified · Parent(s): 5aeb12b

Update app.py

Files changed (1):
  app.py +80 -16
app.py CHANGED
@@ -1,17 +1,59 @@
 import gradio as gr
 import torch
-from diffusers import StableDiffusionPipeline, DDIMScheduler
+from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderTiny
 from PIL import Image
 
-device = "cuda" if torch.cuda.is_available() else "cpu"
+# 1. Force CPU usage
+device = "cpu"
 
-# Load SD model (use SD1.5 or SDXL-based)
-model_id = "stabilityai/stable-diffusion-2-1"
-pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16 if device == "cuda" else torch.float32)
+# 2. Choose a smaller/distilled Stable Diffusion model
+# 'nota-ai/bk-sdm-small' is a good example of a distilled model that's faster.
+# Another option is 'segmind/SSD-1B' (though still relatively large, it's optimized).
+# For truly tiny models, you might look for "TinySD" variations.
+# Let's start with a well-known distilled model for better CPU performance.
+model_id = "nota-ai/bk-sdm-small"  # Smaller and faster than SD 2.1
+# model_id = "segmind/SSD-1B"  # Another optimized, but still larger, option.
+
+# Load the pipeline. For CPU, use torch_dtype=torch.float32.
+# Disable safe_serialization if you encounter issues with some older models.
+print(f"Loading model: {model_id} on {device}...")
+try:
+    pipe = StableDiffusionPipeline.from_pretrained(
+        model_id,
+        torch_dtype=torch.float32,  # CPU usually prefers float32 for stability/speed unless specialized kernels are used
+        low_cpu_mem_usage=True  # Helps with memory on CPU
+    )
+except Exception as e:
+    print(f"Error loading model {model_id}: {e}. Trying without low_cpu_mem_usage.")
+    pipe = StableDiffusionPipeline.from_pretrained(
+        model_id,
+        torch_dtype=torch.float32,
+    )
+
+# Optimize VAE (Very Important for Speed and Memory on CPU)
+# The VAE (Variational AutoEncoder) is a bottleneck. Using a tiny VAE helps a lot.
+# 'sayakpaul/taesd-diffusers' is a known tiny VAE.
+print("Loading Tiny VAE...")
+try:
+    pipe.vae = AutoencoderTiny.from_pretrained("sayakpaul/taesd-diffusers", torch_dtype=torch.float32)
+except Exception as e:
+    print(f"Could not load Tiny VAE: {e}. Model might be slower.")
+    # Fallback: if Tiny VAE fails, ensure the default VAE is on CPU
+    pipe.vae.to(device)
+
+# Move pipeline components to CPU explicitly
+pipe.to(device)
+
+# Set up the scheduler. DDIMScheduler is fine.
 pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
-pipe = pipe.to(device)
 
-# Preset styles
+# Enable CPU offload for even lower memory (can make it slower, but might be necessary for very limited RAM)
+# pipe.enable_sequential_cpu_offload()  # Use if you hit OOM errors, but it will be much slower.
+
+print("Model loaded and configured.")
+
+# Preset styles (same as before)
 styles = {
     "Pixar": "pixar style portrait of",
     "Anime": "anime style portrait of",
@@ -23,21 +65,43 @@ styles = {
 
 def generate_avatar(image, style):
     if image is None:
+        # You might want to generate a default image or throw an error via Gradio.
+        # For a more robust app, consider a placeholder image or a clear error message in the UI.
+        gr.Warning("Please upload an image to generate an avatar.")
         return None
-
-    # Preprocess image (convert to prompt-only for simplicity)
+
+    # Although the original intent was image-to-image, the current logic
+    # converts the image input into a text-only prompt.
+    # To truly use the image as input, you would need an img2img pipeline or a
+    # specific ControlNet/adapter for Stable Diffusion.
+    # For now, keep this as text-to-image generation based on the style and a generic prompt.
+
     base_prompt = styles[style]
-    prompt = f"{base_prompt} a person"
+    # For CPU, fewer steps and a lower guidance scale can yield faster (but potentially lower-quality) results.
+    num_inference_steps = 20  # Reduced for speed
+    guidance_scale = 7.0  # Slightly reduced guidance
 
-    image = pipe(prompt=prompt, num_inference_steps=30, guidance_scale=7.5).images[0]
-    return image
+    prompt = f"{base_prompt} a person, high quality, detailed, professional"  # Enhanced prompt
+    negative_prompt = "low resolution, blurry, distorted, bad quality, ugly, cartoon, sketch"  # Negative prompt for better results
+
+    # Generate the image
+    print(f"Generating for style: {style} with prompt: {prompt}")
+    with torch.no_grad():  # Disable gradient calculations for inference
+        generated_image = pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale
+        ).images[0]
+
+    return generated_image
 
-with gr.Blocks() as demo:
-    gr.Markdown("## 🎨 Stable Diffusion Avatar Generator with Preset Styles")
-
+with gr.Blocks() as demo:
+    gr.Markdown("## 🎨 Stable Diffusion Avatar Generator with Preset Styles (CPU Optimized)")
+    gr.Markdown("This demo uses a smaller, distilled Stable Diffusion model and is optimized for CPU inference. Generation will still take time on CPU, but should be faster than larger models.")
     with gr.Row():
         with gr.Column():
-            image_input = gr.Image(label="Upload your photo", type="pil", sources=["upload", "webcam"])
+            image_input = gr.Image(label="Upload your photo (Note: the image is currently used only to trigger generation, not as direct input)", type="pil", sources=["upload", "webcam"])
             style_selector = gr.Radio(choices=list(styles.keys()), label="Choose a style", value="Anime")
             generate_btn = gr.Button("Generate Avatar")
         with gr.Column():
@@ -45,4 +109,4 @@ with gr.Blocks() as demo:
 
     generate_btn.click(fn=generate_avatar, inputs=[image_input, style_selector], outputs=output_image)
 
-demo.launch()
+demo.launch()
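
Note on actually using the uploaded photo: the comment block inside generate_avatar points out that the image input is currently ignored, and that real image-to-image would need an img2img pipeline or a ControlNet/adapter. Below is a minimal sketch of the img2img route. It assumes diffusers' standard StableDiffusionImg2ImgPipeline can load the same distilled checkpoint (it shares the usual SD components) and reuses the styles dict from app.py; the function name generate_avatar_img2img is hypothetical.

import torch
from diffusers import StableDiffusionImg2ImgPipeline

# Hypothetical sketch: load the same distilled checkpoint into an img2img
# pipeline so the uploaded photo seeds the generation instead of being ignored.
img2img_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "nota-ai/bk-sdm-small",     # same model_id as in the commit above
    torch_dtype=torch.float32,  # float32 for CPU, as in the commit
).to("cpu")

def generate_avatar_img2img(image, style):
    if image is None:
        return None
    prompt = f"{styles[style]} a person, high quality, detailed"  # styles dict from app.py
    init_image = image.convert("RGB").resize((512, 512))
    # strength controls how far the output may drift from the upload:
    # low values preserve the person's identity, high values apply more style.
    with torch.no_grad():
        return img2img_pipe(
            prompt=prompt,
            image=init_image,
            strength=0.6,
            guidance_scale=7.0,
            num_inference_steps=20,
        ).images[0]

With img2img, diffusers runs roughly strength × num_inference_steps denoising steps, so at these settings CPU latency should stay close to the commit's text-to-image path.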