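"""Auto-Diffusers code generator.

Generates hardware-optimized diffusers code by sending detected (or manually
supplied) hardware specs, plus an optional memory analysis, to the Gemini API.
"""
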
import os
from dotenv import load_dotenv
import google.generativeai as genai
from hardware_detector import HardwareDetector
from typing import Dict, Optional

load_dotenv()


class AutoDiffusersGenerator:
    def __init__(self, api_key: str):
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel('gemini-2.5-flash-preview-05-20')
        self.hardware_detector = HardwareDetector()
        
    def generate_optimized_code(self, 
                              model_name: str, 
                              prompt_text: str,
                              image_size: tuple = (768, 1360),  # (height, width)
                              num_inference_steps: int = 4,
                              use_manual_specs: bool = False,
                              manual_specs: Optional[Dict] = None,
                              memory_analysis: Optional[Dict] = None) -> str:
        """Generate optimized diffusers code based on hardware specs and memory analysis."""
        
        # Get hardware specifications
        if use_manual_specs and manual_specs:
            hardware_specs = manual_specs
            # Determine optimization profile based on manual specs
            if hardware_specs.get('gpu_info'):
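                # Profile thresholds: >=16 GB VRAM -> performance,
                # >=8 GB -> balanced, otherwise memory-efficient.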
                vram_gb = hardware_specs['gpu_info'][0]['memory_mb'] / 1024
                if vram_gb >= 16:
                    optimization_profile = 'performance'
                elif vram_gb >= 8:
                    optimization_profile = 'balanced'
                else:
                    optimization_profile = 'memory_efficient'
            else:
                optimization_profile = 'cpu_only'
        else:
            hardware_specs = self.hardware_detector.specs
            optimization_profile = self.hardware_detector.get_optimization_profile()
        
        # Create the prompt for Gemini API
        system_prompt = self._create_generation_prompt(
            model_name, prompt_text, image_size, num_inference_steps, 
            hardware_specs, optimization_profile, memory_analysis
        )
        
        try:
            response = self.model.generate_content(system_prompt)
            return response.text
        except Exception as e:
            return f"Error generating code: {str(e)}"
    
    def _create_generation_prompt(self, 
                                model_name: str, 
                                prompt_text: str,
                                image_size: tuple,
                                num_inference_steps: int,
                                hardware_specs: Dict,
                                optimization_profile: str,
                                memory_analysis: Optional[Dict] = None) -> str:
        """Create the prompt for Gemini API to generate optimized code."""
        
        base_prompt = f"""
You are an expert in optimizing diffusers library code for different hardware configurations.

TASK: Generate optimized Python code for running a diffusion model with the following specifications:
- Model: {model_name}
- Prompt: "{prompt_text}"
- Image size: {image_size[0]}x{image_size[1]} (height x width)
- Inference steps: {num_inference_steps}

HARDWARE SPECIFICATIONS:
- Platform: {hardware_specs['platform']} ({hardware_specs['architecture']})
- CPU Cores: {hardware_specs['cpu_count']}
- CUDA Available: {hardware_specs['cuda_available']}
- MPS Available: {hardware_specs['mps_available']}
- Optimization Profile: {optimization_profile}
"""

        if hardware_specs.get('gpu_info'):
            base_prompt += f"- GPU: {hardware_specs['gpu_info'][0]['name']} ({hardware_specs['gpu_info'][0]['memory_mb']/1024:.1f} GB VRAM)\n"

        # Add user dtype preference if specified
        if hardware_specs.get('user_dtype'):
            base_prompt += f"- User specified dtype: {hardware_specs['user_dtype']}\n"

        # Add memory analysis information
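        # Expected memory_analysis keys, as consumed below:
        #   'memory_info': estimated_inference_memory_fp16_gb, memory_fp16_gb
        #   'recommendations': recommendations, recommended_precision,
        #                      cpu_offload, attention_slicing, vae_slicing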
        if memory_analysis:
            memory_info = memory_analysis.get('memory_info', {})
            recommendations = memory_analysis.get('recommendations', {})
            
            base_prompt += "\nMEMORY ANALYSIS:\n"
            if memory_info.get('estimated_inference_memory_fp16_gb'):
                base_prompt += f"- Model Memory Requirements: {memory_info['estimated_inference_memory_fp16_gb']} GB (FP16 inference)\n"
            if memory_info.get('memory_fp16_gb'):
                base_prompt += f"- Model Weights Size: {memory_info['memory_fp16_gb']} GB (FP16)\n"
            if recommendations.get('recommendations'):
                base_prompt += f"- Memory Recommendation: {', '.join(recommendations['recommendations'])}\n"
            if recommendations.get('recommended_precision'):
                base_prompt += f"- Recommended Precision: {recommendations['recommended_precision']}\n"
            if recommendations.get('cpu_offload'):
                base_prompt += f"- CPU Offloading Required: {recommendations['cpu_offload']}\n"
            if recommendations.get('attention_slicing'):
                base_prompt += f"- Attention Slicing Recommended: {recommendations['attention_slicing']}\n"
            if recommendations.get('vae_slicing'):
                base_prompt += f"- VAE Slicing Recommended: {recommendations['vae_slicing']}\n"

        base_prompt += """
OPTIMIZATION REQUIREMENTS:
Please scrape and analyze the latest optimization techniques from this URL: https://huggingface.co/docs/diffusers/main/en/optimization

IMPORTANT: For FLUX.1-schnell models, do NOT include guidance_scale parameter as it's not needed.

Based on the hardware specs and optimization profile, generate Python code that includes:

1. **Memory Optimizations** (if low VRAM):
   - Model offloading (enable_model_cpu_offload, enable_sequential_cpu_offload)
   - Attention slicing (enable_attention_slicing)
   - VAE slicing (enable_vae_slicing)
   - Memory efficient attention

2. **Speed Optimizations**:
   - Appropriate torch.compile() usage
   - Optimal dtype selection (torch.float16, torch.bfloat16)
   - Device placement optimization

3. **Hardware-Specific Optimizations**:
   - CUDA optimizations for NVIDIA GPUs
   - MPS optimizations for Apple Silicon
   - CPU fallbacks when needed

4. **Model-Specific Optimizations**:
   - Appropriate scheduler selection
   - Optimal inference parameters
   - Pipeline configuration

5. **Data Type (dtype) Selection**:
   - If user specified a dtype, use that exact dtype in the code
   - If no dtype specified, automatically select the optimal dtype based on hardware:
     * Apple Silicon (MPS): prefer torch.bfloat16
     * NVIDIA GPUs: prefer torch.float16 or torch.bfloat16 based on capability
     * CPU only: use torch.float32
   - Add a comment explaining why that dtype was chosen

IMPORTANT GUIDELINES:
- Include all necessary imports
- Add brief comments explaining optimization choices
- Use the most current and effective optimization techniques
- Ensure code is production-ready

CODE STYLE REQUIREMENTS - GENERATE COMPACT CODE:
- Assign static values directly to function arguments instead of using variables when possible
- Minimize variable declarations - inline values where it improves readability
- Reduce exception handling to essential cases only - assume normal operation
- Use concise, direct code patterns
- Combine operations where logical and readable
- Avoid unnecessary intermediate variables
- Keep code clean and minimal while maintaining functionality

Examples of preferred compact style:
- pipe = Pipeline.from_pretrained("model", torch_dtype=torch.float16) instead of storing dtype in variable
- image = pipe("prompt", num_inference_steps=4, height=768, width=1360) instead of separate variables
- Direct assignment: device = "cuda" if torch.cuda.is_available() else "cpu"

Generate ONLY the Python code, no explanations before or after the code block.
"""
        
        return base_prompt
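
    # For reference, the compact style requested above yields code along these
    # lines (an illustrative sketch, not actual generator output):
    #
    #   import torch
    #   from diffusers import FluxPipeline
    #
    #   pipe = FluxPipeline.from_pretrained(
    #       "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16
    #   ).to("cuda")
    #   image = pipe("A cat holding a sign that says hello world",
    #                num_inference_steps=4, height=768, width=1360).images[0]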
    
    def run_interactive_mode(self):
        """Run the generator in interactive mode."""
        print("=== Auto-Diffusers Code Generator ===")
        print("This tool generates optimized diffusers code based on your hardware.\n")
        
        # Check hardware
        print("=== Hardware Detection ===")
        self.hardware_detector.print_specs()
        
        use_manual = input("\nUse manual hardware input? (y/n): ").lower() == 'y'
        
        # Get user inputs
        print("\n=== Model Configuration ===")
        model_name = input("Model name (default: black-forest-labs/FLUX.1-schnell): ").strip()
        if not model_name:
            model_name = "black-forest-labs/FLUX.1-schnell"
            
        prompt_text = input("Prompt text (default: A cat holding a sign that says hello world): ").strip()
        if not prompt_text:
            prompt_text = "A cat holding a sign that says hello world"
            
        try:
            width = int(input("Image width (default: 1360): ") or "1360")
            height = int(input("Image height (default: 768): ") or "768")
            steps = int(input("Inference steps (default: 4): ") or "4")
        except ValueError:
            width, height, steps = 1360, 768, 4
            
        print("\n=== Generating Optimized Code ===")
        
        # Generate code
        optimized_code = self.generate_optimized_code(
            model_name=model_name,
            prompt_text=prompt_text,
            image_size=(height, width),
            num_inference_steps=steps,
            use_manual_specs=use_manual  # NOTE: manual_specs are not collected here, so auto-detected specs are still used
        )
        
        print("\n" + "="*60)
        print("OPTIMIZED DIFFUSERS CODE:")
        print("="*60)
        print(optimized_code)
        print("="*60)


def main():
    # Get API key from .env file
    api_key = os.getenv('GOOGLE_API_KEY')
    if not api_key:
        api_key = os.getenv('GEMINI_API_KEY')  # fallback
    if not api_key:
        api_key = input("Enter your Gemini API key: ").strip()
        if not api_key:
            print("API key is required!")
            return
    
    generator = AutoDiffusersGenerator(api_key)
    generator.run_interactive_mode()


if __name__ == "__main__":
    main()
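
# Programmatic use (a minimal sketch, assuming GOOGLE_API_KEY is set in .env):
#
#   generator = AutoDiffusersGenerator(os.getenv("GOOGLE_API_KEY"))
#   code = generator.generate_optimized_code(
#       model_name="black-forest-labs/FLUX.1-schnell",
#       prompt_text="A cat holding a sign that says hello world",
#       image_size=(768, 1360),  # (height, width)
#       num_inference_steps=4,
#   )
#   print(code)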