import os
from dotenv import load_dotenv
import google.generativeai as genai
from hardware_detector import HardwareDetector
from typing import Dict, List

load_dotenv()
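
# The Gemini API key (GOOGLE_API_KEY or GEMINI_API_KEY) is read from a local
# .env file when present; main() falls back to prompting the user for a key.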


class AutoDiffusersGenerator:
    """Generates hardware-optimized diffusers code by prompting the Gemini API."""

    def __init__(self, api_key: str):
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel('gemini-2.5-flash-preview-05-20')
        self.hardware_detector = HardwareDetector()
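
    # HardwareDetector (local module) is expected to expose `.specs`,
    # `.get_optimization_profile()`, and `.print_specs()`; those are the only
    # members used in this file.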

    def generate_optimized_code(self,
                                model_name: str,
                                prompt_text: str,
                                image_size: tuple = (768, 1360),
                                num_inference_steps: int = 4,
                                use_manual_specs: bool = False,
                                manual_specs: Dict = None,
                                memory_analysis: Dict = None) -> str:
        """Generate optimized diffusers code based on hardware specs and memory analysis."""
        # Get hardware specifications
        if use_manual_specs and manual_specs:
            hardware_specs = manual_specs
            # Determine optimization profile based on manual specs:
            # >= 16 GB VRAM -> performance, >= 8 GB -> balanced,
            # otherwise memory_efficient; no GPU -> cpu_only.
            if hardware_specs.get('gpu_info'):
                vram_gb = hardware_specs['gpu_info'][0]['memory_mb'] / 1024
                if vram_gb >= 16:
                    optimization_profile = 'performance'
                elif vram_gb >= 8:
                    optimization_profile = 'balanced'
                else:
                    optimization_profile = 'memory_efficient'
            else:
                optimization_profile = 'cpu_only'
        else:
            hardware_specs = self.hardware_detector.specs
            optimization_profile = self.hardware_detector.get_optimization_profile()

        # Create the prompt for Gemini API
        system_prompt = self._create_generation_prompt(
            model_name, prompt_text, image_size, num_inference_steps,
            hardware_specs, optimization_profile, memory_analysis
        )

        try:
            response = self.model.generate_content(system_prompt)
            return response.text
        except Exception as e:
            return f"Error generating code: {str(e)}"
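
    # The prompt sent to Gemini is assembled in stages: task and hardware facts
    # first, then any memory-analysis findings, then fixed optimization and
    # code-style requirements.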
    def _create_generation_prompt(self,
                                  model_name: str,
                                  prompt_text: str,
                                  image_size: tuple,
                                  num_inference_steps: int,
                                  hardware_specs: Dict,
                                  optimization_profile: str,
                                  memory_analysis: Dict = None) -> str:
        """Create the prompt for Gemini API to generate optimized code."""
        base_prompt = f"""
You are an expert in optimizing diffusers library code for different hardware configurations.

TASK: Generate optimized Python code for running a diffusion model with the following specifications:
- Model: {model_name}
- Prompt: "{prompt_text}"
- Image size: {image_size[0]}x{image_size[1]}
- Inference steps: {num_inference_steps}

HARDWARE SPECIFICATIONS:
- Platform: {hardware_specs['platform']} ({hardware_specs['architecture']})
- CPU Cores: {hardware_specs['cpu_count']}
- CUDA Available: {hardware_specs['cuda_available']}
- MPS Available: {hardware_specs['mps_available']}
- Optimization Profile: {optimization_profile}
"""

        if hardware_specs.get('gpu_info'):
            base_prompt += f"- GPU: {hardware_specs['gpu_info'][0]['name']} ({hardware_specs['gpu_info'][0]['memory_mb']/1024:.1f} GB VRAM)\n"

        # Add user dtype preference if specified
        if hardware_specs.get('user_dtype'):
            base_prompt += f"- User specified dtype: {hardware_specs['user_dtype']}\n"

        # Add memory analysis information
        if memory_analysis:
            memory_info = memory_analysis.get('memory_info', {})
            recommendations = memory_analysis.get('recommendations', {})

            base_prompt += f"\nMEMORY ANALYSIS:\n"
            if memory_info.get('estimated_inference_memory_fp16_gb'):
                base_prompt += f"- Model Memory Requirements: {memory_info['estimated_inference_memory_fp16_gb']} GB (FP16 inference)\n"
            if memory_info.get('memory_fp16_gb'):
                base_prompt += f"- Model Weights Size: {memory_info['memory_fp16_gb']} GB (FP16)\n"
            if recommendations.get('recommendations'):
                base_prompt += f"- Memory Recommendation: {', '.join(recommendations['recommendations'])}\n"
            if recommendations.get('recommended_precision'):
                base_prompt += f"- Recommended Precision: {recommendations['recommended_precision']}\n"
            if recommendations.get('cpu_offload'):
                base_prompt += f"- CPU Offloading Required: {recommendations['cpu_offload']}\n"
            if recommendations.get('attention_slicing'):
                base_prompt += f"- Attention Slicing Recommended: {recommendations['attention_slicing']}\n"
            if recommendations.get('vae_slicing'):
                base_prompt += f"- VAE Slicing Recommended: {recommendations['vae_slicing']}\n"

        base_prompt += f"""
OPTIMIZATION REQUIREMENTS:

Please scrape and analyze the latest optimization techniques from this URL: https://huggingface.co/docs/diffusers/main/en/optimization

IMPORTANT: For FLUX.1-schnell models, do NOT include guidance_scale parameter as it's not needed.

Based on the hardware specs and optimization profile, generate Python code that includes:

1. **Memory Optimizations** (if low VRAM):
   - Model offloading (enable_model_cpu_offload, enable_sequential_cpu_offload)
   - Attention slicing (enable_attention_slicing)
   - VAE slicing (enable_vae_slicing)
   - Memory efficient attention

2. **Speed Optimizations**:
   - Appropriate torch.compile() usage
   - Optimal dtype selection (torch.float16, torch.bfloat16)
   - Device placement optimization

3. **Hardware-Specific Optimizations**:
   - CUDA optimizations for NVIDIA GPUs
   - MPS optimizations for Apple Silicon
   - CPU fallbacks when needed

4. **Model-Specific Optimizations**:
   - Appropriate scheduler selection
   - Optimal inference parameters
   - Pipeline configuration

5. **Data Type (dtype) Selection**:
   - If user specified a dtype, use that exact dtype in the code
   - If no dtype specified, automatically select the optimal dtype based on hardware:
     * Apple Silicon (MPS): prefer torch.bfloat16
     * NVIDIA GPUs: prefer torch.float16 or torch.bfloat16 based on capability
     * CPU only: use torch.float32
   - Add a comment explaining why that dtype was chosen

IMPORTANT GUIDELINES:
- Include all necessary imports
- Add brief comments explaining optimization choices
- Use the most current and effective optimization techniques
- Ensure code is production-ready

CODE STYLE REQUIREMENTS - GENERATE COMPACT CODE:
- Assign static values directly to function arguments instead of using variables when possible
- Minimize variable declarations - inline values where it improves readability
- Reduce exception handling to essential cases only - assume normal operation
- Use concise, direct code patterns
- Combine operations where logical and readable
- Avoid unnecessary intermediate variables
- Keep code clean and minimal while maintaining functionality

Examples of preferred compact style:
- pipe = Pipeline.from_pretrained("model", torch_dtype=torch.float16) instead of storing dtype in variable
- image = pipe("prompt", num_inference_steps=4, height=768, width=1360) instead of separate variables
- Direct assignment: device = "cuda" if torch.cuda.is_available() else "cpu"

Generate ONLY the Python code, no explanations before or after the code block.
"""

        return base_prompt
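
    # For illustration only (not executed): a sketch of the compact style the
    # prompt above asks Gemini to produce, assuming FLUX.1-schnell on a CUDA GPU
    # with enough VRAM to keep the whole pipeline on-device:
    #
    #   import torch
    #   from diffusers import FluxPipeline
    #
    #   pipe = FluxPipeline.from_pretrained(
    #       "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16
    #   ).to("cuda")
    #   image = pipe("A cat holding a sign that says hello world",
    #                num_inference_steps=4, height=768, width=1360).images[0]
    #   image.save("flux_output.png")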

    def run_interactive_mode(self):
        """Run the generator in interactive mode."""
        print("=== Auto-Diffusers Code Generator ===")
        print("This tool generates optimized diffusers code based on your hardware.\n")

        # Check hardware
        print("=== Hardware Detection ===")
        self.hardware_detector.print_specs()

        use_manual = input("\nUse manual hardware input? (y/n): ").lower() == 'y'

        # Get user inputs
        print("\n=== Model Configuration ===")
        model_name = input("Model name (default: black-forest-labs/FLUX.1-schnell): ").strip()
        if not model_name:
            model_name = "black-forest-labs/FLUX.1-schnell"

        prompt_text = input("Prompt text (default: A cat holding a sign that says hello world): ").strip()
        if not prompt_text:
            prompt_text = "A cat holding a sign that says hello world"

        try:
            width = int(input("Image width (default: 1360): ") or "1360")
            height = int(input("Image height (default: 768): ") or "768")
            steps = int(input("Inference steps (default: 4): ") or "4")
        except ValueError:
            # Fall back to the defaults if any numeric input is invalid.
            width, height, steps = 1360, 768, 4

        print("\n=== Generating Optimized Code ===")

        # Generate code
        optimized_code = self.generate_optimized_code(
            model_name=model_name,
            prompt_text=prompt_text,
            image_size=(height, width),
            num_inference_steps=steps,
            use_manual_specs=use_manual
        )

        print("\n" + "="*60)
        print("OPTIMIZED DIFFUSERS CODE:")
        print("="*60)
        print(optimized_code)
        print("="*60)


def main():
    # Get API key from .env file
    api_key = os.getenv('GOOGLE_API_KEY')
    if not api_key:
        api_key = os.getenv('GEMINI_API_KEY')  # fallback
    if not api_key:
        api_key = input("Enter your Gemini API key: ").strip()
    if not api_key:
        print("API key is required!")
        return

    generator = AutoDiffusersGenerator(api_key)
    generator.run_interactive_mode()


if __name__ == "__main__":
    main()
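

# Example programmatic use (a sketch; assumes a valid Gemini API key and that
# hardware_detector.py is importable alongside this file):
#
#   generator = AutoDiffusersGenerator(api_key="YOUR_GEMINI_API_KEY")
#   code = generator.generate_optimized_code(
#       model_name="black-forest-labs/FLUX.1-schnell",
#       prompt_text="A cat holding a sign that says hello world",
#       image_size=(768, 1360),
#       num_inference_steps=4,
#   )
#   print(code)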