manbeast3b
/

fire_stang2

Model card Files Files and versions Community

manbeast3b committed on Feb 23

Commit

8f24c5c

verified ·

1 Parent(s): 065ed72

Update src/pipeline.py

Browse files

Files changed (1) hide show

src/pipeline.py +311 -136

src/pipeline.py CHANGED Viewed

@@ -1,141 +1,330 @@
 import os
-import gc
-import time
 import torch
-import torch.nn.functional as F
-from PIL import Image as img
 from PIL.Image import Image
-from typing import Optional, Type
-from dataclasses import dataclass
-from diffusers import (
-    FluxTransformer2DModel,
-    DiffusionPipeline,
-    AutoencoderTiny
-)
 from transformers import T5EncoderModel
 from huggingface_hub.constants import HF_HUB_CACHE
 from torchao.quantization import quantize_, int8_weight_only, float8_weight_only
 from pipelines.models import TextToImageRequest
-from torch import Generator
-from para_attn.first_block_cache.diffusers_adapters import apply_cache_on_pipe
-# Configuration
 @dataclass
-class Config:
-    CKPT_ID: str = "black-forest-labs/FLUX.1-schnell"
-    CKPT_REVISION: str = "741f7c3ce8b383c54771c7003378a50191e9efe9"
-    DEVICE: str = "cuda"
-    DTYPE = torch.bfloat16
-    PYTORCH_CUDA_ALLOC_CONF: str = "expandable_segments:True"
-# Initialize global settings
-def init_global_settings():
-    torch.backends.cuda.matmul.allow_tf32 = True
-    torch.backends.cudnn.enabled = True
-    torch.backends.cudnn.benchmark = True
-    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = Config.PYTORCH_CUDA_ALLOC_CONF
-# Tensor comparison utilities
-class TensorComparator:
-    @staticmethod
-    def orig_comparison(t1, t2, *, threshold=0.85):
-        mean_diff = (t1 - t2).abs().mean()
-        mean_t1 = t1.abs().mean()
-        diff = mean_diff / mean_t1
-        return diff.item() < threshold
-    @staticmethod
-    def mse_comparison(t1, t2, threshold=0.95):
-        mse = F.mse_loss(t1, t2)
-        return mse.item() < threshold
-    @staticmethod
-    def relative_comparison(t1, t2, threshold=0.15):
-        with torch.no_grad():
-            mean_diff = torch.mean(torch.abs(t1 - t2))
-            mean_t1 = torch.mean(torch.abs(t1))
-            relative_diff = mean_diff / (mean_t1 + 1e-8)
-            return relative_diff.item() < threshold
-    @staticmethod
-    def normalized_comparison(t1, t2, threshold=0.85):
-        with torch.no_grad():
-            t1_norm = (t1 - t1.mean()) / (t1.std() + 1e-8)
-            t2_norm = (t2 - t2.mean()) / (t2.std() + 1e-8)
-            diff = torch.mean(torch.abs(t1_norm - t2_norm))
-            return diff.item() < threshold
-    @staticmethod
-    def l1_comparison(t1, t2, threshold=0.85):
-        with torch.no_grad():
-            l1_dist = torch.nn.L1Loss()(t1, t2)
-            return l1_dist.item() < threshold
-    @staticmethod
-    def max_diff_comparison(t1, t2, threshold=0.85):
-        with torch.no_grad():
-            max_diff = torch.max(torch.abs(t1 - t2))
-            return max_diff.item() < threshold
-# Memory management
-class MemoryManager:
-    @staticmethod
-    def empty_cache():
-        gc.collect()
-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
-# Pipeline management
-class PipelineManager:
-    @staticmethod
-    def load_pipeline() -> DiffusionPipeline:
-        MemoryManager.empty_cache()
-        text_encoder_2 = T5EncoderModel.from_pretrained(
-            "city96/t5-v1_1-xxl-encoder-bf16",
-            revision="1b9c856aadb864af93c1dcdc226c2774fa67bc86",
-            torch_dtype=Config.DTYPE
-        ).to(memory_format=torch.channels_last)
-        vae = AutoencoderTiny.from_pretrained("RobertML/FLUX.1-schnell-vae_e3m2", revision="da0d2cd7815792fb40d084dbd8ed32b63f153d8d", torch_dtype=Config.DTYPE)
-        # vae.encoder=_load(vae.encoder, "E", dtype=torch.bfloat16); vae.decoder=_load(vae.decoder, "D", dtype=torch.bfloat16)
-        path = os.path.join(HF_HUB_CACHE, "models--RobertML--FLUX.1-schnell-int8wo/snapshots/307e0777d92df966a3c0f99f31a6ee8957a9857a")
-        model = FluxTransformer2DModel.from_pretrained(
-            path,
-            torch_dtype=Config.DTYPE,
-            use_safetensors=False
-        ).to(memory_format=torch.channels_last)
         pipeline = DiffusionPipeline.from_pretrained(
-            Config.CKPT_ID,
-            vae=vae,
-            revision=Config.CKPT_REVISION,
-            transformer=model,
-            text_encoder_2=text_encoder_2,
-            torch_dtype=Config.DTYPE,
-        ).to(Config.DEVICE)
-        apply_cache_on_pipe(pipeline)
-        pipeline.to(memory_format=torch.channels_last)
-        pipeline.vae = torch.compile(pipeline.vae, mode="max-autotune")
-        quantize_(pipeline.vae, int8_weight_only())
-        quantize_(pipeline.vae, float8_weight_only())
-        PipelineManager._warmup(pipeline)
         return pipeline
-    @staticmethod
-    def _warmup(pipeline):
-        for _ in range(3):
-            pipeline(prompt=" ")
-    @staticmethod
-    @torch.no_grad()
-    def infer(request: TextToImageRequest, pipeline: DiffusionPipeline, generator: Generator) -> Image:
-        image = pipeline(
-            request.prompt,
             generator=generator,
             guidance_scale=0.0,
             num_inference_steps=4,
@@ -143,18 +332,4 @@ class PipelineManager:
             height=request.height,
             width=request.width,
             output_type="pil"
-        ).images[0]
-        return image
-# Initialize global settings
-init_global_settings()
-# Keep original interface
-load_pipeline = PipelineManager.load_pipeline
-infer = PipelineManager.infer
-are_two_tensors_similar = TensorComparator.orig_comparison
-are_two_tensors_similar_relative = TensorComparator.relative_comparison
-are_two_tensors_similar_normalized = TensorComparator.normalized_comparison
-are_two_tensors_similar_l1 = TensorComparator.l1_comparison
-are_two_tensors_similar_max_diff = TensorComparator.max_diff_comparison
-empty_cache = MemoryManager.empty_cache

+# import os
+# import gc
+# import time
+# import torch
+# import torch.nn.functional as F
+# from PIL import Image as img
+# from PIL.Image import Image
+# from typing import Optional, Type
+# from dataclasses import dataclass
+# from diffusers import (
+#     FluxTransformer2DModel,
+#     DiffusionPipeline,
+#     AutoencoderTiny
+# )
+# from transformers import T5EncoderModel
+# from huggingface_hub.constants import HF_HUB_CACHE
+# from torchao.quantization import quantize_, int8_weight_only, float8_weight_only
+# from pipelines.models import TextToImageRequest
+# from torch import Generator
+# from para_attn.first_block_cache.diffusers_adapters import apply_cache_on_pipe
+# # Configuration
+# @dataclass
+# class Config:
+#     CKPT_ID: str = "black-forest-labs/FLUX.1-schnell"
+#     CKPT_REVISION: str = "741f7c3ce8b383c54771c7003378a50191e9efe9"
+#     DEVICE: str = "cuda"
+#     DTYPE = torch.bfloat16
+#     PYTORCH_CUDA_ALLOC_CONF: str = "expandable_segments:True"
+# # Initialize global settings
+# def init_global_settings():
+#     torch.backends.cuda.matmul.allow_tf32 = True
+#     torch.backends.cudnn.enabled = True
+#     torch.backends.cudnn.benchmark = True
+#     os.environ['PYTORCH_CUDA_ALLOC_CONF'] = Config.PYTORCH_CUDA_ALLOC_CONF
+# # Tensor comparison utilities
+# class TensorComparator:
+#     @staticmethod
+#     def orig_comparison(t1, t2, *, threshold=0.85):
+#         mean_diff = (t1 - t2).abs().mean()
+#         mean_t1 = t1.abs().mean()
+#         diff = mean_diff / mean_t1
+#         return diff.item() < threshold
+#     @staticmethod
+#     def mse_comparison(t1, t2, threshold=0.95):
+#         mse = F.mse_loss(t1, t2)
+#         return mse.item() < threshold
+#     @staticmethod
+#     def relative_comparison(t1, t2, threshold=0.15):
+#         with torch.no_grad():
+#             mean_diff = torch.mean(torch.abs(t1 - t2))
+#             mean_t1 = torch.mean(torch.abs(t1))
+#             relative_diff = mean_diff / (mean_t1 + 1e-8)
+#             return relative_diff.item() < threshold
+#     @staticmethod
+#     def normalized_comparison(t1, t2, threshold=0.85):
+#         with torch.no_grad():
+#             t1_norm = (t1 - t1.mean()) / (t1.std() + 1e-8)
+#             t2_norm = (t2 - t2.mean()) / (t2.std() + 1e-8)
+#             diff = torch.mean(torch.abs(t1_norm - t2_norm))
+#             return diff.item() < threshold
+#     @staticmethod
+#     def l1_comparison(t1, t2, threshold=0.85):
+#         with torch.no_grad():
+#             l1_dist = torch.nn.L1Loss()(t1, t2)
+#             return l1_dist.item() < threshold
+#     @staticmethod
+#     def max_diff_comparison(t1, t2, threshold=0.85):
+#         with torch.no_grad():
+#             max_diff = torch.max(torch.abs(t1 - t2))
+#             return max_diff.item() < threshold
+# # Memory management
+# class MemoryManager:
+#     @staticmethod
+#     def empty_cache():
+#         gc.collect()
+#         torch.cuda.empty_cache()
+#         torch.cuda.reset_max_memory_allocated()
+#         torch.cuda.reset_peak_memory_stats()
+# # Pipeline management
+# class PipelineManager:
+#     @staticmethod
+#     def load_pipeline() -> DiffusionPipeline:
+#         MemoryManager.empty_cache()
+#         text_encoder_2 = T5EncoderModel.from_pretrained(
+#             "city96/t5-v1_1-xxl-encoder-bf16",
+#             revision="1b9c856aadb864af93c1dcdc226c2774fa67bc86",
+#             torch_dtype=Config.DTYPE
+#         ).to(memory_format=torch.channels_last)
+#         vae = AutoencoderTiny.from_pretrained("RobertML/FLUX.1-schnell-vae_e3m2", revision="da0d2cd7815792fb40d084dbd8ed32b63f153d8d", torch_dtype=Config.DTYPE)
+#         # vae.encoder=_load(vae.encoder, "E", dtype=torch.bfloat16); vae.decoder=_load(vae.decoder, "D", dtype=torch.bfloat16)
+#         path = os.path.join(HF_HUB_CACHE, "models--RobertML--FLUX.1-schnell-int8wo/snapshots/307e0777d92df966a3c0f99f31a6ee8957a9857a")
+#         model = FluxTransformer2DModel.from_pretrained(
+#             path,
+#             torch_dtype=Config.DTYPE,
+#             use_safetensors=False
+#         ).to(memory_format=torch.channels_last)
+#         pipeline = DiffusionPipeline.from_pretrained(
+#             Config.CKPT_ID,
+#             vae=vae,
+#             revision=Config.CKPT_REVISION,
+#             transformer=model,
+#             text_encoder_2=text_encoder_2,
+#             torch_dtype=Config.DTYPE,
+#         ).to(Config.DEVICE)
+#         apply_cache_on_pipe(pipeline)
+#         pipeline.to(memory_format=torch.channels_last)
+#         pipeline.vae = torch.compile(pipeline.vae, mode="max-autotune")
+#         quantize_(pipeline.vae, int8_weight_only())
+#         quantize_(pipeline.vae, float8_weight_only())
+#         PipelineManager._warmup(pipeline)
+#         return pipeline
+#     @staticmethod
+#     def _warmup(pipeline):
+#         for _ in range(3):
+#             pipeline(prompt=" ")
+#     @staticmethod
+#     @torch.no_grad()
+#     def infer(request: TextToImageRequest, pipeline: DiffusionPipeline, generator: Generator) -> Image:
+#         image = pipeline(
+#             request.prompt,
+#             generator=generator,
+#             guidance_scale=0.0,
+#             num_inference_steps=4,
+#             max_sequence_length=256,
+#             height=request.height,
+#             width=request.width,
+#             output_type="pil"
+#         ).images[0]
+#         return image
+# # Initialize global settings
+# init_global_settings()
+# # Keep original interface
+# load_pipeline = PipelineManager.load_pipeline
+# infer = PipelineManager.infer
+# are_two_tensors_similar = TensorComparator.orig_comparison
+# are_two_tensors_similar_relative = TensorComparator.relative_comparison
+# are_two_tensors_similar_normalized = TensorComparator.normalized_comparison
+# are_two_tensors_similar_l1 = TensorComparator.l1_comparison
+# are_two_tensors_similar_max_diff = TensorComparator.max_diff_comparison
+# empty_cache = MemoryManager.empty_cache
+from __future__ import annotations
 import os
 import torch
+import functools
+from enum import Enum, auto
+from contextlib import contextmanager
+from typing import Protocol, TypeVar, Generic, Callable, Any
+from dataclasses import dataclass, field
 from PIL.Image import Image
+from torch import Generator
+from diffusers import DiffusionPipeline, AutoencoderTiny, FluxTransformer2DModel
 from transformers import T5EncoderModel
 from huggingface_hub.constants import HF_HUB_CACHE
 from torchao.quantization import quantize_, int8_weight_only, float8_weight_only
+from first_block_cache.diffusers_adapters import apply_cache_on_pipe
 from pipelines.models import TextToImageRequest
+T = TypeVar('T')
+class ModelComponent(Protocol):
+    """Structural type for any object that exposes a ``to(*args, **kwargs)`` method."""
+    def to(self, *args, **kwargs) -> Any: ...
+class ModelState(Enum):
+    """Lifecycle stages that a ``PipelineBuilder`` records in its ``state`` attribute."""
+    # In the code visible here only INITIALIZED (builder construction) and
+    # READY (end of ``build()``) are ever assigned; LOADED and OPTIMIZED are
+    # declared but not set anywhere in this file.
+    INITIALIZED = auto()
+    LOADED = auto()
+    OPTIMIZED = auto()
+    READY = auto()
+class ResourceMonitor:
+    """Monitors and manages system resources."""
+    @contextmanager
+    def monitor_memory(self, threshold_mb: int = 1000):
+        """Release cached CUDA memory when the wrapped block allocates too much.
+
+        Samples ``torch.cuda.memory_allocated()`` before and after the ``with``
+        body and calls ``torch.cuda.empty_cache()`` when the growth exceeds
+        *threshold_mb*.
+
+        Args:
+            threshold_mb: Allowed growth in allocated memory, expressed in the
+                same unit as the samples (bytes divided by ``1024**2``).
+        """
+        initial_memory = torch.cuda.memory_allocated() / 1024**2
+        try:
+            yield
+        finally:
+            # Run the check even when the body raises (e.g. an out-of-memory
+            # error): without try/finally the generator never resumes after
+            # ``yield`` on an exception and the cleanup would be skipped.
+            final_memory = torch.cuda.memory_allocated() / 1024**2
+            if final_memory - initial_memory > threshold_mb:
+                torch.cuda.empty_cache()
 @dataclass
+class ModelRegistry(Generic[T]):
+    """Type-safe registry for model components."""
+    # Name -> component mapping; default_factory gives every registry its own dict.
+    _components: dict[str, T] = field(default_factory=dict)
+    def register(self, name: str, component: T) -> None:
+        """Store *component* under *name*, replacing any entry already using that name."""
+        self._components[name] = component
+    def get(self, name: str) -> T:
+        """Return the component stored under *name*; raises ``KeyError`` if none was registered."""
+        return self._components[name]
+    def __iter__(self):
+        """Iterate over the registered components (the values only, not their names)."""
+        return iter(self._components.values())
+class PipelineBuilder:
+    """Fluent builder for pipeline construction.
+
+    Every step except ``build()`` returns the builder itself so that calls can
+    be chained. The ``load_*`` steps store their models in ``self.registry``;
+    ``build()`` reads them back from there, so they must run first.
+    """
+    def __init__(self):
+        """Set the default model/device configuration and create an empty registry."""
+        self.config = {
+            "model_id": "black-forest-labs/FLUX.1-schnell",
+            "revision": "741f7c3ce8b383c54771c7003378a50191e9efe9",
+            "device": "cuda",
+            "dtype": torch.bfloat16
+        }
+        self.registry = ModelRegistry[ModelComponent]()
+        self.state = ModelState.INITIALIZED
+        self.monitor = ResourceMonitor()
+    def with_torch_settings(self) -> PipelineBuilder:
+        """Set torch backend flags and the CUDA allocator environment variable.
+
+        These are process-wide side effects, not per-builder state.
+        """
+        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.backends.cudnn.enabled = True
+        torch.backends.cudnn.benchmark = True
+        # NOTE(review): presumably this has to be set before the CUDA allocator
+        # is first used in order to take effect — confirm.
+        os.environ['PYTORCH_CUDA_ALLOC_CONF'] = "expandable_segments:True"
+        return self
+    def load_text_encoder(self) -> PipelineBuilder:
+        """Load the T5 encoder at a pinned revision and register it as ``"text_encoder"``."""
+        with self.monitor.monitor_memory():
+            encoder = T5EncoderModel.from_pretrained(
+                "city96/t5-v1_1-xxl-encoder-bf16",
+                revision="1b9c856aadb864af93c1dcdc226c2774fa67bc86",
+                torch_dtype=self.config["dtype"]
+            ).to(memory_format=torch.channels_last)
+            self.registry.register("text_encoder", encoder)
+        return self
+    def load_vae(self) -> PipelineBuilder:
+        """Load the ``AutoencoderTiny`` VAE at a pinned revision and register it as ``"vae"``."""
+        with self.monitor.monitor_memory():
+            vae = AutoencoderTiny.from_pretrained(
+                "RobertML/FLUX.1-schnell-vae_e3m2",
+                revision="da0d2cd7815792fb40d084dbd8ed32b63f153d8d",
+                torch_dtype=self.config["dtype"]
+            )
+            self.registry.register("vae", vae)
+        return self
+    def load_transformer(self) -> PipelineBuilder:
+        """Load the Flux transformer from the hub cache and register it as ``"transformer"``."""
+        with self.monitor.monitor_memory():
+            # from_pretrained receives a filesystem path inside the hub cache
+            # here, not a repository id.
+            path = os.path.join(
+                HF_HUB_CACHE,
+                "models--RobertML--FLUX.1-schnell-int8wo/snapshots/307e0777d92df966a3c0f99f31a6ee8957a9857a"
+            )
+            transformer = FluxTransformer2DModel.from_pretrained(
+                path,
+                torch_dtype=self.config["dtype"],
+                use_safetensors=False
+            ).to(memory_format=torch.channels_last)
+            self.registry.register("transformer", transformer)
+        return self
+    def optimize(self, pipeline: DiffusionPipeline) -> PipelineBuilder:
+        """Apply memory layout, compilation, quantization and caching to *pipeline* in place.
+
+        Returns the builder (not the pipeline) so the call can be chained.
+        """
+        with self.monitor.monitor_memory():
+            pipeline.to(memory_format=torch.channels_last)
+            pipeline.vae = torch.compile(pipeline.vae, mode="max-autotune")
+            # NOTE(review): quantize_ runs twice on the same VAE (int8, then
+            # float8) after it has been wrapped by torch.compile — confirm that
+            # both passes and this ordering are intended.
+            quantize_(pipeline.vae, int8_weight_only())
+            quantize_(pipeline.vae, float8_weight_only())
+            apply_cache_on_pipe(pipeline)
+        return self
+    def warmup(self, pipeline: DiffusionPipeline) -> PipelineBuilder:
+        """Run *pipeline* three times on a single-space prompt, discarding the outputs.
+
+        Gradients are disabled for the runs.
+        """
+        # Presumably front-loads torch.compile/autotune work before the first
+        # real request — confirm.
+        with torch.no_grad(), self.monitor.monitor_memory():
+            for _ in range(3):
+                pipeline(prompt=" ")
+        return self
+    def build(self) -> DiffusionPipeline:
+        """Assemble, optimize and warm up the pipeline, then return it.
+
+        Reads ``"vae"``, ``"transformer"`` and ``"text_encoder"`` from the
+        registry, so a ``KeyError`` is raised if the matching ``load_*`` step
+        was not called beforehand. Sets ``self.state`` to ``ModelState.READY``.
+        """
         pipeline = DiffusionPipeline.from_pretrained(
+            self.config["model_id"],
+            vae=self.registry.get("vae"),
+            revision=self.config["revision"],
+            transformer=self.registry.get("transformer"),
+            text_encoder_2=self.registry.get("text_encoder"),
+            torch_dtype=self.config["dtype"],
+        ).to(self.config["device"])
+        self.optimize(pipeline)
+        self.warmup(pipeline)
+        self.state = ModelState.READY
         return pipeline
+class InferenceContext:
+    """Pairs a pipeline with a ``ResourceMonitor`` for inference calls.
+
+    The class itself is not a context manager; ``inference_mode()`` is.
+    """
+    def __init__(self, pipeline: DiffusionPipeline):
+        """Keep a reference to *pipeline* and create a fresh ``ResourceMonitor``."""
+        self.pipeline = pipeline
+        self.monitor = ResourceMonitor()
+    @contextmanager
+    def inference_mode(self):
+        """Yield the pipeline with gradients disabled and memory monitoring active.
+
+        Despite the name, this uses ``torch.no_grad()``, not
+        ``torch.inference_mode()``. Memory monitoring uses the default threshold.
+        """
+        with torch.no_grad(), self.monitor.monitor_memory():
+            yield self.pipeline
+def load() -> DiffusionPipeline:
+    """Build and configure the pipeline using the fluent builder pattern.
+
+    Applies the torch settings, loads the text encoder, VAE and transformer,
+    then assembles the pipeline via ``PipelineBuilder.build()``.
+
+    Returns:
+        The pipeline object returned by ``PipelineBuilder.build()``.
+    """
+    return (PipelineBuilder()
+            .with_torch_settings()
+            .load_text_encoder()
+            .load_vae()
+            .load_transformer()
+            .build())
+# Keep the entry-point name exported by the previous revision of this module so
+# that callers importing ``load_pipeline`` continue to work.
+load_pipeline = load
+def infer(request: TextToImageRequest, pipeline: DiffusionPipeline, generator: Generator) -> Image:
+    """Generate image using the pipeline within a managed context."""
+    context = InferenceContext(pipeline)
+    with context.inference_mode() as p:
+        return p(
+            prompt=request.prompt,
             generator=generator,
             guidance_scale=0.0,
             num_inference_steps=4,
             height=request.height,
             width=request.width,
             output_type="pil"
+        ).images[0]