Update server.py
Browse files
server.py
CHANGED
@@ -3,7 +3,6 @@ from fastapi import Response
|
|
3 |
import torch
|
4 |
import time
|
5 |
import litserve as ls
|
6 |
-
from optimum.quanto import freeze, qfloat8, quantize
|
7 |
from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL
|
8 |
from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel
|
9 |
from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
|
@@ -20,12 +19,6 @@ class FluxLitAPI(ls.LitAPI):
|
|
20 |
vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-schnell", subfolder="vae", torch_dtype=torch.bfloat16, revision="refs/pr/1")
|
21 |
transformer = FluxTransformer2DModel.from_pretrained("black-forest-labs/FLUX.1-schnell", subfolder="transformer", torch_dtype=torch.bfloat16, revision="refs/pr/1")
|
22 |
|
23 |
-
# quantize to 8-bit to fit on an L4
|
24 |
-
quantize(transformer, weights=qfloat8)
|
25 |
-
freeze(transformer)
|
26 |
-
quantize(text_encoder_2, weights=qfloat8)
|
27 |
-
freeze(text_encoder_2)
|
28 |
-
|
29 |
self.pipe = FluxPipeline(
|
30 |
scheduler=scheduler,
|
31 |
text_encoder=text_encoder,
|
|
|
3 |
import torch
|
4 |
import time
|
5 |
import litserve as ls
|
|
|
6 |
from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL
|
7 |
from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel
|
8 |
from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
|
|
|
19 |
vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-schnell", subfolder="vae", torch_dtype=torch.bfloat16, revision="refs/pr/1")
|
20 |
transformer = FluxTransformer2DModel.from_pretrained("black-forest-labs/FLUX.1-schnell", subfolder="transformer", torch_dtype=torch.bfloat16, revision="refs/pr/1")
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
self.pipe = FluxPipeline(
|
23 |
scheduler=scheduler,
|
24 |
text_encoder=text_encoder,
|