blanchon committed
Commit 3001072 · 1 Parent(s): c724ff4
Files changed (1):
  1. app-fast.py +4 -4
app-fast.py CHANGED
@@ -40,7 +40,6 @@ RESOLUTION_OPTIONS: list[str] = [
 ]
 
 
-# Using AOBaseConfig instance (torchao >= 0.10.0)
 quant_config = Int4WeightOnlyConfig(group_size=128)
 quantization_config = TransformersTorchAoConfig(
     quant_type=quant_config, dtype=torch.bfloat16
@@ -51,19 +50,20 @@ text_encoder = AutoModelForCausalLM.from_pretrained(
     LLAMA_MODEL_NAME,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
-    device_map=None,
+    device_map="auto",  # ✅ load directly onto GPU
     output_hidden_states=True,
     output_attentions=True,
     quantization_config=quantization_config,
-).to("cuda", torch.bfloat16)
+)
 
 quantization_config = DiffusersTorchAoConfig("int8wo")
 transformer = HiDreamImageTransformer2DModel.from_pretrained(
     MODEL_PATH,
     subfolder="transformer",
+    device_map="auto",  # ✅ load directly onto GPU
     quantization_config=quantization_config,
     torch_dtype=torch.bfloat16,
-).to("cuda", dtype=torch.float16)
+)
 
 scheduler = MODEL_CONFIGS["scheduler"](
     num_train_timesteps=1000,
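
In short, the commit drops the explicit .to("cuda", ...) casts in favor of device_map="auto": the trailing .to() calls would move and re-cast weights that torchao has already quantized and packed (the old code even cast the bfloat16-quantized transformer to float16), whereas device_map="auto" lets accelerate place the quantized weights directly on the GPU at load time. A minimal sketch of the resulting loading pattern, assuming the aliased TorchAoConfig imports and the LLAMA_MODEL_NAME / MODEL_PATH constants defined elsewhere in app-fast.py:

# Minimal sketch of the post-commit loading pattern; LLAMA_MODEL_NAME and
# MODEL_PATH are assumed to be defined earlier in app-fast.py, and the
# aliased imports are assumed to match the ones used there.
import torch
from torchao.quantization import Int4WeightOnlyConfig
from transformers import AutoModelForCausalLM
from transformers import TorchAoConfig as TransformersTorchAoConfig
from diffusers import TorchAoConfig as DiffusersTorchAoConfig
from diffusers import HiDreamImageTransformer2DModel

# torchao >= 0.10.0 accepts an AOBaseConfig instance as the quant type
quant_config = Int4WeightOnlyConfig(group_size=128)

text_encoder = AutoModelForCausalLM.from_pretrained(
    LLAMA_MODEL_NAME,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    device_map="auto",  # weights land on the GPU at load time; no .to() afterwards
    output_hidden_states=True,
    output_attentions=True,
    quantization_config=TransformersTorchAoConfig(
        quant_type=quant_config, dtype=torch.bfloat16
    ),
)

transformer = HiDreamImageTransformer2DModel.from_pretrained(
    MODEL_PATH,
    subfolder="transformer",
    device_map="auto",  # likewise: no post-load .to("cuda", dtype=...) cast
    quantization_config=DiffusersTorchAoConfig("int8wo"),
    torch_dtype=torch.bfloat16,
)

This also removes a dtype mismatch from the old code, which quantized in bfloat16 but then cast the transformer to float16 on the way to the GPU.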