blanchon committed on
Commit
f5ce168
·
1 Parent(s): 3001072
Files changed (1) hide show
  1. app-fast.py +6 -6
app-fast.py CHANGED
@@ -48,9 +48,9 @@ quantization_config = TransformersTorchAoConfig(
48
  tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL_NAME, use_fast=False)
49
  text_encoder = AutoModelForCausalLM.from_pretrained(
50
  LLAMA_MODEL_NAME,
51
- torch_dtype=torch.bfloat16,
52
  low_cpu_mem_usage=True,
53
- device_map="auto", # ✅ load directly onto GPU
54
  output_hidden_states=True,
55
  output_attentions=True,
56
  quantization_config=quantization_config,
@@ -60,9 +60,9 @@ quantization_config = DiffusersTorchAoConfig("int8wo")
60
  transformer = HiDreamImageTransformer2DModel.from_pretrained(
61
  MODEL_PATH,
62
  subfolder="transformer",
63
- device_map="auto", # ✅ load directly onto GPU
64
  quantization_config=quantization_config,
65
- torch_dtype=torch.bfloat16,
66
  )
67
 
68
  scheduler = MODEL_CONFIGS["scheduler"](
@@ -76,8 +76,8 @@ pipe = HiDreamImagePipeline.from_pretrained(
76
  scheduler=scheduler,
77
  tokenizer_4=tokenizer,
78
  text_encoder_4=text_encoder,
79
- torch_dtype=torch.bfloat16,
80
- ).to("cuda", torch.bfloat16)
81
 
82
  pipe.transformer = transformer
83
 
 
48
  tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL_NAME, use_fast=False)
49
  text_encoder = AutoModelForCausalLM.from_pretrained(
50
  LLAMA_MODEL_NAME,
51
+ torch_dtype="auto",
52
  low_cpu_mem_usage=True,
53
+ device_map="auto",
54
  output_hidden_states=True,
55
  output_attentions=True,
56
  quantization_config=quantization_config,
 
60
  transformer = HiDreamImageTransformer2DModel.from_pretrained(
61
  MODEL_PATH,
62
  subfolder="transformer",
63
+ device_map="auto",
64
  quantization_config=quantization_config,
65
+ torch_dtype="auto",
66
  )
67
 
68
  scheduler = MODEL_CONFIGS["scheduler"](
 
76
  scheduler=scheduler,
77
  tokenizer_4=tokenizer,
78
  text_encoder_4=text_encoder,
79
+ torch_dtype="auto",
80
+ )
81
 
82
  pipe.transformer = transformer
83