saakshigupta committed on
Commit
9d19a1f
·
verified ·
1 Parent(s): cca832d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -8
app.py CHANGED
@@ -37,7 +37,7 @@ device = init_device()
37
 
38
  @st.cache_resource
39
  def load_model():
40
- """Load model with proper quantization handling"""
41
  try:
42
  # Using your original base model
43
  base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"
@@ -45,16 +45,10 @@ def load_model():
45
  # Load processor
46
  processor = AutoProcessor.from_pretrained(base_model_id)
47
 
48
- # Load the model with proper quantization settings
49
  model = AutoModelForCausalLM.from_pretrained(
50
  base_model_id,
51
  device_map="auto",
52
- quantization_config=BitsAndBytesConfig(
53
- load_in_4bit=True,
54
- bnb_4bit_compute_dtype=torch.float16,
55
- bnb_4bit_use_double_quant=True,
56
- bnb_4bit_quant_type="nf4"
57
- ),
58
  torch_dtype=torch.float16
59
  )
60
 
 
37
 
38
  @st.cache_resource
39
  def load_model():
40
+ """Load pre-quantized model"""
41
  try:
42
  # Using your original base model
43
  base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"
 
45
  # Load processor
46
  processor = AutoProcessor.from_pretrained(base_model_id)
47
 
48
+ # Load the pre-quantized model
49
  model = AutoModelForCausalLM.from_pretrained(
50
  base_model_id,
51
  device_map="auto",
 
 
 
 
 
 
52
  torch_dtype=torch.float16
53
  )
54