saakshigupta committed on
Commit 8fc5d75 · verified · 1 Parent(s): f594abc

Update app.py

Files changed (1): app.py (+14 -17)
app.py CHANGED
@@ -3,7 +3,7 @@ import torch
 from PIL import Image
 import os
 import gc
-from transformers import AutoProcessor, AutoModelForCausalLM
+from transformers import AutoProcessor, AutoModelForCausalLM, BitsAndBytesConfig
 from peft import PeftModel
 
 # Page config
@@ -37,7 +37,7 @@ device = init_device()
 
 @st.cache_resource
 def load_model():
-    """Load model with fallback options for quantization"""
+    """Load model with proper quantization handling"""
     try:
         # Using your original base model
         base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"
@@ -45,22 +45,19 @@ def load_model():
         # Load processor
         processor = AutoProcessor.from_pretrained(base_model_id)
 
-        # Try to load with 4-bit quantization first
-        try:
-            import bitsandbytes
-            model = AutoModelForCausalLM.from_pretrained(
-                base_model_id,
-                device_map="auto",
+        # Load the model with proper quantization settings
+        model = AutoModelForCausalLM.from_pretrained(
+            base_model_id,
+            device_map="auto",
+            load_in_4bit=True,
+            quantization_config=BitsAndBytesConfig(
                 load_in_4bit=True,
-                torch_dtype=torch.float16
-            )
-        except ImportError:
-            st.warning("bitsandbytes not available. Falling back to float16 precision.")
-            model = AutoModelForCausalLM.from_pretrained(
-                base_model_id,
-                device_map="auto",
-                torch_dtype=torch.float16
-            )
+                bnb_4bit_compute_dtype=torch.float16,
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_quant_type="nf4"
+            ),
+            torch_dtype=torch.float16
+        )
 
         # Load adapter
         adapter_id = "saakshigupta/deepfake-explainer-1"
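For reference, the loading path this commit adopts boils down to the sketch below. The base model and adapter IDs are taken from the diff; attaching the adapter via PeftModel.from_pretrained is an assumption based on the peft import, since that call sits outside the shown hunks. Note that recent transformers releases reject passing load_in_4bit=True alongside quantization_config, so the sketch keeps only the config.

# Minimal sketch of the new loading path (assumes a CUDA GPU with bitsandbytes installed)
import torch
from transformers import AutoProcessor, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"
adapter_id = "saakshigupta/deepfake-explainer-1"

# NF4 4-bit quantization with double quantization and float16 compute, as in the commit
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

processor = AutoProcessor.from_pretrained(base_model_id)
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto",
    quantization_config=bnb_config,  # the bare load_in_4bit=True kwarg is redundant next to this
    torch_dtype=torch.float16,
)
# Assumed adapter attach; the actual call is outside the shown hunks
model = PeftModel.from_pretrained(model, adapter_id)

On the design choice: an explicit BitsAndBytesConfig makes the quantization settings deterministic, whereas the old try/except silently fell back to unquantized float16 when bitsandbytes was missing, roughly doubling the memory footprint of an 11B model.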