Spaces:

saakshigupta
/

deepfake-explainer-app

Paused

saakshigupta committed on Apr 2

Commit

9d19a1f

verified ·

1 Parent(s): cca832d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -37,7 +37,7 @@ device = init_device()
 @st.cache_resource
 def load_model():
-    """Load model with proper quantization handling"""
     try:
         # Using your original base model
         base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"
@@ -45,16 +45,10 @@ def load_model():
         # Load processor
         processor = AutoProcessor.from_pretrained(base_model_id)
-        # Load the model with proper quantization settings
         model = AutoModelForCausalLM.from_pretrained(
             base_model_id,
             device_map="auto",
-            quantization_config=BitsAndBytesConfig(
-                load_in_4bit=True,
-                bnb_4bit_compute_dtype=torch.float16,
-                bnb_4bit_use_double_quant=True,
-                bnb_4bit_quant_type="nf4"
-            ),
             torch_dtype=torch.float16
         )

 @st.cache_resource
 def load_model():
+    """Load pre-quantized model"""
     try:
         # Using your original base model
         base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"
         # Load processor
         processor = AutoProcessor.from_pretrained(base_model_id)
+        # Load the pre-quantized model
         model = AutoModelForCausalLM.from_pretrained(
             base_model_id,
             device_map="auto",
             torch_dtype=torch.float16
         )