saakshigupta committed (verified)
Commit 24cd4f4 · Parent: 473874a

Update app.py

Files changed (1)
  1. app.py +17 -7
app.py CHANGED
@@ -37,7 +37,7 @@ device = init_device()
 
 @st.cache_resource
 def load_model():
-    """Load model without quantization"""
+    """Load model with fallback options for quantization"""
     try:
         # Using your original base model
         base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"
@@ -45,12 +45,22 @@ def load_model():
         # Load processor
         processor = AutoProcessor.from_pretrained(base_model_id)
 
-        # Load the model in half precision (float16) without 4-bit quantization
-        model = AutoModelForCausalLM.from_pretrained(
-            base_model_id,
-            device_map="auto",
-            torch_dtype=torch.float16  # Use float16 for memory efficiency
-        )
+        # Try to load with 4-bit quantization first
+        try:
+            import bitsandbytes
+            model = AutoModelForCausalLM.from_pretrained(
+                base_model_id,
+                device_map="auto",
+                load_in_4bit=True,
+                torch_dtype=torch.float16
+            )
+        except ImportError:
+            st.warning("bitsandbytes not available. Falling back to float16 precision.")
+            model = AutoModelForCausalLM.from_pretrained(
+                base_model_id,
+                device_map="auto",
+                torch_dtype=torch.float16
+            )
 
         # Load adapter
         adapter_id = "saakshigupta/deepfake-explainer-1"