saakshigupta committed (verified)
Commit 9938609 · Parent(s): 34b363c

Update app.py

Files changed (1): app.py (+44 -36)
app.py CHANGED

@@ -1,10 +1,9 @@
 import streamlit as st
 import torch
-import os
 from PIL import Image
-from transformers import AutoProcessor, AutoModelForCausalLM, BitsAndBytesConfig
 from peft import PeftModel
 import gc
+import os
 
 # Page config
 st.set_page_config(
@@ -38,35 +37,30 @@ device = init_device()
 
 @st.cache_resource
 def load_model():
-    """Load model and processor with proper dtype settings"""
+    """Load model using Unsloth, similar to your notebook code"""
     try:
-        # Load base model
-        base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"
+        # Import Unsloth here to ensure it's loaded when needed
+        from unsloth import FastVisionModel
 
-        # Load processor first
-        processor = AutoProcessor.from_pretrained(base_model_id)
+        st.info("Loading base model and tokenizer using Unsloth...")
 
-        # Configure quantization explicitly with float16
-        quantization_config = BitsAndBytesConfig(
+        # Use the same model ID and loading approach that worked in your notebook
+        base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"
+        model, tokenizer = FastVisionModel.from_pretrained(
+            base_model_id,
             load_in_4bit=True,
-            bnb_4bit_compute_dtype=torch.float16,
-            bnb_4bit_quant_type="nf4",
-            bnb_4bit_use_double_quant=True
+            torch_dtype=torch.float16,
         )
 
-        # Load model with explicit dtype settings
-        model = AutoModelForCausalLM.from_pretrained(
-            base_model_id,
-            device_map="auto",
-            torch_dtype=torch.float16,  # Explicit float16
-            quantization_config=quantization_config
-        )
+        # Set to inference mode
+        FastVisionModel.for_inference(model)
 
-        # Load adapter
+        # Load the fine-tuned adapter
+        st.info("Loading adapter...")
         adapter_id = "saakshigupta/deepfake-explainer-1"
         model = PeftModel.from_pretrained(model, adapter_id)
 
-        return model, processor
+        return model, tokenizer
 
     except Exception as e:
         st.error(f"Error loading model: {str(e)}")
@@ -117,10 +111,10 @@ with st.sidebar:
     # Load model on startup
     with st.spinner("Loading model... this may take a minute."):
         try:
-            model, processor = load_model()
-            if model is not None and processor is not None:
+            model, tokenizer = load_model()
+            if model is not None and tokenizer is not None:
                 st.session_state['model'] = model
-                st.session_state['processor'] = processor
+                st.session_state['tokenizer'] = tokenizer
                 st.success("Model loaded successfully!")
             else:
                 st.error("Failed to load model.")
@@ -145,20 +139,33 @@ if uploaded_file is not None and model_loaded:
     try:
         # Get components from session state
         model = st.session_state['model']
-        processor = st.session_state['processor']
+        tokenizer = st.session_state['tokenizer']
+
+        # Format the message for Unsloth - same as your notebook
+        messages = [
+            {"role": "user", "content": [
+                {"type": "image"},
+                {"type": "text", "text": custom_prompt}
+            ]}
+        ]
 
-        # Process the image
-        inputs = processor(text=custom_prompt, images=image, return_tensors="pt")
+        # Apply chat template
+        input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
 
-        # Fix cross-attention mask
+        # Process with image
+        inputs = tokenizer(
+            image,
+            input_text,
+            add_special_tokens=False,
+            return_tensors="pt",
+        ).to(model.device)
+
+        # Apply the cross-attention fix
        fixed, inputs = fix_processor_outputs(inputs)
         if fixed:
             st.info("Fixed cross-attention mask dimensions")
 
-        # Move to device
-        inputs = {k: v.to(model.device) for k, v in inputs.items() if isinstance(v, torch.Tensor)}
-
-        # Generate the analysis
+        # Generate analysis
         with torch.no_grad():
             output_ids = model.generate(
                 **inputs,
@@ -168,11 +175,12 @@ if uploaded_file is not None and model_loaded:
             )
 
         # Decode the output
-        response = processor.decode(output_ids[0], skip_special_tokens=True)
+        response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
-        # Extract the actual response (removing the prompt)
-        if custom_prompt in response:
-            result = response.split(custom_prompt)[-1].strip()
+        # Extract the model's response
+        # Format might be different from processor.decode, check the output
+        if "assistant" in response:
+            result = response.split("assistant")[-1].strip()
         else:
             result = response
 
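Note: both the old and new processing paths call fix_processor_outputs(), whose definition sits outside the changed hunks and is not shown in this diff. The sketch below is a hypothetical reconstruction of what such a helper typically does for Llama 3.2 Vision, padding the cross-attention mask to the 4-D shape (batch, seq_len, num_images, num_tiles) that generate() expects; the function name matches app.py, but the body is an assumption, not the committed implementation.

def fix_processor_outputs(inputs):
    # Hypothetical reconstruction; the committed helper is not shown in this diff.
    # Llama 3.2 Vision expects cross_attention_mask with shape
    # (batch, seq_len, num_images, num_tiles); add the tiles axis if it is missing.
    mask = inputs.get("cross_attention_mask")
    if mask is not None and mask.dim() == 3:
        inputs["cross_attention_mask"] = mask.unsqueeze(-1)
        return True, inputs  # matches the (fixed, inputs) contract used in app.py
    return False, inputs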
 
 
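For reference, the new load-and-generate path condenses to the script below when run outside Streamlit. It is a sketch assembled from the committed code: the model and adapter IDs are the ones in the diff, while the prompt text, the sample.jpg path, and max_new_tokens are placeholders (the real generation arguments fall outside the changed hunks).

import torch
from PIL import Image
from unsloth import FastVisionModel
from peft import PeftModel

# Load the 4-bit base model and its tokenizer/processor, then switch to inference mode
model, tokenizer = FastVisionModel.from_pretrained(
    "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit",
    load_in_4bit=True,
    torch_dtype=torch.float16,
)
FastVisionModel.for_inference(model)

# Attach the fine-tuned deepfake-explainer adapter
model = PeftModel.from_pretrained(model, "saakshigupta/deepfake-explainer-1")

image = Image.open("sample.jpg")  # placeholder path
messages = [{"role": "user", "content": [
    {"type": "image"},
    {"type": "text", "text": "Is this image a deepfake? Explain."},  # placeholder prompt
]}]
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(image, input_text, add_special_tokens=False, return_tensors="pt").to(model.device)

with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=256)  # placeholder settings
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))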
 
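One caveat the new code flags itself ("Format might be different from processor.decode, check the output"): splitting the decoded string on the literal "assistant" is brittle, since that word can also appear inside the model's answer. A sturdier variant, assuming inputs and output_ids as produced above, decodes only the newly generated tokens:

# Skip the prompt tokens instead of string-splitting on "assistant"
prompt_len = inputs["input_ids"].shape[1]
result = tokenizer.decode(output_ids[0][prompt_len:], skip_special_tokens=True).strip()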