saakshigupta committed on
Commit a96c23c · verified · 1 Parent(s): 0b59358

Update app.py

Files changed (1)
  1. app.py +29 -46
app.py CHANGED
@@ -3,6 +3,8 @@ import torch
 from PIL import Image
 import os
 import gc
+from transformers import AutoProcessor, AutoModelForCausalLM
+from peft import PeftModel
 
 # Page config
 st.set_page_config(
@@ -23,7 +25,6 @@ def free_memory():
 
 # Helper function to check CUDA
 def init_device():
-    """Set the appropriate device and return it"""
     if torch.cuda.is_available():
         st.sidebar.success("✓ GPU available: Using CUDA")
         return "cuda"
@@ -36,31 +37,26 @@ device = init_device()
 
 @st.cache_resource
 def load_model():
-    """Load model using Unsloth, similar to your notebook code"""
+    """Load model without quantization"""
     try:
-        # Import libraries here to ensure they're loaded when needed
-        from peft import PeftModel
-        from unsloth import FastVisionModel
+        # Using your original base model
+        base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"
 
-        st.info("Loading base model and tokenizer using Unsloth...")
+        # Load processor
+        processor = AutoProcessor.from_pretrained(base_model_id)
 
-        # Use the same model ID and loading approach that worked in your notebook
-        base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"
-        model, tokenizer = FastVisionModel.from_pretrained(
+        # Load the model in half precision (float16) without 4-bit quantization
+        model = AutoModelForCausalLM.from_pretrained(
             base_model_id,
-            load_in_4bit=True,
-            torch_dtype=torch.float16,
+            device_map="auto",
+            torch_dtype=torch.float16  # Use float16 for memory efficiency
         )
 
-        # Set to inference mode
-        FastVisionModel.for_inference(model)
-
-        # Load the fine-tuned adapter
-        st.info("Loading adapter...")
+        # Load adapter
         adapter_id = "saakshigupta/deepfake-explainer-1"
         model = PeftModel.from_pretrained(model, adapter_id)
 
-        return model, tokenizer
+        return model, processor
 
     except Exception as e:
         st.error(f"Error loading model: {str(e)}")
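
Note: for reference, the new loading path in this hunk can be exercised on its own roughly as below. This is a minimal sketch using the same model and adapter IDs as the diff; it assumes recent transformers/peft versions and enough GPU memory for the 11B checkpoint in float16, and the exact Auto class that accepts this vision checkpoint can vary with the installed transformers version.

import torch
from transformers import AutoProcessor, AutoModelForCausalLM
from peft import PeftModel

# Same IDs as in the diff above.
base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"
adapter_id = "saakshigupta/deepfake-explainer-1"

# The processor handles both image preprocessing and text tokenization.
processor = AutoProcessor.from_pretrained(base_model_id)

# Half-precision load spread across available devices; no 4-bit quantization here.
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Attach the fine-tuned LoRA adapter on top of the base model.
model = PeftModel.from_pretrained(model, adapter_id)
model.eval()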
@@ -110,12 +106,12 @@ with st.sidebar:
 
     # Load model button
     if st.button("Load Model"):
-        with st.spinner("Loading model... this may take a minute."):
+        with st.spinner("Loading model... this may take several minutes"):
             try:
-                model, tokenizer = load_model()
-                if model is not None and tokenizer is not None:
+                model, processor = load_model()
+                if model is not None and processor is not None:
                     st.session_state['model'] = model
-                    st.session_state['tokenizer'] = tokenizer
+                    st.session_state['processor'] = processor
                     st.success("Model loaded successfully!")
                 else:
                     st.error("Failed to load model.")
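
Note: the loaded objects are stashed in st.session_state so later reruns of the script can reuse them without reloading. A hypothetical guard on the analysis side (not part of this diff) would look like:

import streamlit as st

# Hypothetical guard (not in this commit): only run analysis once both
# objects stored by the load button above are present in session state.
if 'model' not in st.session_state or 'processor' not in st.session_state:
    st.warning("Load the model from the sidebar before analyzing an image.")
else:
    model = st.session_state['model']
    processor = st.session_state['processor']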
@@ -143,33 +139,20 @@ if uploaded_file is not None:
         try:
             # Get components from session state
             model = st.session_state['model']
-            tokenizer = st.session_state['tokenizer']
+            processor = st.session_state['processor']
 
-            # Format the message for Unsloth - same as your notebook
-            messages = [
-                {"role": "user", "content": [
-                    {"type": "image"},
-                    {"type": "text", "text": custom_prompt}
-                ]}
-            ]
+            # Process the image using the processor
+            inputs = processor(text=custom_prompt, images=image, return_tensors="pt")
 
-            # Apply chat template
-            input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
-
-            # Process with image
-            inputs = tokenizer(
-                image,
-                input_text,
-                add_special_tokens=False,
-                return_tensors="pt",
-            ).to(model.device)
-
-            # Apply the cross-attention fix
+            # Fix cross-attention mask if needed
             fixed, inputs = fix_processor_outputs(inputs)
             if fixed:
                 st.info("Fixed cross-attention mask dimensions")
 
-            # Generate analysis
+            # Move to device
+            inputs = {k: v.to(model.device) for k, v in inputs.items() if isinstance(v, torch.Tensor)}
+
+            # Generate the analysis
             with torch.no_grad():
                 output_ids = model.generate(
                     **inputs,
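
Note: fix_processor_outputs is defined elsewhere in app.py and is not part of this hunk. As a purely hypothetical sketch, assuming the issue it works around is a cross_attention_mask coming back with one dimension fewer than the vision model expects, such a helper might look like:

def fix_processor_outputs(inputs):
    """Hypothetical sketch only -- the real helper lives elsewhere in app.py.

    Assumption: the processor sometimes returns cross_attention_mask without
    a trailing dimension, so we add it back and report whether a fix was applied.
    """
    mask = inputs.get("cross_attention_mask")
    if mask is not None and mask.dim() == 3:
        # (batch, seq_len, num_images) -> (batch, seq_len, num_images, 1)
        inputs["cross_attention_mask"] = mask.unsqueeze(-1)
        return True, inputs
    return False, inputs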
@@ -179,11 +162,11 @@ if uploaded_file is not None:
                 )
 
             # Decode the output
-            response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+            response = processor.decode(output_ids[0], skip_special_tokens=True)
 
-            # Extract the model's response
-            if "assistant" in response:
-                result = response.split("assistant")[-1].strip()
+            # Extract the actual response (removing the prompt)
+            if custom_prompt in response:
+                result = response.split(custom_prompt)[-1].strip()
             else:
                 result = response
 
 
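Note: the prompt-stripping in the last hunk relies on custom_prompt appearing verbatim in the decoded text. A common alternative (an assumption, not what this commit does) is to decode only the newly generated tokens, using the names already defined in the diff above:

# Hypothetical alternative (not in this commit): skip the prompt tokens
# instead of splitting the decoded string on the prompt text.
prompt_len = inputs["input_ids"].shape[1]
result = processor.decode(output_ids[0][prompt_len:], skip_special_tokens=True).strip()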