Spaces:

saakshigupta
/

deepfake-explainer-app

Paused

App Files Community

saakshigupta committed on Apr 6

Commit

6e58aae

verified ·

1 Parent(s): eab7cdf

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -149

app.py CHANGED Viewed

@@ -1,174 +1,128 @@
 import streamlit as st
 import torch
 from PIL import Image
-import gc
-from transformers import AutoProcessor
 from peft import PeftModel
 from unsloth import FastVisionModel
-# Simple page config
 st.set_page_config(page_title="Deepfake Analyzer", layout="wide")
-# Minimal UI
 st.title("Deepfake Image Analyzer")
-st.markdown("This app analyzes images for signs of deepfake manipulation")
-# Function to free up memory
-def free_memory():
-    gc.collect()
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
 # Function to fix cross-attention masks
-def fix_processor_outputs(inputs):
-    """Fix cross-attention mask dimensions if needed"""
     if 'cross_attention_mask' in inputs and 0 in inputs['cross_attention_mask'].shape:
         batch_size, seq_len, _, num_tiles = inputs['cross_attention_mask'].shape
-        visual_features = 6404  # The exact dimension used in training
-        new_mask = torch.ones(
-            (batch_size, seq_len, visual_features, num_tiles),
-            device=inputs['cross_attention_mask'].device
-        )
         inputs['cross_attention_mask'] = new_mask
-        return True, inputs
-    return False, inputs
 # Load model function
 @st.cache_resource
 def load_model():
-    """Load model using Unsloth approach (similar to Colab)"""
-    try:
-        base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"
-        # Load processor
-        processor = AutoProcessor.from_pretrained(base_model_id)
-        # Load model using Unsloth's FastVisionModel
-        model, _ = FastVisionModel.from_pretrained(
-            base_model_id,
-            load_in_4bit=True,
-            torch_dtype=torch.float16,
-            device_map="auto"
-        )
-        # Set to inference mode
-        FastVisionModel.for_inference(model)
-        # Load adapter
-        adapter_id = "saakshigupta/deepfake-explainer-1"
-        model = PeftModel.from_pretrained(model, adapter_id)
-        return model, processor
-    except Exception as e:
-        st.error(f"Error loading model: {str(e)}")
-        st.exception(e)
-        return None, None
-# Minimal sidebar
-with st.sidebar:
-    st.header("Settings")
-    temperature = st.slider("Temperature", 0.1, 1.0, 0.7, 0.1)
-    max_length = st.slider("Max length", 100, 500, 300, 50)
-    # Instruction field
-    prompt = st.text_area(
-        "Analysis instruction",
-        value="Analyze this image and determine if it's a deepfake. Provide your reasoning.",
-        height=100
-    )
-# Main content - two columns for clarity
-col1, col2 = st.columns([1, 2])
-with col1:
-    # Load model button
-    if st.button("1. Load Model"):
-        with st.spinner("Loading model... (this may take a minute)"):
-            model, processor = load_model()
-            if model is not None and processor is not None:
-                st.session_state['model'] = model
-                st.session_state['processor'] = processor
-                st.success("✓ Model loaded successfully!")
-            else:
-                st.error("Failed to load model")
-    # File uploader
-    uploaded_file = st.file_uploader("2. Upload an image", type=["jpg", "jpeg", "png"])
-    # Display uploaded image
-    if uploaded_file is not None:
-        image = Image.open(uploaded_file).convert('RGB')
-        st.image(image, caption="Uploaded Image", use_column_width=True)
-        # Only enable analysis if model is loaded
-        model_loaded = 'model' in st.session_state and st.session_state['model'] is not None
-        if st.button("3. Analyze Image", disabled=not model_loaded):
-            if not model_loaded:
-                st.warning("Please load the model first")
-            else:
-                col2.subheader("Analysis Results")
-                with col2.spinner("Analyzing image..."):
-                    try:
-                        # Get model components
-                        model = st.session_state['model']
-                        processor = st.session_state['processor']
-                        # Format message for analysis
-                        messages = [
-                            {"role": "user", "content": [
-                                {"type": "image"},
-                                {"type": "text", "text": prompt}
-                            ]}
-                        ]
-                        # Apply chat template
-                        input_text = processor.tokenizer.apply_chat_template(
-                            messages,
-                            add_generation_prompt=True
-                        )
-                        # Process with image
-                        inputs = processor(
-                            images=image,
-                            text=input_text,
-                            add_special_tokens=False,
-                            return_tensors="pt"
-                        ).to(model.device)
-                        # Apply the fix
-                        fixed, inputs = fix_processor_outputs(inputs)
-                        if fixed:
-                            col2.info("Fixed cross-attention mask dimensions")
-                        # Generate analysis
-                        with torch.no_grad():
-                            output_ids = model.generate(
-                                **inputs,
-                                max_new_tokens=max_length,
-                                temperature=temperature,
-                                top_p=0.9
-                            )
-                        # Decode the output
-                        response = processor.tokenizer.decode(output_ids[0], skip_special_tokens=True)
-                        # Display results
-                        col2.success("Analysis complete!")
-                        col2.markdown(response)
-                        # Free memory
-                        free_memory()
-                    except Exception as e:
-                        col2.error(f"Error analyzing image: {str(e)}")
-                        col2.exception(e)
-        elif not model_loaded:
-            st.info("Please load the model first (Step 1)")
     else:
-        st.info("Please upload an image (Step 2)")
-with col2:
-    if 'model' not in st.session_state:
-        st.info("👈 Follow the steps on the left to analyze an image")

 import streamlit as st
 import torch
 from PIL import Image
+import io
 from peft import PeftModel
 from unsloth import FastVisionModel
+import tempfile
+import os
+# App title and description
 st.set_page_config(page_title="Deepfake Analyzer", layout="wide")
 st.title("Deepfake Image Analyzer")
+st.markdown("Upload an image to analyze it for potential deepfake manipulation")
 # Function to fix cross-attention masks
+def fix_cross_attention_mask(inputs):
     if 'cross_attention_mask' in inputs and 0 in inputs['cross_attention_mask'].shape:
         batch_size, seq_len, _, num_tiles = inputs['cross_attention_mask'].shape
+        visual_features = 6404  # Critical dimension
+        new_mask = torch.ones((batch_size, seq_len, visual_features, num_tiles),
+                            device=inputs['cross_attention_mask'].device)
         inputs['cross_attention_mask'] = new_mask
+        st.success("Fixed cross-attention mask dimensions")
+    return inputs
 # Load model function
 @st.cache_resource
 def load_model():
+    with st.spinner("Loading model... This may take a minute or two..."):
+        try:
+            # Load base model and tokenizer using Unsloth
+            base_model_id = "unsloth/llama-3.2-11b-vision-instruct"
+            model, tokenizer = FastVisionModel.from_pretrained(
+                base_model_id,
+                load_in_4bit=True,
+            )
+            # Load the adapter
+            adapter_id = "saakshigupta/deepfake-explainer-1"
+            model = PeftModel.from_pretrained(model, adapter_id)
+            # Set to inference mode
+            FastVisionModel.for_inference(model)
+            return model, tokenizer
+        except Exception as e:
+            st.error(f"Error loading model: {str(e)}")
+            return None, None
+# Analyze image function
+def analyze_image(image, question, model, tokenizer):
+    # Format the message
+    messages = [
+        {"role": "user", "content": [
+            {"type": "image"},
+            {"type": "text", "text": question}
+        ]}
+    ]
+    # Apply chat template
+    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
+    # Process with image
+    inputs = tokenizer(
+        image,
+        input_text,
+        add_special_tokens=False,
+        return_tensors="pt",
+    ).to(model.device)
+    # Fix cross-attention mask if needed
+    inputs = fix_cross_attention_mask(inputs)
+    # Generate response
+    with st.spinner("Analyzing image... (this may take a moment)"):
+        with torch.no_grad():
+            output_ids = model.generate(
+                **inputs,
+                max_new_tokens=512,
+                use_cache=True,
+                temperature=0.7,
+                top_p=0.9
+            )
+        # Decode the output
+        response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+        # Try to extract just the model's response (after the prompt)
+        if question in response:
+            result = response.split(question)[-1].strip()
+        else:
+            result = response
+        return result
+# Main app
+def main():
+    # Load model
+    model, tokenizer = load_model()
+    if model is not None and tokenizer is not None:
+        st.success("✅ Model loaded successfully! You can now analyze images.")
+        # Image upload section
+        st.subheader("Upload an Image")
+        uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
+        # Default question with option to customize
+        default_question = "Analyze this image and tell me if it's a deepfake. Provide both technical and non-technical explanations."
+        question = st.text_area("Question/Prompt:", value=default_question, height=100)
+        if uploaded_file is not None:
+            # Display the uploaded image
+            image = Image.open(uploaded_file).convert("RGB")
+            st.image(image, caption="Uploaded Image", use_column_width=True)
+            # Analyze button
+            if st.button("Analyze Image"):
+                result = analyze_image(image, question, model, tokenizer)
+                # Display results
+                st.subheader("Analysis Results")
+                st.markdown(result)
     else:
+        st.warning("Failed to load the model. Please check the console for errors.")
+if __name__ == "__main__":
+    main()