import streamlit as st
import torch
from PIL import Image
import os
import gc
from transformers import AutoProcessor, AutoModelForCausalLM
from peft import PeftModel
# Page config
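# (st.set_page_config must be the first Streamlit command executed in the script)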
st.set_page_config(
    page_title="Deepfake Image Analyzer",
    page_icon="🔍",
    layout="wide"
)
# App title and description
st.title("Deepfake Image Analyzer")
st.markdown("Upload an image to analyze it for possible deepfake manipulation")
# Function to free up memory
def free_memory():
    gc.collect()
    if torch.cuda.is_available():
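        # empty_cache() returns cached, unused GPU memory to the driver;
        # it does not free tensors that are still referenced.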
        torch.cuda.empty_cache()

# Helper function to check CUDA
def init_device():
    if torch.cuda.is_available():
        st.sidebar.success("✅ GPU available: Using CUDA")
        return "cuda"
    else:
        st.sidebar.warning("⚠️ No GPU detected: Using CPU (analysis will be slow)")
        return "cpu"

# Set device
device = init_device()
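# Note: `device` is informational here; actual model placement below is handled by
# device_map="auto" inside load_model().

# st.cache_resource keeps the loaded model and processor in memory across Streamlit
# reruns, so load_model() only does the expensive download/initialization once.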
@st.cache_resource
def load_model():
"""Load model with fallback options for quantization"""
try:
# Using your original base model
base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"
# Load processor
processor = AutoProcessor.from_pretrained(base_model_id)
# Try to load with 4-bit quantization first
try:
import bitsandbytes
model = AutoModelForCausalLM.from_pretrained(
base_model_id,
device_map="auto",
load_in_4bit=True,
torch_dtype=torch.float16
)
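            # On newer transformers releases, the bare `load_in_4bit=True` kwarg is deprecated
            # in favor of a quantization config object; an equivalent call (assuming a recent
            # transformers version with bitsandbytes installed) would look roughly like:
            #     from transformers import BitsAndBytesConfig
            #     quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)
            #     model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map="auto", quantization_config=quant_config)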
        except ImportError:
            st.warning("bitsandbytes not available. Falling back to float16 precision.")
            model = AutoModelForCausalLM.from_pretrained(
                base_model_id,
                device_map="auto",
                torch_dtype=torch.float16
            )

        # Load adapter
        adapter_id = "saakshigupta/deepfake-explainer-1"
        model = PeftModel.from_pretrained(model, adapter_id)

        return model, processor
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        st.exception(e)
        return None, None

# Function to fix cross-attention masks
def fix_processor_outputs(inputs):
"""Fix cross-attention mask dimensions if needed"""
if 'cross_attention_mask' in inputs and 0 in inputs['cross_attention_mask'].shape:
batch_size, seq_len, _, num_tiles = inputs['cross_attention_mask'].shape
visual_features = 6404 # The exact dimension used in training
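        # Rebuild the degenerate (zero-sized) mask as all-ones, i.e. every text position
        # may attend to every visual feature position.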
        new_mask = torch.ones(
            (batch_size, seq_len, visual_features, num_tiles),
            device=inputs['cross_attention_mask'].device
        )
        inputs['cross_attention_mask'] = new_mask
        return True, inputs
    return False, inputs

# Create sidebar with options
with st.sidebar:
    st.header("Options")
    temperature = st.slider("Temperature", min_value=0.1, max_value=1.0, value=0.7, step=0.1,
                            help="Higher values make output more random, lower values more deterministic")
    max_length = st.slider("Maximum response length", min_value=100, max_value=1000, value=500, step=50)
    custom_prompt = st.text_area(
        "Custom instruction (optional)",
        value="Analyze this image and determine if it's a deepfake. Provide both technical and non-technical explanations.",
        height=100
    )

    st.markdown("### About")
    st.markdown("""
This app uses a fine-tuned Llama 3.2 Vision model to detect and explain deepfakes.

The analyzer looks for:
- Inconsistencies in facial features
- Unusual lighting or shadows
- Unnatural blur patterns
- Artifacts around edges
- Texture inconsistencies

Model by [saakshigupta](https://huggingface.co/saakshigupta/deepfake-explainer-1)
""")

# Load model button
if st.button("Load Model"):
    with st.spinner("Loading model... this may take several minutes"):
        try:
            model, processor = load_model()
            if model is not None and processor is not None:
                st.session_state['model'] = model
                st.session_state['processor'] = processor
                st.success("Model loaded successfully!")
            else:
                st.error("Failed to load model.")
        except Exception as e:
            st.error(f"Error during model loading: {str(e)}")
            st.exception(e)
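
# The loaded model and processor are stored in st.session_state so that later reruns of the
# script (e.g. when the Analyze button below is clicked) can reuse them without reloading.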
# Main content area - file uploader
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
# Check if model is loaded
model_loaded = 'model' in st.session_state and st.session_state['model'] is not None
if uploaded_file is not None:
    # Display the image
    image = Image.open(uploaded_file).convert('RGB')
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Analyze button (only enabled if model is loaded)
    if st.button("Analyze Image", disabled=not model_loaded):
        if not model_loaded:
            st.warning("Please load the model first by clicking the 'Load Model' button.")
        else:
            with st.spinner("Analyzing the image... This may take 15-30 seconds"):
                try:
                    # Get components from session state
                    model = st.session_state['model']
                    processor = st.session_state['processor']

                    # Process the image using the processor
                    inputs = processor(text=custom_prompt, images=image, return_tensors="pt")

                    # Fix cross-attention mask if needed
                    fixed, inputs = fix_processor_outputs(inputs)
                    if fixed:
                        st.info("Fixed cross-attention mask dimensions")

                    # Move to device
                    inputs = {k: v.to(model.device) for k, v in inputs.items() if isinstance(v, torch.Tensor)}
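                    # The comprehension above also silently drops any non-tensor entries from the
                    # processor output, on the assumption that generate() only needs the tensors.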
                    # Generate the analysis
                    with torch.no_grad():
                        output_ids = model.generate(
                            **inputs,
                            max_new_tokens=max_length,
                            temperature=temperature,
                            top_p=0.9
                        )
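                    # Note: temperature and top_p only influence generation when sampling is
                    # enabled; if the model's generation_config defaults to greedy decoding,
                    # do_sample=True would also need to be passed for the Temperature slider to matter.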
                    # Decode the output
                    response = processor.decode(output_ids[0], skip_special_tokens=True)

                    # Extract the actual response (removing the prompt)
                    if custom_prompt in response:
                        result = response.split(custom_prompt)[-1].strip()
                    else:
                        result = response

                    # Display result in a nice format
                    st.success("Analysis complete!")

                    # Show technical and non-technical explanations separately if they exist
                    if "Technical Explanation:" in result and "Non-Technical Explanation:" in result:
                        technical, non_technical = result.split("Non-Technical Explanation:")
                        technical = technical.replace("Technical Explanation:", "").strip()

                        col1, col2 = st.columns(2)
                        with col1:
                            st.subheader("Technical Analysis")
                            st.write(technical)
                        with col2:
                            st.subheader("Simple Explanation")
                            st.write(non_technical)
                    else:
                        st.subheader("Analysis Result")
                        st.write(result)

                    # Free memory after analysis
                    free_memory()

                except Exception as e:
                    st.error(f"Error analyzing image: {str(e)}")
                    st.exception(e)
elif not model_loaded:
st.warning("Please load the model first by clicking the 'Load Model' button at the top of the page.")
else:
st.info("Please upload an image to begin analysis")
# Add footer
st.markdown("---")
st.markdown("Deepfake Image Analyzer") |