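"""
Deepfake Image Analyzer: a Streamlit app that loads a fine-tuned
Llama 3.2 Vision model (Unsloth 4-bit base plus a LoRA adapter) and asks it
to explain whether an uploaded image shows signs of deepfake manipulation.

Run locally with `streamlit run <this file>` (assuming Streamlit is installed).
"""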
import streamlit as st
import torch
from PIL import Image
from peft import PeftModel
from unsloth import FastVisionModel
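# Assumed dependencies (not declared in this file): streamlit, torch, Pillow,
# peft, and unsloth. The 4-bit loading used below typically also requires
# bitsandbytes and a CUDA-capable GPU.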
# App title and description
st.set_page_config(
    page_title="Deepfake Analyzer",
    layout="wide",
    page_icon="🔍"
)
# Main title and description
st.title("Deepfake Image Analyzer")
st.markdown("Upload an image to analyze it for possible deepfake manipulation")

# Check for GPU availability
def check_gpu():
    if torch.cuda.is_available():
        gpu_info = torch.cuda.get_device_properties(0)
        st.sidebar.success(f"✅ GPU available: {gpu_info.name} ({gpu_info.total_memory / (1024**3):.2f} GB)")
        return True
    else:
        st.sidebar.warning("⚠️ No GPU detected. Analysis will be slower.")
        return False
# Sidebar components
st.sidebar.title("Options")

# Temperature slider
temperature = st.sidebar.slider(
    "Temperature",
    min_value=0.1,
    max_value=1.0,
    value=0.7,
    step=0.1,
    help="Higher values make output more random, lower values more deterministic"
)

# Max response length slider
max_tokens = st.sidebar.slider(
    "Maximum Response Length",
    min_value=100,
    max_value=1000,
    value=500,
    step=50,
    help="The maximum number of tokens in the response"
)

# Custom instruction text area in sidebar
custom_instruction = st.sidebar.text_area(
    "Custom Instructions (Advanced)",
    value="Analyze for facial inconsistencies, lighting irregularities, mismatched shadows, and other signs of manipulation.",
    help="Add specific instructions for the model"
)

# About section in sidebar
st.sidebar.markdown("---")
st.sidebar.subheader("About")
st.sidebar.markdown("""
This analyzer looks for:
- Facial inconsistencies
- Unnatural movements
- Lighting issues
- Texture anomalies
- Edge artifacts
- Blending problems

**Model**: Fine-tuned Llama 3.2 Vision
**Creator**: [Saakshi Gupta](https://huggingface.co/saakshigupta)
""")
# Function to fix cross-attention masks
def fix_cross_attention_mask(inputs):
    if 'cross_attention_mask' in inputs and 0 in inputs['cross_attention_mask'].shape:
        batch_size, seq_len, _, num_tiles = inputs['cross_attention_mask'].shape
        visual_features = 6404  # Critical dimension
        new_mask = torch.ones((batch_size, seq_len, visual_features, num_tiles),
                              device=inputs['cross_attention_mask'].device)
        inputs['cross_attention_mask'] = new_mask
        st.success("Fixed cross-attention mask dimensions")
    return inputs
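# Note: the workaround above fires when the processor returns a
# cross-attention mask with a zero-sized dimension. The value 6404 is
# model-specific; it is presumably the number of visual feature positions
# this base model expects, so adjust it if a different vision backbone is used.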
# Load model function
def load_model():
    with st.spinner("Loading model... This may take a few minutes. Please be patient..."):
        try:
            # Check for GPU
            check_gpu()

            # Load base model and tokenizer using Unsloth
            base_model_id = "unsloth/llama-3.2-11b-vision-instruct"
            model, tokenizer = FastVisionModel.from_pretrained(
                base_model_id,
                load_in_4bit=True,
            )

            # Load the adapter
            adapter_id = "saakshigupta/deepfake-explainer-1"
            model = PeftModel.from_pretrained(model, adapter_id)

            # Set to inference mode
            FastVisionModel.for_inference(model)

            return model, tokenizer
        except Exception as e:
            st.error(f"Error loading model: {str(e)}")
            return None, None
# Analyze image function
def analyze_image(image, question, model, tokenizer, temperature=0.7, max_tokens=500, custom_instruction=""):
    # Combine question with custom instruction if provided
    if custom_instruction.strip():
        full_prompt = f"{question}\n\nAdditional instructions: {custom_instruction}"
    else:
        full_prompt = question

    # Format the message
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": full_prompt}
        ]}
    ]

    # Apply chat template
    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

    # Process with image
    inputs = tokenizer(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to(model.device)

    # Fix cross-attention mask if needed
    inputs = fix_cross_attention_mask(inputs)

    # Generate response
    with st.spinner("Analyzing image... (this may take 15-30 seconds)"):
        with torch.no_grad():
            output_ids = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                use_cache=True,
                do_sample=True,  # required for temperature/top_p to take effect
                temperature=temperature,
                top_p=0.9
            )

    # Decode the output
    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # Try to extract just the model's response (after the prompt)
    if full_prompt in response:
        result = response.split(full_prompt)[-1].strip()
    else:
        result = response

    return result
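# Sketch of a more robust prompt-stripping alternative (not used above):
# slice off the prompt tokens by length instead of matching the prompt string.
#     prompt_len = inputs["input_ids"].shape[1]
#     answer = tokenizer.decode(output_ids[0][prompt_len:], skip_special_tokens=True)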
# Main app
def main():
    # Initialize session state for the model
    if 'model_loaded' not in st.session_state:
        st.session_state.model_loaded = False
        st.session_state.model = None
        st.session_state.tokenizer = None

    # Load model button
    if not st.session_state.model_loaded:
        if st.button("📥 Load Deepfake Analysis Model", type="primary"):
            model, tokenizer = load_model()
            if model is not None and tokenizer is not None:
                st.session_state.model = model
                st.session_state.tokenizer = tokenizer
                st.session_state.model_loaded = True
                st.success("✅ Model loaded successfully! You can now analyze images.")
            else:
                st.error("❌ Failed to load model. Please check the logs for errors.")
    else:
        st.success("✅ Model loaded successfully! You can now analyze images.")

    # Image upload section
    st.subheader("Upload an Image")
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

    # Default question with option to customize
    default_question = "Analyze this image and tell me if it's a deepfake. Provide both technical and non-technical explanations."
    question = st.text_area("Question/Prompt:", value=default_question, height=100)

    if uploaded_file is not None:
        # Display the uploaded image
        image = Image.open(uploaded_file).convert("RGB")
        st.image(image, caption="Uploaded Image", use_column_width=True)

        # Analyze button - only shown once the model is loaded
        if st.session_state.model_loaded:
            if st.button("🔍 Analyze Image", type="primary"):
                result = analyze_image(
                    image,
                    question,
                    st.session_state.model,
                    st.session_state.tokenizer,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    custom_instruction=custom_instruction
                )

                # Display results
                st.success("✅ Analysis complete!")

                # Check if the result contains both technical and non-technical explanations
                if "Technical" in result and "Non-Technical" in result:
                    # Split the result into technical and non-technical sections
                    parts = result.split("Non-Technical")
                    technical = parts[0]
                    non_technical = "Non-Technical" + parts[1]

                    # Display in two columns
                    col1, col2 = st.columns(2)
                    with col1:
                        st.subheader("Technical Analysis")
                        st.markdown(technical)
                    with col2:
                        st.subheader("Simple Explanation")
                        st.markdown(non_technical)
                else:
                    # Just display the whole result
                    st.subheader("Analysis Result")
                    st.markdown(result)
        else:
            st.warning("⚠️ Please load the model first before analyzing images.")

    # Footer
    st.markdown("---")
    st.caption("Deepfake Image Analyzer")


if __name__ == "__main__":
    main()