import streamlit as st import torch from PIL import Image import io from peft import PeftModel from unsloth import FastVisionModel import tempfile import os # App title and description st.set_page_config( page_title="Deepfake Analyzer", layout="wide", page_icon="🔍" ) # Main title and description st.title("Deepfake Image Analyzer") st.markdown("Upload an image to analyze it for possible deepfake manipulation") # Check for GPU availability def check_gpu(): if torch.cuda.is_available(): gpu_info = torch.cuda.get_device_properties(0) st.sidebar.success(f"✅ GPU available: {gpu_info.name} ({gpu_info.total_memory / (1024**3):.2f} GB)") return True else: st.sidebar.warning("⚠️ No GPU detected. Analysis will be slower.") return False # Sidebar components st.sidebar.title("Options") # Temperature slider temperature = st.sidebar.slider( "Temperature", min_value=0.1, max_value=1.0, value=0.7, step=0.1, help="Higher values make output more random, lower values more deterministic" ) # Max response length slider max_tokens = st.sidebar.slider( "Maximum Response Length", min_value=100, max_value=1000, value=500, step=50, help="The maximum number of tokens in the response" ) # Custom instruction text area in sidebar custom_instruction = st.sidebar.text_area( "Custom Instructions (Advanced)", value="Analyze for facial inconsistencies, lighting irregularities, mismatched shadows, and other signs of manipulation.", help="Add specific instructions for the model" ) # About section in sidebar st.sidebar.markdown("---") st.sidebar.subheader("About") st.sidebar.markdown(""" This analyzer looks for: - Facial inconsistencies - Unnatural movements - Lighting issues - Texture anomalies - Edge artifacts - Blending problems **Model**: Fine-tuned Llama 3.2 Vision **Creator**: [Saakshi Gupta](https://huggingface.co/saakshigupta) """) # Function to fix cross-attention masks def fix_cross_attention_mask(inputs): if 'cross_attention_mask' in inputs and 0 in inputs['cross_attention_mask'].shape: batch_size, seq_len, _, num_tiles = inputs['cross_attention_mask'].shape visual_features = 6404 # Critical dimension new_mask = torch.ones((batch_size, seq_len, visual_features, num_tiles), device=inputs['cross_attention_mask'].device) inputs['cross_attention_mask'] = new_mask st.success("Fixed cross-attention mask dimensions") return inputs # Load model function @st.cache_resource def load_model(): with st.spinner("Loading model... This may take a few minutes. Please be patient..."): try: # Check for GPU has_gpu = check_gpu() # Load base model and tokenizer using Unsloth base_model_id = "unsloth/llama-3.2-11b-vision-instruct" model, tokenizer = FastVisionModel.from_pretrained( base_model_id, load_in_4bit=True, ) # Load the adapter adapter_id = "saakshigupta/deepfake-explainer-1" model = PeftModel.from_pretrained(model, adapter_id) # Set to inference mode FastVisionModel.for_inference(model) return model, tokenizer except Exception as e: st.error(f"Error loading model: {str(e)}") return None, None # Analyze image function def analyze_image(image, question, model, tokenizer, temperature=0.7, max_tokens=500, custom_instruction=""): # Combine question with custom instruction if provided if custom_instruction.strip(): full_prompt = f"{question}\n\nAdditional instructions: {custom_instruction}" else: full_prompt = question # Format the message messages = [ {"role": "user", "content": [ {"type": "image"}, {"type": "text", "text": full_prompt} ]} ] # Apply chat template input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True) # Process with image inputs = tokenizer( image, input_text, add_special_tokens=False, return_tensors="pt", ).to(model.device) # Fix cross-attention mask if needed inputs = fix_cross_attention_mask(inputs) # Generate response with st.spinner("Analyzing image... (this may take 15-30 seconds)"): with torch.no_grad(): output_ids = model.generate( **inputs, max_new_tokens=max_tokens, use_cache=True, temperature=temperature, top_p=0.9 ) # Decode the output response = tokenizer.decode(output_ids[0], skip_special_tokens=True) # Try to extract just the model's response (after the prompt) if full_prompt in response: result = response.split(full_prompt)[-1].strip() else: result = response return result # Main app def main(): # Create a button to load the model if 'model_loaded' not in st.session_state: st.session_state.model_loaded = False st.session_state.model = None st.session_state.tokenizer = None # Load model button if not st.session_state.model_loaded: if st.button("📥 Load Deepfake Analysis Model", type="primary"): model, tokenizer = load_model() if model is not None and tokenizer is not None: st.session_state.model = model st.session_state.tokenizer = tokenizer st.session_state.model_loaded = True st.success("✅ Model loaded successfully! You can now analyze images.") else: st.error("❌ Failed to load model. Please check the logs for errors.") else: st.success("✅ Model loaded successfully! You can now analyze images.") # Image upload section st.subheader("Upload an Image") uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"]) # Default question with option to customize default_question = "Analyze this image and tell me if it's a deepfake. Provide both technical and non-technical explanations." question = st.text_area("Question/Prompt:", value=default_question, height=100) if uploaded_file is not None: # Display the uploaded image image = Image.open(uploaded_file).convert("RGB") st.image(image, caption="Uploaded Image", use_column_width=True) # Analyze button - only enabled if model is loaded if st.session_state.model_loaded: if st.button("🔍 Analyze Image", type="primary"): result = analyze_image( image, question, st.session_state.model, st.session_state.tokenizer, temperature=temperature, max_tokens=max_tokens, custom_instruction=custom_instruction ) # Display results st.success("✅ Analysis complete!") # Check if the result contains both technical and non-technical explanations if "Technical" in result and "Non-Technical" in result: # Split the result into technical and non-technical sections parts = result.split("Non-Technical") technical = parts[0] non_technical = "Non-Technical" + parts[1] # Display in two columns col1, col2 = st.columns(2) with col1: st.subheader("Technical Analysis") st.markdown(technical) with col2: st.subheader("Simple Explanation") st.markdown(non_technical) else: # Just display the whole result st.subheader("Analysis Result") st.markdown(result) else: st.warning("⚠️ Please load the model first before analyzing images.") # Footer st.markdown("---") st.caption("Deepfake Image Analyzer") if __name__ == "__main__": main()