Spaces:

saakshigupta
/

deepfake-explainer-app

Paused

App Files Files Community

deepfake-explainer-app / app.py

saakshigupta

Update app.py

e1419e7 verified 5 months ago

raw

history blame

8.52 kB

	import streamlit as st
	import torch
	from PIL import Image
	import io
	from peft import PeftModel
	from unsloth import FastVisionModel
	import tempfile
	import os

	# App title and description
	st.set_page_config(
	page_title="Deepfake Analyzer",
	layout="wide",
	page_icon="🔍"
	)

	# Main title and description
	st.title("Deepfake Image Analyzer")
	st.markdown("Upload an image to analyze it for possible deepfake manipulation")

	# Check for GPU availability
	def check_gpu():
	if torch.cuda.is_available():
	gpu_info = torch.cuda.get_device_properties(0)
	st.sidebar.success(f"✅ GPU available: {gpu_info.name} ({gpu_info.total_memory / (1024**3):.2f} GB)")
	return True
	else:
	st.sidebar.warning("⚠️ No GPU detected. Analysis will be slower.")
	return False

	# Sidebar components
	st.sidebar.title("Options")

	# Temperature slider
	temperature = st.sidebar.slider(
	"Temperature",
	min_value=0.1,
	max_value=1.0,
	value=0.7,
	step=0.1,
	help="Higher values make output more random, lower values more deterministic"
	)

	# Max response length slider
	max_tokens = st.sidebar.slider(
	"Maximum Response Length",
	min_value=100,
	max_value=1000,
	value=500,
	step=50,
	help="The maximum number of tokens in the response"
	)

	# Custom instruction text area in sidebar
	custom_instruction = st.sidebar.text_area(
	"Custom Instructions (Advanced)",
	value="Analyze for facial inconsistencies, lighting irregularities, mismatched shadows, and other signs of manipulation.",
	help="Add specific instructions for the model"
	)

	# About section in sidebar
	st.sidebar.markdown("---")
	st.sidebar.subheader("About")
	st.sidebar.markdown("""
	This analyzer looks for:
	- Facial inconsistencies
	- Unnatural movements
	- Lighting issues
	- Texture anomalies
	- Edge artifacts
	- Blending problems

	Model: Fine-tuned Llama 3.2 Vision
	Creator: [Saakshi Gupta](https://huggingface.co/saakshigupta)
	""")

	# Function to fix cross-attention masks
	def fix_cross_attention_mask(inputs):
	if 'cross_attention_mask' in inputs and 0 in inputs['cross_attention_mask'].shape:
	batch_size, seq_len, _, num_tiles = inputs['cross_attention_mask'].shape
	visual_features = 6404 # Critical dimension
	new_mask = torch.ones((batch_size, seq_len, visual_features, num_tiles),
	device=inputs['cross_attention_mask'].device)
	inputs['cross_attention_mask'] = new_mask
	st.success("Fixed cross-attention mask dimensions")
	return inputs

	# Load model function
	@st.cache_resource
	def load_model():
	with st.spinner("Loading model... This may take a few minutes. Please be patient..."):
	try:
	# Check for GPU
	has_gpu = check_gpu()

	# Load base model and tokenizer using Unsloth
	base_model_id = "unsloth/llama-3.2-11b-vision-instruct"
	model, tokenizer = FastVisionModel.from_pretrained(
	base_model_id,
	load_in_4bit=True,
	)

	# Load the adapter
	adapter_id = "saakshigupta/deepfake-explainer-1"
	model = PeftModel.from_pretrained(model, adapter_id)

	# Set to inference mode
	FastVisionModel.for_inference(model)

	return model, tokenizer
	except Exception as e:
	st.error(f"Error loading model: {str(e)}")
	return None, None

	# Analyze image function
	def analyze_image(image, question, model, tokenizer, temperature=0.7, max_tokens=500, custom_instruction=""):
	# Combine question with custom instruction if provided
	if custom_instruction.strip():
	full_prompt = f"{question}\n\nAdditional instructions: {custom_instruction}"
	else:
	full_prompt = question

	# Format the message
	messages = [
	{"role": "user", "content": [
	{"type": "image"},
	{"type": "text", "text": full_prompt}
	]}
	]

	# Apply chat template
	input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

	# Process with image
	inputs = tokenizer(
	image,
	input_text,
	add_special_tokens=False,
	return_tensors="pt",
	).to(model.device)

	# Fix cross-attention mask if needed
	inputs = fix_cross_attention_mask(inputs)

	# Generate response
	with st.spinner("Analyzing image... (this may take 15-30 seconds)"):
	with torch.no_grad():
	output_ids = model.generate(
	**inputs,
	max_new_tokens=max_tokens,
	use_cache=True,
	temperature=temperature,
	top_p=0.9
	)

	# Decode the output
	response = tokenizer.decode(output_ids[0], skip_special_tokens=True)

	# Try to extract just the model's response (after the prompt)
	if full_prompt in response:
	result = response.split(full_prompt)[-1].strip()
	else:
	result = response

	return result

	# Main app
	def main():
	# Create a button to load the model
	if 'model_loaded' not in st.session_state:
	st.session_state.model_loaded = False
	st.session_state.model = None
	st.session_state.tokenizer = None

	# Load model button
	if not st.session_state.model_loaded:
	if st.button("📥 Load Deepfake Analysis Model", type="primary"):
	model, tokenizer = load_model()
	if model is not None and tokenizer is not None:
	st.session_state.model = model
	st.session_state.tokenizer = tokenizer
	st.session_state.model_loaded = True
	st.success("✅ Model loaded successfully! You can now analyze images.")
	else:
	st.error("❌ Failed to load model. Please check the logs for errors.")
	else:
	st.success("✅ Model loaded successfully! You can now analyze images.")

	# Image upload section
	st.subheader("Upload an Image")
	uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

	# Default question with option to customize
	default_question = "Analyze this image and tell me if it's a deepfake. Provide both technical and non-technical explanations."
	question = st.text_area("Question/Prompt:", value=default_question, height=100)

	if uploaded_file is not None:
	# Display the uploaded image
	image = Image.open(uploaded_file).convert("RGB")
	st.image(image, caption="Uploaded Image", use_column_width=True)

	# Analyze button - only enabled if model is loaded
	if st.session_state.model_loaded:
	if st.button("🔍 Analyze Image", type="primary"):
	result = analyze_image(
	image,
	question,
	st.session_state.model,
	st.session_state.tokenizer,
	temperature=temperature,
	max_tokens=max_tokens,
	custom_instruction=custom_instruction
	)

	# Display results
	st.success("✅ Analysis complete!")

	# Check if the result contains both technical and non-technical explanations
	if "Technical" in result and "Non-Technical" in result:
	# Split the result into technical and non-technical sections
	parts = result.split("Non-Technical")
	technical = parts[0]
	non_technical = "Non-Technical" + parts[1]

	# Display in two columns
	col1, col2 = st.columns(2)
	with col1:
	st.subheader("Technical Analysis")
	st.markdown(technical)

	with col2:
	st.subheader("Simple Explanation")
	st.markdown(non_technical)
	else:
	# Just display the whole result
	st.subheader("Analysis Result")
	st.markdown(result)
	else:
	st.warning("⚠️ Please load the model first before analyzing images.")

	# Footer
	st.markdown("---")
	st.caption("Deepfake Image Analyzer")

	if __name__ == "__main__":
	main()