"""Streamlit app for analyzing images for signs of deepfake manipulation.

Loads the 4-bit quantized Llama 3.2 Vision base model via Unsloth, applies a
fine-tuned LoRA adapter, and answers questions about an uploaded image.
"""

import streamlit as st
import torch
from PIL import Image
from peft import PeftModel
from unsloth import FastVisionModel

st.set_page_config(
    page_title="Deepfake Analyzer",
    layout="wide",
    page_icon="🔍",
)

st.title("Deepfake Image Analyzer")
st.markdown("Upload an image to analyze it for possible deepfake manipulation.")


def check_gpu():
    """Report GPU availability in the sidebar; return True if CUDA is available."""
    if torch.cuda.is_available():
        gpu_info = torch.cuda.get_device_properties(0)
        st.sidebar.success(f"✅ GPU available: {gpu_info.name} ({gpu_info.total_memory / (1024**3):.2f} GB)")
        return True
    else:
        st.sidebar.warning("⚠️ No GPU detected. Analysis will be slower.")
        return False


st.sidebar.title("Options")

temperature = st.sidebar.slider(
    "Temperature",
    min_value=0.1,
    max_value=1.0,
    value=0.7,
    step=0.1,
    help="Higher values make the output more random; lower values make it more deterministic."
)

max_tokens = st.sidebar.slider(
    "Maximum Response Length",
    min_value=100,
    max_value=1000,
    value=500,
    step=50,
    help="The maximum number of tokens to generate in the response."
)

custom_instruction = st.sidebar.text_area(
    "Custom Instructions (Advanced)",
    value="Analyze for facial inconsistencies, lighting irregularities, mismatched shadows, and other signs of manipulation.",
    help="Add specific instructions for the model."
)

st.sidebar.markdown("---")
st.sidebar.subheader("About")
st.sidebar.markdown("""
This analyzer looks for:
- Facial inconsistencies
- Unnatural movements
- Lighting issues
- Texture anomalies
- Edge artifacts
- Blending problems

**Model**: Fine-tuned Llama 3.2 Vision

**Creator**: [Saakshi Gupta](https://huggingface.co/saakshigupta)
""")


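# Workaround: with some transformers versions, the processor can emit a
# cross-attention mask with a zero-sized dimension for Llama 3.2 Vision
# inputs, which breaks generation. The helper below detects that case and
# rebuilds the mask as all-ones over the expected visual-feature positions.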
def fix_cross_attention_mask(inputs):
    """Rebuild the cross-attention mask if it has a zero-sized dimension."""
    if 'cross_attention_mask' in inputs and 0 in inputs['cross_attention_mask'].shape:
        batch_size, seq_len, _, num_tiles = inputs['cross_attention_mask'].shape
        visual_features = 6404  # size the model appears to expect for this dimension
        new_mask = torch.ones((batch_size, seq_len, visual_features, num_tiles),
                              device=inputs['cross_attention_mask'].device)
        inputs['cross_attention_mask'] = new_mask
        st.success("Fixed cross-attention mask dimensions")
    return inputs


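# st.cache_resource makes Streamlit load the model once per process and reuse
# it across script reruns, rather than reloading it on every interaction. Note
# that 4-bit loading via bitsandbytes generally assumes a CUDA-capable GPU.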
@st.cache_resource
def load_model():
    """Load the 4-bit base vision model and apply the fine-tuned LoRA adapter."""
    with st.spinner("Loading model... This may take a few minutes. Please be patient..."):
        try:
            check_gpu()

            # Load the quantized base model and its tokenizer/processor
            base_model_id = "unsloth/llama-3.2-11b-vision-instruct"
            model, tokenizer = FastVisionModel.from_pretrained(
                base_model_id,
                load_in_4bit=True,
            )

            # Apply the fine-tuned deepfake-explainer adapter on top
            adapter_id = "saakshigupta/deepfake-explainer-1"
            model = PeftModel.from_pretrained(model, adapter_id)

            # Switch Unsloth's wrappers into inference mode
            FastVisionModel.for_inference(model)

            return model, tokenizer
        except Exception as e:
            st.error(f"Error loading model: {str(e)}")
            return None, None


def analyze_image(image, question, model, tokenizer, temperature=0.7, max_tokens=500, custom_instruction=""):
    """Run the vision model on the uploaded image and return the answer text."""
    # Fold any custom instructions into the main prompt
    if custom_instruction.strip():
        full_prompt = f"{question}\n\nAdditional instructions: {custom_instruction}"
    else:
        full_prompt = question

    # Single-turn chat message containing the image plus the text prompt
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": full_prompt}
        ]}
    ]

    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

    inputs = tokenizer(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to(model.device)

    # Repair the cross-attention mask if the processor produced a degenerate one
    inputs = fix_cross_attention_mask(inputs)

    with st.spinner("Analyzing image... (this may take 15-30 seconds)"):
        with torch.no_grad():
            output_ids = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                use_cache=True,
                temperature=temperature,
                top_p=0.9
            )

    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # The decoded text echoes the prompt; keep only the generated answer
    if full_prompt in response:
        result = response.split(full_prompt)[-1].strip()
    else:
        result = response

    return result


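# The loaded model and tokenizer are kept in st.session_state so they survive
# Streamlit's script reruns between user interactions.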
def main():
    """Streamlit entry point: model loading, image upload, and analysis UI."""
    if 'model_loaded' not in st.session_state:
        st.session_state.model_loaded = False
        st.session_state.model = None
        st.session_state.tokenizer = None

    if not st.session_state.model_loaded:
        if st.button("📥 Load Deepfake Analysis Model", type="primary"):
            model, tokenizer = load_model()
            if model is not None and tokenizer is not None:
                st.session_state.model = model
                st.session_state.tokenizer = tokenizer
                st.session_state.model_loaded = True
                st.success("✅ Model loaded successfully! You can now analyze images.")
            else:
                st.error("❌ Failed to load model. Please check the logs for errors.")
    else:
        st.success("✅ Model is loaded. You can now analyze images.")

    st.subheader("Upload an Image")
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

    default_question = "Analyze this image and tell me if it's a deepfake. Provide both technical and non-technical explanations."
    question = st.text_area("Question/Prompt:", value=default_question, height=100)

    if uploaded_file is not None:
        image = Image.open(uploaded_file).convert("RGB")
        st.image(image, caption="Uploaded Image", use_container_width=True)

        if st.session_state.model_loaded:
            if st.button("🔍 Analyze Image", type="primary"):
                result = analyze_image(
                    image,
                    question,
                    st.session_state.model,
                    st.session_state.tokenizer,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    custom_instruction=custom_instruction
                )

                st.success("✅ Analysis complete!")

if "Technical" in result and "Non-Technical" in result: |
|
|
|
parts = result.split("Non-Technical") |
|
technical = parts[0] |
|
non_technical = "Non-Technical" + parts[1] |
|
|
|
|
|
col1, col2 = st.columns(2) |
|
with col1: |
|
st.subheader("Technical Analysis") |
|
st.markdown(technical) |
|
|
|
with col2: |
|
st.subheader("Simple Explanation") |
|
st.markdown(non_technical) |
|
else: |
|
|
|
st.subheader("Analysis Result") |
|
st.markdown(result) |
|
else: |
|
st.warning("β οΈ Please load the model first before analyzing images.") |
|
|
|
|
|
    st.markdown("---")
    st.caption("Deepfake Image Analyzer")


if __name__ == "__main__":
    main()
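
# To run the app locally (assuming this file is saved as app.py):
#   streamlit run app.py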