Spaces:

saakshigupta
/

deepfake-explainer-app

Paused

App Files Community

saakshigupta committed on Apr 7

Commit

062622e

verified ·

1 Parent(s): 0a35799

Update app.py

Browse files

Files changed (1) hide show

app.py +228 -71

app.py CHANGED Viewed

@@ -26,7 +26,7 @@ st.set_page_config(
 )
 # Main title and description
-st.title("Deepfake Image Analyzer")
 st.markdown("Analyze images for deepfake manipulation with multi-stage analysis")
 # Check for GPU availability
@@ -42,9 +42,25 @@ def check_gpu():
 # Sidebar components
 st.sidebar.title("Options")
-# Fixed values for temperature and max_tokens (removed sliders)
-temperature = 0.7
-max_tokens = 500
 # Custom instruction text area in sidebar
 custom_instruction = st.sidebar.text_area(
@@ -693,73 +709,214 @@ def main():
                         # Store caption but don't display it yet
-                                # Detect with CLIP model if loaded
                 if st.session_state.clip_model_loaded:
-                    try:
-                        with st.spinner("Analyzing image with CLIP model..."):
-                            # Preprocess image for CLIP
-                            transform = transforms.Compose([
-                                transforms.Resize((224, 224)),
-                                transforms.ToTensor(),
-                                transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]),
-                            ])
-                            # Create a simple dataset for the image
-                            dataset = ImageDataset(image, transform=transform, face_only=True)
-                            tensor, _, _, _, face_box, _ = dataset[0]
-                            tensor = tensor.unsqueeze(0)
-                            # Get device
-                            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-                            # Move model and tensor to device
-                            model = st.session_state.clip_model.to(device)
-                            tensor = tensor.to(device)
-                            # Forward pass
-                            with torch.no_grad():
-                                outputs = model.vision_model(pixel_values=tensor).pooler_output
-                                logits = model.classification_head(outputs)
-                                probs = torch.softmax(logits, dim=1)[0]
-                                pred_class = torch.argmax(probs).item()
-                                confidence = probs[pred_class].item()
-                                pred_label = "Fake" if pred_class == 1 else "Real"
-                            # Display results
-                            with col2:
-                                st.markdown("### Detection Result")
-                                st.markdown(f"**Classification:** {pred_label} (Confidence: {confidence:.2%})")
-                            # GradCAM visualization
-                            st.subheader("GradCAM Visualization")
-                            cam, overlay, comparison, detected_face_box = process_image_with_gradcam(
-                                image, model, device, pred_class
-                            )
-                            # Display GradCAM results (controlled size)
-                            st.image(comparison, caption="Original | CAM | Overlay", width=700)
-                            # Generate caption for GradCAM overlay image if BLIP model is loaded
-                            if st.session_state.blip_model_loaded:
-                                with st.spinner("Analyzing GradCAM visualization..."):
-                                    gradcam_caption = generate_gradcam_caption(
-                                        overlay,
-                                        st.session_state.blip_processor,
-                                        st.session_state.blip_model
-                                    )
-                                    st.session_state.gradcam_caption = gradcam_caption
-                            # Save results in session state for LLM analysis
-                            st.session_state.current_image = image
-                            st.session_state.current_overlay = overlay
-                            st.session_state.current_face_box = detected_face_box
-                            st.session_state.current_pred_label = pred_label
-                            st.session_state.current_confidence = confidence
-                            st.success("✅ Initial detection and GradCAM visualization complete!")
-                    except Exception as e:
-                        st.error(f"Error analyzing image: {str(e)}")
-                        import traceback
-                        st.error(traceback.format_exc())
-                else:
-                    st.warning("⚠️ Please load the CLIP model first to perform initial detection.")

 )
 # Main title and description
+st.title("Advanced Deepfake Image Analyzer")
 st.markdown("Analyze images for deepfake manipulation with multi-stage analysis")
 # Check for GPU availability
 # Sidebar components
 st.sidebar.title("Options")
+# Temperature slider
+temperature = st.sidebar.slider(
+    "Temperature",
+    min_value=0.1,
+    max_value=1.0,
+    value=0.7,
+    step=0.1,
+    help="Higher values make output more random, lower values more deterministic"
+)
+# Max response length slider
+max_tokens = st.sidebar.slider(
+    "Maximum Response Length",
+    min_value=100,
+    max_value=1000,
+    value=500,
+    step=50,
+    help="The maximum number of tokens in the response"
+)
 # Custom instruction text area in sidebar
 custom_instruction = st.sidebar.text_area(
                         # Store caption but don't display it yet
+                # Detect with CLIP model if loaded
                 if st.session_state.clip_model_loaded:
+                    with st.spinner("Analyzing image with CLIP model..."):
+                        # Preprocess image for CLIP
+                        transform = transforms.Compose([
+                            transforms.Resize((224, 224)),
+                            transforms.ToTensor(),
+                            transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]),
+                        ])
+                        # Create a simple dataset for the image
+                        dataset = ImageDataset(image, transform=transform, face_only=True)
+                        tensor, _, _, _, face_box, _ = dataset[0]
+                        tensor = tensor.unsqueeze(0)
+                        # Get device
+                        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+                        # Move model and tensor to device
+                        model = st.session_state.clip_model.to(device)
+                        tensor = tensor.to(device)
+                        # Forward pass
+                        with torch.no_grad():
+                            outputs = model.vision_model(pixel_values=tensor).pooler_output
+                            logits = model.classification_head(outputs)
+                            probs = torch.softmax(logits, dim=1)[0]
+                            pred_class = torch.argmax(probs).item()
+                            confidence = probs[pred_class].item()
+                            pred_label = "Fake" if pred_class == 1 else "Real"
+                        # Display results
+                        with col2:
+                            st.markdown("### Detection Result")
+                            st.markdown(f"**Classification:** {pred_label} (Confidence: {confidence:.2%})")
+                        # GradCAM visualization
+                        st.subheader("GradCAM Visualization")
+                        cam, overlay, comparison, detected_face_box = process_image_with_gradcam(
+                            image, model, device, pred_class
+                        )
+                        # Display GradCAM results (controlled size)
+                        st.image(comparison, caption="Original | CAM | Overlay", width=700)
+                        # Generate caption for GradCAM overlay image if BLIP model is loaded
+                        if st.session_state.blip_model_loaded:
+                            with st.spinner("Analyzing GradCAM visualization..."):
+                                gradcam_caption = generate_gradcam_caption(
+                                    overlay,
+                                    st.session_state.blip_processor,
+                                    st.session_state.blip_model
+                                )
+                                st.session_state.gradcam_caption = gradcam_caption
+                                # Store caption but don't display it yet
+                        # Save results in session state for LLM analysis
+                        st.session_state.current_image = image
+                        st.session_state.current_overlay = overlay
+                        st.session_state.current_face_box = detected_face_box
+                        st.session_state.current_pred_label = pred_label
+                        st.session_state.current_confidence = confidence
+                        st.success("✅ Initial detection and GradCAM visualization complete!")
+                else:
+                    st.warning("⚠️ Please load the CLIP model first to perform initial detection.")
+            except Exception as e:
+                st.error(f"Error processing image: {str(e)}")
+                import traceback
+                st.error(traceback.format_exc())  # This will show the full error traceback
+    # Image Analysis Summary section - AFTER Stage 2
+    if hasattr(st.session_state, 'current_image') and (hasattr(st.session_state, 'image_caption') or hasattr(st.session_state, 'gradcam_caption')):
+        with st.expander("Image Analysis Summary", expanded=True):
+            st.subheader("Generated Descriptions and Analysis")
+            # Display image, captions, and results in organized layout with proper formatting
+            col1, col2 = st.columns([1, 2])
+            with col1:
+                # Display original image and overlay side by side with controlled size
+                st.image(st.session_state.current_image, caption="Original Image", width=300)
+                if hasattr(st.session_state, 'current_overlay'):
+                    st.image(st.session_state.current_overlay, caption="GradCAM Overlay", width=300)
+            with col2:
+                # Detection result
+                if hasattr(st.session_state, 'current_pred_label'):
+                    st.markdown("### Detection Result")
+                    st.markdown(f"**Classification:** {st.session_state.current_pred_label} (Confidence: {st.session_state.current_confidence:.2%})")
+                    st.markdown("---")
+                # Image description
+                if hasattr(st.session_state, 'image_caption'):
+                    st.markdown("### Image Description")
+                    st.markdown(st.session_state.image_caption)
+                    st.markdown("---")
+                # GradCAM analysis
+                if hasattr(st.session_state, 'gradcam_caption'):
+                    st.markdown("### GradCAM Analysis")
+                    st.markdown(st.session_state.gradcam_caption)
+    # LLM Analysis section - AFTER Image Analysis Summary
+    with st.expander("Stage 3: Detailed Analysis with Vision LLM", expanded=False):
+        if hasattr(st.session_state, 'current_image') and st.session_state.llm_model_loaded:
+            st.subheader("Detailed Deepfake Analysis")
+            # Display chat history
+            for i, (question, answer) in enumerate(st.session_state.chat_history):
+                st.markdown(f"**Question {i+1}:** {question}")
+                st.markdown(f"**Answer:** {answer}")
+                st.markdown("---")
+            # Include both captions in the prompt if available
+            caption_text = ""
+            if hasattr(st.session_state, 'image_caption'):
+                caption_text += f"\n\nImage Description:\n{st.session_state.image_caption}"
+            if hasattr(st.session_state, 'gradcam_caption'):
+                caption_text += f"\n\nGradCAM Analysis:\n{st.session_state.gradcam_caption}"
+            # Default question with option to customize
+            default_question = f"This image has been classified as {st.session_state.current_pred_label}. Analyze the key features that led to this classification, focusing on the highlighted areas in the GradCAM visualization. Provide both a technical explanation for experts and a simple explanation for non-technical users."
+            # User input for new question
+            new_question = st.text_area("Ask a question about the image:", value=default_question if not st.session_state.chat_history else "", height=100)
+            # Analyze button and Clear Chat button in the same row
+            col1, col2 = st.columns([3, 1])
+            with col1:
+                analyze_button = st.button("🔍 Send Question", type="primary")
+            with col2:
+                clear_button = st.button("🗑️ Clear Chat History")
+            if clear_button:
+                st.session_state.chat_history = []
+                st.experimental_rerun()
+            if analyze_button and new_question:
+                try:
+                    # Add caption info if it's the first question
+                    if not st.session_state.chat_history:
+                        full_question = new_question + caption_text
+                    else:
+                        full_question = new_question
+                    result = analyze_image_with_llm(
+                        st.session_state.current_image,
+                        st.session_state.current_overlay,
+                        st.session_state.current_face_box,
+                        st.session_state.current_pred_label,
+                        st.session_state.current_confidence,
+                        full_question,
+                        st.session_state.llm_model,
+                        st.session_state.tokenizer,
+                        temperature=temperature,
+                        max_tokens=max_tokens,
+                        custom_instruction=custom_instruction
+                    )
+                    # Add to chat history
+                    st.session_state.chat_history.append((new_question, result))
+                    # Display the latest result too
+                    st.success("✅ Analysis complete!")
+                    # Check if the result contains both technical and non-technical explanations
+                    if "Technical" in result and "Non-Technical" in result:
+                        try:
+                            # Split the result into technical and non-technical sections
+                            parts = result.split("Non-Technical")
+                            technical = parts[0]
+                            non_technical = "Non-Technical" + parts[1]
+                            # Display in two columns
+                            tech_col, simple_col = st.columns(2)
+                            with tech_col:
+                                st.subheader("Technical Analysis")
+                                st.markdown(technical)
+                            with simple_col:
+                                st.subheader("Simple Explanation")
+                                st.markdown(non_technical)
+                        except Exception as e:
+                            # Fallback if splitting fails
+                            st.subheader("Analysis Result")
+                            st.markdown(result)
+                    else:
+                        # Just display the whole result
+                        st.subheader("Analysis Result")
+                        st.markdown(result)
+                    # Rerun to update the chat history display
+                    st.experimental_rerun()
+                except Exception as e:
+                    st.error(f"Error during LLM analysis: {str(e)}")
+        elif not hasattr(st.session_state, 'current_image'):
+            st.warning("⚠️ Please upload an image and complete the initial detection first.")
+        else:
+            st.warning("⚠️ Please load the Vision LLM to perform detailed analysis.")
+    # Footer
+    st.markdown("---")
+    st.caption("Advanced Deepfake Image Analyzer with Structured BLIP Captioning")
+if __name__ == "__main__":
+    main()