Update app.py
app.py CHANGED
@@ -432,8 +432,25 @@ def load_blip_model():
         st.error(f"Error loading BLIP model: {str(e)}")
         return None, None
 
+# Define custom prompts for original and GradCAM images
+ORIGINAL_IMAGE_PROMPT = """Generate a detailed description of this image with the following structure:
+Subject: [Describe the person/main subject]
+Appearance: [Describe clothing, hair, facial features]
+Pose: [Describe the person's pose and expression]
+Background: [Describe the environment and setting]
+Lighting: [Describe lighting conditions and shadows]
+Colors: [Note dominant colors and color palette]
+Notable Elements: [Any distinctive objects or visual elements]"""
+
+GRADCAM_IMAGE_PROMPT = """Describe the GradCAM visualization overlay with the following structure:
+Main Focus Area: [Identify the primary region highlighted]
+High Activation Regions: [Describe red/yellow areas and corresponding image features]
+Medium Activation Regions: [Describe green/cyan areas and corresponding image features]
+Low Activation Regions: [Describe blue/dark blue areas and corresponding image features]
+Activation Pattern: [Describe the overall pattern of the heatmap]"""
+
 # Function to generate image caption
-def generate_image_caption(image, processor, model, max_length=50, num_beams=5):
+def generate_image_caption(image, processor, model, is_gradcam=False, max_length=75, num_beams=5):
     """
     Generate a caption for the input image using BLIP model
 
@@ -441,6 +458,7 @@ def generate_image_caption(image, processor, model, max_length=50, num_beams=5):
         image (PIL.Image): Input image
         processor: BLIP processor
         model: BLIP model
+        is_gradcam (bool): Whether the image is a GradCAM visualization
         max_length (int): Maximum length of the caption
         num_beams (int): Number of beams for beam search
 
@@ -448,8 +466,11 @@ def generate_image_caption(image, processor, model, max_length=50, num_beams=5):
         str: Generated caption
     """
     try:
-        # Preprocess the image
-        inputs = processor(image, return_tensors="pt")
+        # Select the appropriate prompt based on image type
+        prompt = GRADCAM_IMAGE_PROMPT if is_gradcam else ORIGINAL_IMAGE_PROMPT
+
+        # Preprocess the image with the prompt
+        inputs = processor(image, text=prompt, return_tensors="pt")
 
         # Check for available GPU
         device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -462,6 +483,11 @@ def generate_image_caption(image, processor, model, max_length=50, num_beams=5):
 
         # Decode the caption
         caption = processor.decode(output[0], skip_special_tokens=True)
+
+        # If the caption contains the prompt, remove it
+        if prompt in caption:
+            caption = caption.replace(prompt, "").strip()
+
         return caption
     except Exception as e:
         st.error(f"Error generating caption: {str(e)}")
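For context, the captioning flow above maps onto the standard Hugging Face BLIP API. The sketch below is a minimal, self-contained version under two assumptions: the app uses the usual BlipProcessor/BlipForConditionalGeneration classes, and the checkpoint and file names are illustrative (load_blip_model() is outside this diff). It also shows why the prompt-stripping step exists: BLIP's conditional generation treats the text as a prefix to continue, so the decoded string normally begins with the prompt itself.

# Minimal sketch of prompt-conditioned BLIP captioning.
# Assumptions: standard Hugging Face BLIP classes; checkpoint and file name are illustrative.
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

image = Image.open("example.jpg").convert("RGB")
prompt = "a photograph of"  # short prefix that BLIP will continue

inputs = processor(image, text=prompt, return_tensors="pt").to(device)
output = model.generate(**inputs, max_length=75, num_beams=5)
caption = processor.decode(output[0], skip_special_tokens=True)

# BLIP echoes the conditioning text at the start of the output,
# which is what the patched generate_image_caption() strips away.
if caption.startswith(prompt):
    caption = caption[len(prompt):].strip()
print(caption)

One caveat: caption checkpoints are trained to complete short prefixes, so a long structured template like ORIGINAL_IMAGE_PROMPT may be echoed back largely verbatim with only a short continuation; the `if prompt in caption` guard removes the echo but cannot force the model to fill in every field.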
@@ -636,16 +662,21 @@ def main():
         image = Image.open(uploaded_file).convert("RGB")
         st.image(image, caption="Uploaded Image", use_column_width=True)
 
-        # Generate caption if BLIP model is loaded
+        # Generate detailed caption for original image if BLIP model is loaded
         if st.session_state.blip_model_loaded:
-            with st.spinner("Generating image caption..."):
+            with st.spinner("Generating detailed image description..."):
                 caption = generate_image_caption(
                     image,
                     st.session_state.blip_processor,
-                    st.session_state.blip_model
+                    st.session_state.blip_model,
+                    is_gradcam=False
                 )
                 st.session_state.image_caption = caption
-                st.success(f"📝 Image caption generated")
+                st.success(f"📝 Image Description Generated")
+
+                # Format the caption nicely
+                st.markdown("### Image Description:")
+                st.markdown(caption)
 
         # Detect with CLIP model if loaded
        if st.session_state.clip_model_loaded:
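The blip_model_loaded flag that gates this block is set elsewhere in the app. A minimal sketch of how such a flag typically pairs with a cached loader in Streamlit, assuming load_blip_model() follows the error-handling shape shown at the top of this diff (the checkpoint name and button label are again illustrative):

import streamlit as st
from transformers import BlipProcessor, BlipForConditionalGeneration

@st.cache_resource  # load once per process; reruns reuse the same objects
def load_blip_model():
    try:
        processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
        model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
        return processor, model
    except Exception as e:
        st.error(f"Error loading BLIP model: {str(e)}")
        return None, None

st.session_state.setdefault("blip_model_loaded", False)
if st.button("Load BLIP model"):
    processor, model = load_blip_model()
    if processor is not None and model is not None:
        st.session_state.blip_processor = processor
        st.session_state.blip_model = model
        st.session_state.blip_model_loaded = True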
@@ -694,6 +725,23 @@ def main():
         # Display GradCAM results
         st.image(comparison, caption="Original | CAM | Overlay", use_column_width=True)
 
+        # Generate caption for GradCAM overlay image if BLIP model is loaded
+        if st.session_state.blip_model_loaded:
+            with st.spinner("Analyzing GradCAM visualization..."):
+                gradcam_caption = generate_image_caption(
+                    overlay,
+                    st.session_state.blip_processor,
+                    st.session_state.blip_model,
+                    is_gradcam=True,
+                    max_length=100  # Longer for detailed analysis
+                )
+                st.session_state.gradcam_caption = gradcam_caption
+                st.success("✅ GradCAM analysis complete")
+
+                # Format the GradCAM caption nicely
+                st.markdown("### GradCAM Analysis:")
+                st.markdown(gradcam_caption)
+
         # Save results in session state for LLM analysis
         st.session_state.current_image = image
         st.session_state.current_overlay = overlay
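The overlay captioned here is the GradCAM heatmap blended onto the original image; its construction sits outside this diff. A common recipe that matches the color scale GRADCAM_IMAGE_PROMPT assumes (red/yellow = high activation, green/cyan = medium, blue = low, i.e. OpenCV's JET colormap) is sketched below; the function name and alpha value are illustrative, not the app's exact code.

import cv2
import numpy as np
from PIL import Image

def make_gradcam_overlay(image: Image.Image, cam: np.ndarray, alpha: float = 0.5) -> Image.Image:
    """Blend a [0, 1] CAM heatmap onto the image (illustrative sketch)."""
    rgb = np.array(image.convert("RGB"))
    cam_resized = cv2.resize(cam, (rgb.shape[1], rgb.shape[0]))
    heatmap = cv2.applyColorMap(np.uint8(255 * cam_resized), cv2.COLORMAP_JET)
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)  # OpenCV is BGR; PIL expects RGB
    blended = cv2.addWeighted(rgb, 1 - alpha, heatmap, alpha, 0)
    return Image.fromarray(blended)

Returning a PIL image keeps the overlay directly consumable by generate_image_caption(), which hands it to the BLIP processor unchanged.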
@@ -701,89 +749,4 @@ def main():
         st.session_state.current_pred_label = pred_label
         st.session_state.current_confidence = confidence
 
-        st.success("✅ Initial detection and GradCAM visualization complete!")
-    else:
-        st.warning("⚠️ Please load the CLIP model first to perform initial detection.")
-
-    # LLM Analysis section
-    with st.expander("Stage 3: Detailed Analysis with Vision LLM", expanded=False):
-        if hasattr(st.session_state, 'current_image') and st.session_state.llm_model_loaded:
-            st.subheader("Detailed Deepfake Analysis")
-
-            # Include caption in the prompt if available
-            caption_text = ""
-            if hasattr(st.session_state, 'image_caption'):
-                caption_text = f"\n\nImage caption: {st.session_state.image_caption}"
-
-            # Default question with option to customize
-            default_question = f"This image has been classified as {st.session_state.current_pred_label}.{caption_text} Analyze the key features that led to this classification, focusing on the highlighted areas in the GradCAM visualization. Provide both a technical explanation for experts and a simple explanation for non-technical users."
-            question = st.text_area("Question/Prompt:", value=default_question, height=100)
-
-            # Analyze button
-            if st.button("🔍 Perform Detailed Analysis", type="primary"):
-                result = analyze_image_with_llm(
-                    st.session_state.current_image,
-                    st.session_state.current_overlay,
-                    st.session_state.current_face_box,
-                    st.session_state.current_pred_label,
-                    st.session_state.current_confidence,
-                    question,
-                    st.session_state.llm_model,
-                    st.session_state.tokenizer,
-                    temperature=temperature,
-                    max_tokens=max_tokens,
-                    custom_instruction=custom_instruction
-                )
-
-                # Display results
-                st.success("✅ Analysis complete!")
-
-                # Check if the result contains both technical and non-technical explanations
-                if "Technical" in result and "Non-Technical" in result:
-                    # Split the result into technical and non-technical sections
-                    parts = result.split("Non-Technical")
-                    technical = parts[0]
-                    non_technical = "Non-Technical" + parts[1]
-
-                    # Display in two columns
-                    col1, col2 = st.columns(2)
-                    with col1:
-                        st.subheader("Technical Analysis")
-                        st.markdown(technical)
-
-                    with col2:
-                        st.subheader("Simple Explanation")
-                        st.markdown(non_technical)
-                else:
-                    # Just display the whole result
-                    st.subheader("Analysis Result")
-                    st.markdown(result)
-        elif not hasattr(st.session_state, 'current_image'):
-            st.warning("⚠️ Please upload an image and complete the initial detection first.")
-        else:
-            st.warning("⚠️ Please load the Vision LLM to perform detailed analysis.")
-
-    # Summary section with caption
-    if hasattr(st.session_state, 'current_image') and hasattr(st.session_state, 'image_caption'):
-        with st.expander("Image Caption Summary", expanded=True):
-            st.subheader("Generated Image Description")
-
-            # Display image and caption
-            col1, col2 = st.columns([1, 2])
-            with col1:
-                st.image(st.session_state.current_image, use_column_width=True)
-            with col2:
-                st.markdown("### BLIP Caption:")
-                st.markdown(f"**{st.session_state.image_caption}**")
-
-            # Display detection result if available
-            if hasattr(st.session_state, 'current_pred_label'):
-                st.markdown("### Detection Result:")
-                st.markdown(f"Classification: **{st.session_state.current_pred_label}** (Confidence: {st.session_state.current_confidence:.2%})")
-
-    # Footer
-    st.markdown("---")
-    st.caption("Advanced Deepfake Image Analyzer with BLIP Captioning")
-
-if __name__ == "__main__":
-    main()
+        st.success("✅ Initial detection and GradCAM visualization complete!")
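Net effect of the patch: the free-form Vision-LLM stage is dropped, and the structured outputs now persist in session state (image_caption, gradcam_caption) next to the CLIP verdict, per the "Save results in session state for LLM analysis" comment. If a later stage wants to fold them back into a single prompt, one hedged sketch (the helper name and report wording are hypothetical) is:

import streamlit as st

def build_analysis_prompt() -> str:
    """Illustrative only: combine stored results into one downstream prompt."""
    pred = st.session_state.get("current_pred_label", "unknown")
    conf = st.session_state.get("current_confidence", 0.0)
    parts = [f"Classification: {pred} (confidence {conf:.2%})."]
    if "image_caption" in st.session_state:
        parts.append(f"Image description:\n{st.session_state.image_caption}")
    if "gradcam_caption" in st.session_state:
        parts.append(f"GradCAM reading:\n{st.session_state.gradcam_caption}")
    return "\n\n".join(parts)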