saakshigupta committed (verified)
Commit 062622e · Parent(s): 0a35799

Update app.py

Files changed (1):
  1. app.py +228 -71
app.py CHANGED
@@ -26,7 +26,7 @@ st.set_page_config(
  )
  
  # Main title and description
- st.title("Deepfake Image Analyzer")
+ st.title("Advanced Deepfake Image Analyzer")
  st.markdown("Analyze images for deepfake manipulation with multi-stage analysis")
  
  # Check for GPU availability
@@ -42,9 +42,25 @@ def check_gpu():
  # Sidebar components
  st.sidebar.title("Options")
  
- # Fixed values for temperature and max_tokens (removed sliders)
- temperature = 0.7
- max_tokens = 500
+ # Temperature slider
+ temperature = st.sidebar.slider(
+     "Temperature",
+     min_value=0.1,
+     max_value=1.0,
+     value=0.7,
+     step=0.1,
+     help="Higher values make output more random, lower values more deterministic"
+ )
+
+ # Max response length slider
+ max_tokens = st.sidebar.slider(
+     "Maximum Response Length",
+     min_value=100,
+     max_value=1000,
+     value=500,
+     step=50,
+     help="The maximum number of tokens in the response"
+ )
  
  # Custom instruction text area in sidebar
  custom_instruction = st.sidebar.text_area(
@@ -693,73 +709,214 @@ def main():
  
          # Store caption but don't display it yet
  
-         # Detect with CLIP model if loaded
+         # Detect with CLIP model if loaded
          if st.session_state.clip_model_loaded:
-             try:
-                 with st.spinner("Analyzing image with CLIP model..."):
-                     # Preprocess image for CLIP
-                     transform = transforms.Compose([
-                         transforms.Resize((224, 224)),
-                         transforms.ToTensor(),
-                         transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]),
-                     ])
-
-                     # Create a simple dataset for the image
-                     dataset = ImageDataset(image, transform=transform, face_only=True)
-                     tensor, _, _, _, face_box, _ = dataset[0]
-                     tensor = tensor.unsqueeze(0)
-
-                     # Get device
-                     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-                     # Move model and tensor to device
-                     model = st.session_state.clip_model.to(device)
-                     tensor = tensor.to(device)
-
-                     # Forward pass
-                     with torch.no_grad():
-                         outputs = model.vision_model(pixel_values=tensor).pooler_output
-                         logits = model.classification_head(outputs)
-                         probs = torch.softmax(logits, dim=1)[0]
-                         pred_class = torch.argmax(probs).item()
-                         confidence = probs[pred_class].item()
-                         pred_label = "Fake" if pred_class == 1 else "Real"
-
-                     # Display results
-                     with col2:
-                         st.markdown("### Detection Result")
-                         st.markdown(f"**Classification:** {pred_label} (Confidence: {confidence:.2%})")
-
-                     # GradCAM visualization
-                     st.subheader("GradCAM Visualization")
-                     cam, overlay, comparison, detected_face_box = process_image_with_gradcam(
-                         image, model, device, pred_class
-                     )
-
-                     # Display GradCAM results (controlled size)
-                     st.image(comparison, caption="Original | CAM | Overlay", width=700)
-
-                     # Generate caption for GradCAM overlay image if BLIP model is loaded
-                     if st.session_state.blip_model_loaded:
-                         with st.spinner("Analyzing GradCAM visualization..."):
-                             gradcam_caption = generate_gradcam_caption(
-                                 overlay,
-                                 st.session_state.blip_processor,
-                                 st.session_state.blip_model
-                             )
-                             st.session_state.gradcam_caption = gradcam_caption
-
-                     # Save results in session state for LLM analysis
-                     st.session_state.current_image = image
-                     st.session_state.current_overlay = overlay
-                     st.session_state.current_face_box = detected_face_box
-                     st.session_state.current_pred_label = pred_label
-                     st.session_state.current_confidence = confidence
-
-                     st.success("✅ Initial detection and GradCAM visualization complete!")
-             except Exception as e:
-                 st.error(f"Error analyzing image: {str(e)}")
-                 import traceback
-                 st.error(traceback.format_exc())
-         else:
-             st.warning("⚠️ Please load the CLIP model first to perform initial detection.")
+             with st.spinner("Analyzing image with CLIP model..."):
+                 # Preprocess image for CLIP
+                 transform = transforms.Compose([
+                     transforms.Resize((224, 224)),
+                     transforms.ToTensor(),
+                     transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]),
+                 ])
+
+                 # Create a simple dataset for the image
+                 dataset = ImageDataset(image, transform=transform, face_only=True)
+                 tensor, _, _, _, face_box, _ = dataset[0]
+                 tensor = tensor.unsqueeze(0)
+
+                 # Get device
+                 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+                 # Move model and tensor to device
+                 model = st.session_state.clip_model.to(device)
+                 tensor = tensor.to(device)
+
+                 # Forward pass
+                 with torch.no_grad():
+                     outputs = model.vision_model(pixel_values=tensor).pooler_output
+                     logits = model.classification_head(outputs)
+                     probs = torch.softmax(logits, dim=1)[0]
+                     pred_class = torch.argmax(probs).item()
+                     confidence = probs[pred_class].item()
+                     pred_label = "Fake" if pred_class == 1 else "Real"
+
+                 # Display results
+                 with col2:
+                     st.markdown("### Detection Result")
+                     st.markdown(f"**Classification:** {pred_label} (Confidence: {confidence:.2%})")
+
+                 # GradCAM visualization
+                 st.subheader("GradCAM Visualization")
+                 cam, overlay, comparison, detected_face_box = process_image_with_gradcam(
+                     image, model, device, pred_class
+                 )
+
+                 # Display GradCAM results (controlled size)
+                 st.image(comparison, caption="Original | CAM | Overlay", width=700)
+
+                 # Generate caption for GradCAM overlay image if BLIP model is loaded
+                 if st.session_state.blip_model_loaded:
+                     with st.spinner("Analyzing GradCAM visualization..."):
+                         gradcam_caption = generate_gradcam_caption(
+                             overlay,
+                             st.session_state.blip_processor,
+                             st.session_state.blip_model
+                         )
+                         st.session_state.gradcam_caption = gradcam_caption
+
+                         # Store caption but don't display it yet
+
+                 # Save results in session state for LLM analysis
+                 st.session_state.current_image = image
+                 st.session_state.current_overlay = overlay
+                 st.session_state.current_face_box = detected_face_box
+                 st.session_state.current_pred_label = pred_label
+                 st.session_state.current_confidence = confidence
+
+                 st.success("✅ Initial detection and GradCAM visualization complete!")
+         else:
+             st.warning("⚠️ Please load the CLIP model first to perform initial detection.")
+     except Exception as e:
+         st.error(f"Error processing image: {str(e)}")
+         import traceback
+         st.error(traceback.format_exc())  # This will show the full error traceback
+
+     # Image Analysis Summary section - AFTER Stage 2
+     if hasattr(st.session_state, 'current_image') and (hasattr(st.session_state, 'image_caption') or hasattr(st.session_state, 'gradcam_caption')):
+         with st.expander("Image Analysis Summary", expanded=True):
+             st.subheader("Generated Descriptions and Analysis")
+
+             # Display image, captions, and results in organized layout with proper formatting
+             col1, col2 = st.columns([1, 2])
+
+             with col1:
+                 # Display original image and overlay side by side with controlled size
+                 st.image(st.session_state.current_image, caption="Original Image", width=300)
+                 if hasattr(st.session_state, 'current_overlay'):
+                     st.image(st.session_state.current_overlay, caption="GradCAM Overlay", width=300)
+
+             with col2:
+                 # Detection result
+                 if hasattr(st.session_state, 'current_pred_label'):
+                     st.markdown("### Detection Result")
+                     st.markdown(f"**Classification:** {st.session_state.current_pred_label} (Confidence: {st.session_state.current_confidence:.2%})")
+                     st.markdown("---")
+
+                 # Image description
+                 if hasattr(st.session_state, 'image_caption'):
+                     st.markdown("### Image Description")
+                     st.markdown(st.session_state.image_caption)
+                     st.markdown("---")
+
+                 # GradCAM analysis
+                 if hasattr(st.session_state, 'gradcam_caption'):
+                     st.markdown("### GradCAM Analysis")
+                     st.markdown(st.session_state.gradcam_caption)
+
+     # LLM Analysis section - AFTER Image Analysis Summary
+     with st.expander("Stage 3: Detailed Analysis with Vision LLM", expanded=False):
+         if hasattr(st.session_state, 'current_image') and st.session_state.llm_model_loaded:
+             st.subheader("Detailed Deepfake Analysis")
+
+             # Display chat history
+             for i, (question, answer) in enumerate(st.session_state.chat_history):
+                 st.markdown(f"**Question {i+1}:** {question}")
+                 st.markdown(f"**Answer:** {answer}")
+                 st.markdown("---")
+
+             # Include both captions in the prompt if available
+             caption_text = ""
+             if hasattr(st.session_state, 'image_caption'):
+                 caption_text += f"\n\nImage Description:\n{st.session_state.image_caption}"
+
+             if hasattr(st.session_state, 'gradcam_caption'):
+                 caption_text += f"\n\nGradCAM Analysis:\n{st.session_state.gradcam_caption}"
+
+             # Default question with option to customize
+             default_question = f"This image has been classified as {st.session_state.current_pred_label}. Analyze the key features that led to this classification, focusing on the highlighted areas in the GradCAM visualization. Provide both a technical explanation for experts and a simple explanation for non-technical users."
+
+             # User input for new question
+             new_question = st.text_area("Ask a question about the image:", value=default_question if not st.session_state.chat_history else "", height=100)
+
+             # Analyze button and Clear Chat button in the same row
+             col1, col2 = st.columns([3, 1])
+             with col1:
+                 analyze_button = st.button("🔍 Send Question", type="primary")
+             with col2:
+                 clear_button = st.button("🗑️ Clear Chat History")
+
+             if clear_button:
+                 st.session_state.chat_history = []
+                 st.experimental_rerun()
+
+             if analyze_button and new_question:
+                 try:
+                     # Add caption info if it's the first question
+                     if not st.session_state.chat_history:
+                         full_question = new_question + caption_text
+                     else:
+                         full_question = new_question
+
+                     result = analyze_image_with_llm(
+                         st.session_state.current_image,
+                         st.session_state.current_overlay,
+                         st.session_state.current_face_box,
+                         st.session_state.current_pred_label,
+                         st.session_state.current_confidence,
+                         full_question,
+                         st.session_state.llm_model,
+                         st.session_state.tokenizer,
+                         temperature=temperature,
+                         max_tokens=max_tokens,
+                         custom_instruction=custom_instruction
+                     )
+
+                     # Add to chat history
+                     st.session_state.chat_history.append((new_question, result))
+
+                     # Display the latest result too
+                     st.success("✅ Analysis complete!")
+
+                     # Check if the result contains both technical and non-technical explanations
+                     if "Technical" in result and "Non-Technical" in result:
+                         try:
+                             # Split the result into technical and non-technical sections
+                             parts = result.split("Non-Technical")
+                             technical = parts[0]
+                             non_technical = "Non-Technical" + parts[1]
+
+                             # Display in two columns
+                             tech_col, simple_col = st.columns(2)
+                             with tech_col:
+                                 st.subheader("Technical Analysis")
+                                 st.markdown(technical)
+
+                             with simple_col:
+                                 st.subheader("Simple Explanation")
+                                 st.markdown(non_technical)
+                         except Exception as e:
+                             # Fallback if splitting fails
+                             st.subheader("Analysis Result")
+                             st.markdown(result)
+                     else:
+                         # Just display the whole result
+                         st.subheader("Analysis Result")
+                         st.markdown(result)
+
+                     # Rerun to update the chat history display
+                     st.experimental_rerun()
+
+                 except Exception as e:
+                     st.error(f"Error during LLM analysis: {str(e)}")
+
+         elif not hasattr(st.session_state, 'current_image'):
+             st.warning("⚠️ Please upload an image and complete the initial detection first.")
+         else:
+             st.warning("⚠️ Please load the Vision LLM to perform detailed analysis.")
+
+     # Footer
+     st.markdown("---")
+     st.caption("Advanced Deepfake Image Analyzer with Structured BLIP Captioning")
+
+ if __name__ == "__main__":
+     main()