saakshigupta committed
Commit 5d3e972 · verified · 1 Parent(s): f08ce84

Update app.py

Files changed (1)
  1. app.py +17 -26
app.py CHANGED
@@ -408,8 +408,8 @@ def process_image_with_gradcam(image, model, device, pred_class):
 def load_blip_model():
     with st.spinner("Loading BLIP captioning model..."):
         try:
-            processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
-            model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
+            processor = BlipProcessor.from_pretrained("saakshigupta/deepfake-blip-large")
+            model = BlipForConditionalGeneration.from_pretrained("saakshigupta/deepfake-blip-large")
             return processor, model
         except Exception as e:
             st.error(f"Error loading BLIP model: {str(e)}")
@@ -418,40 +418,31 @@ def load_blip_model():
 # Function to generate image caption using BLIP's VQA approach for GradCAM
 def generate_gradcam_caption(image, processor, model, max_length=60):
     """
-    Generate a detailed analysis of GradCAM visualization using multiple questions
+    Generate a detailed analysis of GradCAM visualization using the fine-tuned BLIP model
     """
     try:
         # Check for available GPU
         device = "cuda" if torch.cuda.is_available() else "cpu"
         model = model.to(device)
 
-        # Multiple specific questions about the GradCAM visualization
-        questions = [
-            "What facial features are highlighted by the red and yellow areas in this heatmap?",
-            "What does this facial heat map visualization show?",
-            "What patterns do you see in this facial heatmap visualization?"
-        ]
+        # Process image with BLIP
+        inputs = processor(image, return_tensors="pt").to(device)
 
-        # Get answers to each question
-        answers = []
-        for question in questions:
-            inputs = processor(image, text=question, return_tensors="pt").to(device)
-            with torch.no_grad():
-                output = model.generate(**inputs, max_length=max_length, num_beams=5)
-            answer = processor.decode(output[0], skip_special_tokens=True)
-            answers.append(answer)
+        # Generate caption
+        with torch.no_grad():
+            output = model.generate(**inputs, max_length=max_length, num_beams=5)
 
-        # Format answers into a structured analysis
+        # Decode the output
+        caption = processor.decode(output[0], skip_special_tokens=True)
+
+        # Format into structured analysis
         structured_output = f"""
-        **Main Focus Area**: The heatmap is primarily focused on the facial region of the person.
-
-        **High Activation Regions**: The red/yellow areas highlight {answers[0]}
-
-        **Medium Activation Regions**: The green/cyan areas correspond to regions of medium importance in the detection process, typically including parts of the face and surrounding areas.
-
-        **Low Activation Regions**: The blue/dark blue areas represent features that have less impact on the model's decision, usually the background and peripheral elements.
+        **Heatmap Analysis**: {caption}
 
-        **Activation Pattern**: {answers[2]}
+        **Key Observations**:
+        - The red/yellow regions indicate areas of high importance in the detection process
+        - The green/cyan areas show regions of medium importance
+        - The blue/dark blue regions represent features with lower impact on the model's decision
         """
         return structured_output.strip()
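
Below is a minimal usage sketch, not part of this commit, of how the updated load_blip_model() and generate_gradcam_caption() might be called from the surrounding Streamlit app. The gradcam_overlay.png path and the None-check on a failed load are illustrative assumptions; only the two functions shown in the diff are taken from app.py.

```python
# Usage sketch (assumptions noted in comments); load_blip_model() and
# generate_gradcam_caption() are the functions defined earlier in app.py.
import streamlit as st
from PIL import Image

processor, blip_model = load_blip_model()  # loads saakshigupta/deepfake-blip-large
if processor is not None and blip_model is not None:  # assumes the loader yields None values on failure
    # Hypothetical Grad-CAM overlay image produced earlier in the app
    overlay = Image.open("gradcam_overlay.png").convert("RGB")
    analysis = generate_gradcam_caption(overlay, processor, blip_model, max_length=60)
    st.markdown(analysis)  # the returned analysis is markdown-formatted
else:
    st.warning("BLIP captioning model could not be loaded.")
```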