Update app.py
app.py
CHANGED
@@ -408,8 +408,8 @@ def process_image_with_gradcam(image, model, device, pred_class):
 def load_blip_model():
     with st.spinner("Loading BLIP captioning model..."):
         try:
-            processor = BlipProcessor.from_pretrained("
-            model = BlipForConditionalGeneration.from_pretrained("
+            processor = BlipProcessor.from_pretrained("saakshigupta/deepfake-blip-large")
+            model = BlipForConditionalGeneration.from_pretrained("saakshigupta/deepfake-blip-large")
             return processor, model
         except Exception as e:
             st.error(f"Error loading BLIP model: {str(e)}")
@@ -418,40 +418,31 @@ def load_blip_model():
 # Function to generate image caption using BLIP's VQA approach for GradCAM
 def generate_gradcam_caption(image, processor, model, max_length=60):
     """
-    Generate a detailed analysis of GradCAM visualization using
+    Generate a detailed analysis of GradCAM visualization using the fine-tuned BLIP model
     """
     try:
         # Check for available GPU
         device = "cuda" if torch.cuda.is_available() else "cpu"
         model = model.to(device)

-        #
-        questions = [
-            "What facial features are highlighted by the red and yellow areas in this heatmap?",
-            "What does this facial heat map visualization show?",
-            "What patterns do you see in this facial heatmap visualization?"
-        ]
+        # Process image with BLIP
+        inputs = processor(image, return_tensors="pt").to(device)

-        #
-        answers = []
-        for question in questions:
-            inputs = processor(image, text=question, return_tensors="pt").to(device)
-            with torch.no_grad():
-                output = model.generate(**inputs, max_length=max_length, num_beams=5)
-            answer = processor.decode(output[0], skip_special_tokens=True)
-            answers.append(answer)
+        # Generate caption
+        with torch.no_grad():
+            output = model.generate(**inputs, max_length=max_length, num_beams=5)

-        #
+        # Decode the output
+        caption = processor.decode(output[0], skip_special_tokens=True)
+
+        # Format into structured analysis
         structured_output = f"""
-        **
-
-        **High Activation Regions**: The red/yellow areas highlight {answers[0]}
-
-        **Medium Activation Regions**: The green/cyan areas correspond to regions of medium importance in the detection process, typically including parts of the face and surrounding areas.
-
-        **Low Activation Regions**: The blue/dark blue areas represent features that have less impact on the model's decision, usually the background and peripheral elements.
+        **Heatmap Analysis**: {caption}

-        **
+        **Key Observations**:
+        - The red/yellow regions indicate areas of high importance in the detection process
+        - The green/cyan areas show regions of medium importance
+        - The blue/dark blue regions represent features with lower impact on the model's decision
         """
         return structured_output.strip()
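Below is a hypothetical call-site sketch (not part of this commit) showing how the revised `generate_gradcam_caption` might be wired to the GradCAM output inside app.py. It assumes the BLIP model loaded successfully, and the `overlay` array and variable names are placeholders for the real overlay produced by `process_image_with_gradcam`.

```python
# Hypothetical wiring inside app.py, after the functions above are defined.
# `overlay` is a stand-in for the RGB GradCAM overlay the app produces elsewhere.
import numpy as np
from PIL import Image
import streamlit as st

processor, blip_model = load_blip_model()

overlay = np.zeros((224, 224, 3), dtype=np.uint8)  # placeholder overlay array
overlay_pil = Image.fromarray(overlay)

# Caption the heatmap with the fine-tuned BLIP model and render the analysis
analysis = generate_gradcam_caption(overlay_pil, processor, blip_model, max_length=60)
st.markdown(analysis)
```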