Update app.py
Browse files
app.py
CHANGED
@@ -403,17 +403,23 @@ def process_image_with_gradcam(image, model, device, pred_class):
|
|
403 |
|
404 |
# ----- BLIP Image Captioning -----
|
405 |
|
406 |
-
# Function to load BLIP captioning
|
407 |
@st.cache_resource
|
408 |
-
def
|
409 |
-
with st.spinner("Loading BLIP captioning
|
410 |
try:
|
411 |
-
|
412 |
-
|
413 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
414 |
except Exception as e:
|
415 |
-
st.error(f"Error loading BLIP
|
416 |
-
return None, None
|
417 |
|
418 |
# Function to generate image caption using BLIP's VQA approach for GradCAM
|
419 |
def generate_gradcam_caption(image, processor, model, max_length=60):
|
@@ -452,7 +458,7 @@ def generate_gradcam_caption(image, processor, model, max_length=60):
|
|
452 |
|
453 |
# Function to generate caption for original image
|
454 |
def generate_image_caption(image, processor, model, max_length=75, num_beams=5):
|
455 |
-
"""Generate a caption for the original image using BLIP model"""
|
456 |
try:
|
457 |
# Check for available GPU
|
458 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
@@ -594,8 +600,10 @@ def main():
|
|
594 |
|
595 |
if 'blip_model_loaded' not in st.session_state:
|
596 |
st.session_state.blip_model_loaded = False
|
597 |
-
st.session_state.
|
598 |
-
st.session_state.
|
|
|
|
|
599 |
|
600 |
# Initialize chat history
|
601 |
if 'chat_history' not in st.session_state:
|
@@ -625,17 +633,19 @@ def main():
|
|
625 |
with blip_col:
|
626 |
if not st.session_state.blip_model_loaded:
|
627 |
if st.button("📥 Load BLIP for Captioning", type="primary"):
|
628 |
-
# Load BLIP
|
629 |
-
|
630 |
-
if
|
631 |
-
st.session_state.
|
632 |
-
st.session_state.
|
|
|
|
|
633 |
st.session_state.blip_model_loaded = True
|
634 |
-
st.success("✅ BLIP captioning
|
635 |
else:
|
636 |
-
st.error("❌ Failed to load BLIP
|
637 |
else:
|
638 |
-
st.success("✅ BLIP captioning
|
639 |
|
640 |
with llm_col:
|
641 |
if not st.session_state.llm_model_loaded:
|
@@ -672,8 +682,8 @@ def main():
|
|
672 |
with st.spinner("Generating image description..."):
|
673 |
caption = generate_image_caption(
|
674 |
image,
|
675 |
-
st.session_state.
|
676 |
-
st.session_state.
|
677 |
)
|
678 |
st.session_state.image_caption = caption
|
679 |
|
@@ -729,8 +739,8 @@ def main():
|
|
729 |
with st.spinner("Analyzing GradCAM visualization..."):
|
730 |
gradcam_caption = generate_gradcam_caption(
|
731 |
overlay,
|
732 |
-
st.session_state.
|
733 |
-
st.session_state.
|
734 |
)
|
735 |
st.session_state.gradcam_caption = gradcam_caption
|
736 |
|
|
|
403 |
|
404 |
# ----- BLIP Image Captioning -----
|
405 |
|
406 |
+
# Function to load BLIP captioning models
|
407 |
@st.cache_resource
|
408 |
+
def load_blip_models():
|
409 |
+
with st.spinner("Loading BLIP captioning models..."):
|
410 |
try:
|
411 |
+
# Load original BLIP model for general image captioning
|
412 |
+
original_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
|
413 |
+
original_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
|
414 |
+
|
415 |
+
# Load fine-tuned BLIP model for GradCAM analysis
|
416 |
+
finetuned_processor = BlipProcessor.from_pretrained("saakshigupta/deepfake-blip-large")
|
417 |
+
finetuned_model = BlipForConditionalGeneration.from_pretrained("saakshigupta/deepfake-blip-large")
|
418 |
+
|
419 |
+
return original_processor, original_model, finetuned_processor, finetuned_model
|
420 |
except Exception as e:
|
421 |
+
st.error(f"Error loading BLIP models: {str(e)}")
|
422 |
+
return None, None, None, None
|
423 |
|
424 |
# Function to generate image caption using BLIP's VQA approach for GradCAM
|
425 |
def generate_gradcam_caption(image, processor, model, max_length=60):
|
|
|
458 |
|
459 |
# Function to generate caption for original image
|
460 |
def generate_image_caption(image, processor, model, max_length=75, num_beams=5):
|
461 |
+
"""Generate a caption for the original image using the original BLIP model"""
|
462 |
try:
|
463 |
# Check for available GPU
|
464 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
600 |
|
601 |
if 'blip_model_loaded' not in st.session_state:
|
602 |
st.session_state.blip_model_loaded = False
|
603 |
+
st.session_state.original_processor = None
|
604 |
+
st.session_state.original_model = None
|
605 |
+
st.session_state.finetuned_processor = None
|
606 |
+
st.session_state.finetuned_model = None
|
607 |
|
608 |
# Initialize chat history
|
609 |
if 'chat_history' not in st.session_state:
|
|
|
633 |
with blip_col:
|
634 |
if not st.session_state.blip_model_loaded:
|
635 |
if st.button("📥 Load BLIP for Captioning", type="primary"):
|
636 |
+
# Load BLIP models
|
637 |
+
original_processor, original_model, finetuned_processor, finetuned_model = load_blip_models()
|
638 |
+
if all([original_processor, original_model, finetuned_processor, finetuned_model]):
|
639 |
+
st.session_state.original_processor = original_processor
|
640 |
+
st.session_state.original_model = original_model
|
641 |
+
st.session_state.finetuned_processor = finetuned_processor
|
642 |
+
st.session_state.finetuned_model = finetuned_model
|
643 |
st.session_state.blip_model_loaded = True
|
644 |
+
st.success("✅ BLIP captioning models loaded successfully!")
|
645 |
else:
|
646 |
+
st.error("❌ Failed to load BLIP models.")
|
647 |
else:
|
648 |
+
st.success("✅ BLIP captioning models loaded and ready!")
|
649 |
|
650 |
with llm_col:
|
651 |
if not st.session_state.llm_model_loaded:
|
|
|
682 |
with st.spinner("Generating image description..."):
|
683 |
caption = generate_image_caption(
|
684 |
image,
|
685 |
+
st.session_state.original_processor,
|
686 |
+
st.session_state.original_model
|
687 |
)
|
688 |
st.session_state.image_caption = caption
|
689 |
|
|
|
739 |
with st.spinner("Analyzing GradCAM visualization..."):
|
740 |
gradcam_caption = generate_gradcam_caption(
|
741 |
overlay,
|
742 |
+
st.session_state.finetuned_processor,
|
743 |
+
st.session_state.finetuned_model
|
744 |
)
|
745 |
st.session_state.gradcam_caption = gradcam_caption
|
746 |
|