lordpotato committed
Commit b155593 · 1 Parent(s): 1d0aab4

added blip model as well as final captioning project notebook
Image_Captioning_Project.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
@@ -1,17 +1,30 @@
 import gradio as gr
 from scripts.generate_image_caption import predict_caption
+from scripts.blip_model import generate_blip_caption
+
+def get_caption(image_path, model_choice):
+    if model_choice == "BLIP":
+        return generate_blip_caption(image_path)
+    elif model_choice == "CNN_LSTM":
+        greedy_caption, beam_caption = predict_caption(image_path)
+        return f"Greedy Search: {greedy_caption}\nBeam Search: {beam_caption}"
 
 # Create the Gradio interface
 iface = gr.Interface(
-    fn=predict_caption,
-    inputs=gr.Image(type="filepath", label="Upload Image"),
-    outputs=[
-        gr.Textbox(label="Greedy Search Caption"),
-        gr.Textbox(label="Beam Search Caption"),
+    fn=get_caption,
+    inputs=[
+        gr.Image(type="filepath", label="Upload Image"),
+        gr.Dropdown(
+            ["BLIP", "CNN_LSTM"], label="Choose Model", value="BLIP"
+        ),
+    ],
+    outputs=gr.Textbox(label="Generated Caption"),
+    title="Image Captioning with BLIP and CNN-LSTM",
+    description="Upload an image and choose a model to generate a caption.",
+    examples=[
+        ["examples/fight.jpg"],
+        ["examples/101669240_b2d3e7f17b.jpg"],
     ],
-    title="Image Captioning with Greedy and Beam Search",
-    description="Upload an image to generate two different captions using Greedy Search and Beam Search.",
-    examples=[["examples/fight.jpg"],["examples/101669240_b2d3e7f17b.jpg"]],
 )
 
 # Launch the interface
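The refactored app.py routes everything through the single get_caption dispatcher. Two things worth noting: get_caption implicitly returns None for any model_choice outside the two handled branches, and each examples row supplies only the image value even though the interface now has two inputs, so depending on the Gradio version each row may also need a value for the dropdown. Below is a minimal smoke test that replicates the two branches directly against the underlying modules (a sketch, not part of the commit; it assumes the repo root is the working directory so scripts/ and examples/ resolve):

# Hypothetical smoke test, not part of the commit.
from scripts.generate_image_caption import predict_caption
from scripts.blip_model import generate_blip_caption

# BLIP branch: single caption string.
print(generate_blip_caption("examples/fight.jpg"))

# CNN_LSTM branch: greedy and beam search captions.
greedy_caption, beam_caption = predict_caption("examples/fight.jpg")
print(f"Greedy Search: {greedy_caption}\nBeam Search: {beam_caption}")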
notebooks/archived_versions/Image_Captioning_Project_with_trashed.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -3,3 +3,5 @@ tensorflow==2.18.0
 numpy
 requests
 pillow
+torch
+transformers
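torch and transformers are added unpinned, while tensorflow is pinned to 2.18.0 above. If reproducibility matters, pinning the new dependencies would keep the BLIP path stable; the versions below are illustrative assumptions, not from the commit:

# Illustrative pins (assumed versions, not from the commit).
torch==2.4.0
transformers==4.44.0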
scripts/blip_model.py ADDED
@@ -0,0 +1,28 @@
+from transformers import BlipProcessor, BlipForConditionalGeneration
+from PIL import Image
+
+# Load the pre-trained BLIP model and processor
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+def generate_blip_caption(image_path):
+    """
+    Generates a caption for a given image using the BLIP model.
+
+    Args:
+        image_path (str): The path to the image file.
+
+    Returns:
+        str: The generated caption.
+    """
+    # Open the image
+    image = Image.open(image_path).convert("RGB")
+
+    # Preprocess the image and generate the caption
+    inputs = processor(images=image, return_tensors="pt")
+    outputs = model.generate(**inputs)
+
+    # Decode the generated caption
+    caption = processor.decode(outputs[0], skip_special_tokens=True)
+
+    return caption
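Since processor and model are created at module level, merely importing scripts.blip_model triggers the weight download, and model.generate(**inputs) runs with the model's default generation length. A sketch of a variant with an explicit generation budget (an assumption, not part of the commit; max_new_tokens is a standard transformers generate() argument):

# Hypothetical variant with an explicit caption-length budget.
from PIL import Image
from scripts.blip_model import processor, model

image = Image.open("examples/fight.jpg").convert("RGB")
inputs = processor(images=image, return_tensors="pt")
# Allow up to 40 new tokens instead of the model's default length.
outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0], skip_special_tokens=True))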