Commit b155593 by lordpotato
Parent(s): 1d0aab4

added blip model as well as final captioning project notebook
Files changed:
- Image_Captioning_Project.ipynb +0 -0
- app.py +21 -8
- notebooks/archived_versions/Image_Captioning_Project_with_trashed.ipynb +0 -0
- requirements.txt +2 -0
- scripts/blip_model.py +28 -0
Image_Captioning_Project.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
app.py
CHANGED
@@ -1,17 +1,30 @@
 import gradio as gr
 from scripts.generate_image_caption import predict_caption
+from scripts.blip_model import generate_blip_caption
+
+def get_caption(image_path, model_choice):
+    if model_choice == "BLIP":
+        return generate_blip_caption(image_path)
+    elif model_choice == "CNN_LSTM":
+        greedy_caption, beam_caption = predict_caption(image_path)
+        return f"Greedy Search: {greedy_caption}\nBeam Search: {beam_caption}"
 
 # Create the Gradio interface
 iface = gr.Interface(
-    fn=
-    inputs=
-
-    gr.
-
+    fn=get_caption,
+    inputs=[
+        gr.Image(type="filepath", label="Upload Image"),
+        gr.Dropdown(
+            ["BLIP", "CNN_LSTM"], label="Choose Model", value="BLIP"
+        ),
+    ],
+    outputs=gr.Textbox(label="Generated Caption"),
+    title="Image Captioning with BLIP and CNN-LSTM",
+    description="Upload an image and choose a model to generate a caption.",
+    examples=[
+        ["examples/fight.jpg"],
+        ["examples/101669240_b2d3e7f17b.jpg"],
     ],
-    title="Image Captioning with Greedy and Beam Search",
-    description="Upload an image to generate two different captions using Greedy Search and Beam Search.",
-    examples=[["examples/fight.jpg"],["examples/101669240_b2d3e7f17b.jpg"]],
 )
 
 # Launch the interface
notebooks/archived_versions/Image_Captioning_Project_with_trashed.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
requirements.txt
CHANGED
@@ -3,3 +3,5 @@ tensorflow==2.18.0
 numpy
 requests
 pillow
+torch
+transformers
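torch and transformers are added without version pins, so the resolved versions depend on the install environment. A minimal sketch to confirm what got installed alongside tensorflow==2.18.0:

    # Print the versions of the newly added dependencies (assumes both installed).
    import torch
    import transformers

    print("torch:", torch.__version__)
    print("transformers:", transformers.__version__)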
scripts/blip_model.py
ADDED
@@ -0,0 +1,28 @@
+from transformers import BlipProcessor, BlipForConditionalGeneration
+from PIL import Image
+
+# Load the pre-trained BLIP model and processor
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+def generate_blip_caption(image_path):
+    """
+    Generates a caption for a given image using the BLIP model.
+
+    Args:
+        image_path (str): The path to the image file.
+
+    Returns:
+        str: The generated caption.
+    """
+    # Open the image
+    image = Image.open(image_path).convert("RGB")
+
+    # Preprocess the image and generate the caption
+    inputs = processor(images=image, return_tensors="pt")
+    outputs = model.generate(**inputs)
+
+    # Decode the generated caption
+    caption = processor.decode(outputs[0], skip_special_tokens=True)
+
+    return caption
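The new module can also be exercised on its own. The sketch below assumes the examples/ images referenced in app.py are present; note that importing scripts.blip_model downloads the Salesforce/blip-image-captioning-base weights from the Hugging Face Hub on first use.

    # Standalone usage of the BLIP captioning helper added in this commit.
    from scripts.blip_model import generate_blip_caption

    caption = generate_blip_caption("examples/101669240_b2d3e7f17b.jpg")
    print(caption)

Because model.generate() is called with default arguments, decoding is greedy under the model's default length limit; options such as num_beams or max_new_tokens could be passed for beam search or longer captions, but this commit does not do so.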