lordpotato committed
Commit b155593 · 1 Parent(s): 1d0aab4

added blip model as well as final captioning project notebook
Image_Captioning_Project.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
@@ -1,17 +1,30 @@
 import gradio as gr
 from scripts.generate_image_caption import predict_caption
+from scripts.blip_model import generate_blip_caption
+
+def get_caption(image_path, model_choice):
+    if model_choice == "BLIP":
+        return generate_blip_caption(image_path)
+    elif model_choice == "CNN_LSTM":
+        greedy_caption, beam_caption = predict_caption(image_path)
+        return f"Greedy Search: {greedy_caption}\nBeam Search: {beam_caption}"
 
 # Create the Gradio interface
 iface = gr.Interface(
-    fn=predict_caption,
-    inputs=gr.Image(type="filepath", label="Upload Image"),
-    outputs=[
-        gr.Textbox(label="Greedy Search Caption"),
-        gr.Textbox(label="Beam Search Caption"),
+    fn=get_caption,
+    inputs=[
+        gr.Image(type="filepath", label="Upload Image"),
+        gr.Dropdown(
+            ["BLIP", "CNN_LSTM"], label="Choose Model", value="BLIP"
+        ),
+    ],
+    outputs=gr.Textbox(label="Generated Caption"),
+    title="Image Captioning with BLIP and CNN-LSTM",
+    description="Upload an image and choose a model to generate a caption.",
+    examples=[
+        ["examples/fight.jpg"],
+        ["examples/101669240_b2d3e7f17b.jpg"],
     ],
-    title="Image Captioning with Greedy and Beam Search",
-    description="Upload an image to generate two different captions using Greedy Search and Beam Search.",
-    examples=[["examples/fight.jpg"],["examples/101669240_b2d3e7f17b.jpg"]],
 )
 
 # Launch the interface
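The refactored app.py routes everything through the single get_caption dispatcher. Two things worth noting: get_caption implicitly returns None for any model_choice outside the two handled branches, and each examples row supplies only the image value even though the interface now has two inputs, so depending on the Gradio version each row may also need a value for the dropdown. Below is a minimal smoke test that replicates the two branches directly against the underlying modules (a sketch, not part of the commit; it assumes the repo root is the working directory so scripts/ and examples/ resolve):

# Hypothetical smoke test, not part of the commit.
from scripts.generate_image_caption import predict_caption
from scripts.blip_model import generate_blip_caption

# BLIP branch: single caption string.
print(generate_blip_caption("examples/fight.jpg"))

# CNN_LSTM branch: greedy and beam search captions.
greedy_caption, beam_caption = predict_caption("examples/fight.jpg")
print(f"Greedy Search: {greedy_caption}\nBeam Search: {beam_caption}")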
notebooks/archived_versions/Image_Captioning_Project_with_trashed.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -3,3 +3,5 @@ tensorflow==2.18.0
 numpy
 requests
 pillow
+torch
+transformers
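torch and transformers are added unpinned, while tensorflow is pinned to 2.18.0 above. If reproducibility matters, pinning the new dependencies would keep the BLIP path stable; the versions below are illustrative assumptions, not from the commit:

# Illustrative pins (assumed versions, not from the commit).
torch==2.4.0
transformers==4.44.0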
scripts/blip_model.py ADDED
@@ -0,0 +1,28 @@
+from transformers import BlipProcessor, BlipForConditionalGeneration
+from PIL import Image
+
+# Load the pre-trained BLIP model and processor
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+def generate_blip_caption(image_path):
+    """
+    Generates a caption for a given image using the BLIP model.
+
+    Args:
+        image_path (str): The path to the image file.
+
+    Returns:
+        str: The generated caption.
+    """
+    # Open the image
+    image = Image.open(image_path).convert("RGB")
+
+    # Preprocess the image and generate the caption
+    inputs = processor(images=image, return_tensors="pt")
+    outputs = model.generate(**inputs)
+
+    # Decode the generated caption
+    caption = processor.decode(outputs[0], skip_special_tokens=True)
+
+    return caption
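Since processor and model are created at module level, merely importing scripts.blip_model triggers the weight download, and model.generate(**inputs) runs with the model's default generation length. A sketch of a variant with an explicit generation budget (an assumption, not part of the commit; max_new_tokens is a standard transformers generate() argument):

# Hypothetical variant with an explicit caption-length budget.
from PIL import Image
from scripts.blip_model import processor, model

image = Image.open("examples/fight.jpg").convert("RGB")
inputs = processor(images=image, return_tensors="pt")
# Allow up to 40 new tokens instead of the model's default length.
outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0], skip_special_tokens=True))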