import gradio as gr | |
from scripts.generate_image_caption import predict_caption | |
from scripts.blip_model import generate_blip_caption | |
def get_caption(image_path, model_choice):
    """Generate a caption for an image using the selected model.

    Args:
        image_path: Path of the image file to caption. May be ``None`` or
            empty when the caller supplied no image (presumably what the
            Gradio image input sends when nothing is uploaded -- confirm
            against the Gradio docs).
        model_choice: Name of the captioning model: ``"BLIP"`` or
            ``"CNN_LSTM"``.

    Returns:
        For ``"BLIP"``, whatever ``generate_blip_caption`` returns. For
        ``"CNN_LSTM"``, a two-line string holding the greedy-search and
        beam-search captions from ``predict_caption``.

    Raises:
        ValueError: If no image path was given or ``model_choice`` is not
            one of the supported model names.
    """
    # Fail early with a clear message instead of passing an empty path to
    # the model helpers.
    if not image_path:
        raise ValueError("No image provided; please upload an image first.")

    if model_choice == "BLIP":
        return generate_blip_caption(image_path)

    if model_choice == "CNN_LSTM":
        greedy_caption, beam_caption = predict_caption(image_path)
        return f"Greedy Search: {greedy_caption}\nBeam Search: {beam_caption}"

    # Previously an unknown choice fell through and silently returned None.
    raise ValueError(
        f"Unsupported model choice: {model_choice!r}; "
        "expected 'BLIP' or 'CNN_LSTM'."
    )
# Build the Gradio UI: an image upload plus a model selector feeding
# get_caption, with the caption shown in a text box.
_image_input = gr.Image(type="filepath", label="Upload Image")
_model_selector = gr.Dropdown(
    ["BLIP", "CNN_LSTM"],
    label="Choose Model",
    value="BLIP",
)
_caption_output = gr.Textbox(label="Generated Caption")

# Each example row supplies only the image; the model selector is not set
# by the examples.
_example_rows = [
    ["examples/fight.jpg"],
    ["examples/101669240_b2d3e7f17b.jpg"],
]

iface = gr.Interface(
    fn=get_caption,
    inputs=[_image_input, _model_selector],
    outputs=_caption_output,
    title="Image Captioning with BLIP and CNN-LSTM",
    description="Upload an image and choose a model to generate a caption.",
    examples=_example_rows,
)
def main():
    """Start the Gradio server for the captioning interface."""
    iface.launch()


# Launch only when run as a script, not when the module is imported.
if __name__ == "__main__":
    main()