cosine-match / app.py
iamrobotbear's picture
trying to include image name in the csv still
0b76391
raw
history blame
3.91 kB
import tempfile

import gradio as gr
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from PIL import Image
from transformers import GitConfig, GitForCausalLM, GitModel, GitProcessor
# Load models and processors once at import time so every request reuses them.
# NOTE(review): the variable names mention "textcaps" but the checkpoint loaded
# is "microsoft/git-large-r" -- confirm which checkpoint is intended.
git_config = GitConfig.from_pretrained("microsoft/git-large-r")
git_processor_large_textcaps = GitProcessor.from_pretrained("microsoft/git-large-r")
# Captioning needs the language-modelling head, so load GitForCausalLM; the
# bare GitModel only returns hidden states and has no logits to decode.
git_model_large_textcaps = GitForCausalLM.from_pretrained("microsoft/git-large-r")
# NOTE(review): confirm this TF Hub handle resolves and that the loaded model
# accepts the raw image it is later called with.
itm_model = hub.load("https://tfhub.dev/google/LaViT/1")
use_model = hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5")
# List of statements for Image-Text Matching.
# Each entry is scored against the uploaded image and its generated caption.
# NOTE(review): the list is an empty placeholder here; while it stays empty the
# scoring loop runs zero times and the results table has no rows.
statements = [
    # (List of statements as provided in the original code)
]
# Function to generate image caption
def generate_caption(processor, model, image, max_length=50):
    """Return a caption for *image* generated by a GIT captioning model.

    A single forward pass followed by ``argmax`` over the logits does not
    produce a caption: the model has to decode autoregressively, which is
    what ``model.generate`` does.

    Args:
        processor: Object that preprocesses images and offers
            ``batch_decode`` (e.g. ``GitProcessor``).
        model: Generation-capable model exposing ``generate``
            (e.g. ``GitForCausalLM``).
        image: Image in any form the processor accepts.
        max_length: Upper bound on the number of generated tokens.

    Returns:
        The decoded caption string for the single input image.
    """
    inputs = processor(images=image, return_tensors="pt")
    generated_ids = model.generate(
        pixel_values=inputs.pixel_values,
        max_length=max_length,
    )
    captions = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return captions[0]
# Function to compute textual similarity
def compute_textual_similarity(caption, statement):
    """Return the inner product of the sentence embeddings of two texts.

    Both texts are embedded with the module-level ``use_model``.

    Args:
        caption: Generated caption text.
        statement: Statement text to compare against the caption.

    Returns:
        The similarity as a plain Python float.
    """
    caption_embedding = use_model([caption])[0].numpy()
    statement_embedding = use_model([statement])[0].numpy()
    similarity = np.inner(caption_embedding, statement_embedding)
    # Selecting row [0] makes both embeddings 1-D, so np.inner yields a scalar;
    # indexing it with [0] would raise IndexError. Flatten first so a scalar
    # and a one-element array are handled the same way.
    return float(np.asarray(similarity).reshape(-1)[0])
# Function to compute ITM score
def compute_itm_score(image, statement):
    """Return the image-text matching score between an image and a statement.

    The image is passed to the module-level ``itm_model`` and the statement is
    embedded with ``use_model``; the score is the inner product of the two.

    Args:
        image: Image passed straight to ``itm_model``.
        statement: Statement text to match against the image.

    Returns:
        The first element of the inner-product result as a Python float.
    """
    # NOTE(review): assumes itm_model returns features whose last dimension
    # matches the sentence-embedding size -- confirm against the hub model.
    image_features = itm_model(image)
    statement_features = use_model([statement])[0].numpy()
    similarity = np.inner(image_features, statement_features)
    # The statement embedding is 1-D, so np.inner drops the last axis of the
    # image features; a fixed [0][0] lookup only works for one particular
    # rank. Flatten and take the first element so any rank is accepted.
    return float(np.asarray(similarity).reshape(-1)[0])
# Function to serialise a DataFrame as CSV text
def save_dataframe_to_csv(df):
    """Return *df* rendered as a CSV string without the index column."""
    return df.to_csv(index=False)
# Main function to perform image captioning and image-text matching
def process_image_and_statements(image, file_name=None):
    """Caption *image*, score it against every statement, and export a CSV.

    Args:
        image: Uploaded image handed over by the Gradio interface.
        file_name: Name recorded in the 'Image File Name' column. Optional,
            because the interface supplies only the image; when omitted the
            image's ``filename`` attribute is used if present, otherwise
            ``"unknown"``.

    Returns:
        A ``(results_df, csv_path)`` tuple: the results table and the path of
        a temporary CSV file holding the same data. A path is returned rather
        than CSV text because the File output component serves a file from
        disk.
    """
    if file_name is None:
        file_name = getattr(image, "filename", None) or "unknown"

    caption = generate_caption(git_processor_large_textcaps, git_model_large_textcaps, image)

    columns = [
        'Image File Name',
        'Statement',
        'Generated Caption',
        'Textual Similarity Score',
        'ITM Score',
        'Final Combined Score',
    ]
    all_results_list = []
    for statement in statements:
        textual_similarity_score = compute_textual_similarity(caption, statement) * 100
        itm_score_statement = compute_itm_score(image, statement) * 100
        # Equal weighting of the caption-based and image-based scores.
        final_score = 0.5 * textual_similarity_score + 0.5 * itm_score_statement
        all_results_list.append({
            'Image File Name': file_name,  # Include the image file name
            'Statement': statement,
            'Generated Caption': caption,
            'Textual Similarity Score': f"{textual_similarity_score:.2f}%",
            'ITM Score': f"{itm_score_statement:.2f}%",
            'Final Combined Score': f"{final_score:.2f}%",
        })

    # Passing the columns explicitly keeps the header row even when there are
    # no statements to score.
    results_df = pd.DataFrame(all_results_list, columns=columns)

    # delete=False: the file must outlive this function so it can be served.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".csv", delete=False, newline="", encoding="utf-8"
    ) as csv_file:
        csv_file.write(save_dataframe_to_csv(results_df))
    return results_df, csv_file.name
# Gradio interface: one uploaded image in, a results table and a CSV download out.
# The setup is defined exactly once; an unterminated gr.Interface( call followed
# by a second copy of the same setup is a syntax error.
# NOTE(review): the Image component passes only the image to fn; no file name
# is supplied by this interface.
image_input = gr.inputs.Image(label="Upload Image", image_mode='RGB', source="upload")
output_df = gr.outputs.Dataframe(label="Results")
output_csv = gr.outputs.File(label="Download CSV")
iface = gr.Interface(
    fn=process_image_and_statements,
    inputs=image_input,
    outputs=[output_df, output_csv],
    title="Image Captioning and Image-Text Matching",
    theme='sudeepshouche/minimalist',
    css=".output { flex-direction: column; } .output .outputs { width: 100%; }",  # Custom CSS
    # NOTE(review): capture_session is a legacy Gradio option; confirm the
    # installed Gradio version still accepts it.
    capture_session=True,
)
iface.launch(debug=True)