Spaces:
Build error
Build error
import tempfile

import gradio as gr
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from PIL import Image
from transformers import GitConfig, GitForCausalLM, GitModel, GitProcessor
# Load models and processors once at import time so every request reuses them.
git_config = GitConfig.from_pretrained("microsoft/git-large-r")
git_processor_large_textcaps = GitProcessor.from_pretrained("microsoft/git-large-r")
# Captioning needs the language-modeling head: the bare GitModel returns hidden
# states only (no logits) and cannot generate text, so load GitForCausalLM.
git_model_large_textcaps = GitForCausalLM.from_pretrained("microsoft/git-large-r")
# NOTE(review): confirm this TF-Hub handle resolves and what input/output
# signature the loaded model exposes; compute_itm_score depends on it.
itm_model = hub.load("https://tfhub.dev/google/LaViT/1")
use_model = hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5")

# List of statements for Image-Text Matching
statements = [
    # (List of statements as provided in the original code)
]


def generate_caption(processor, model, image, max_length=50):
    """Generate a text caption for a single image.

    Args:
        processor: Processor used to turn the image into ``pixel_values`` and
            to decode generated token ids back into text.
        model: Generation-capable captioning model (e.g. ``GitForCausalLM``).
        image: The image to caption, in any form the processor accepts.
        max_length: Maximum number of tokens to generate.

    Returns:
        The decoded caption for the image, with special tokens removed.
    """
    inputs = processor(images=image, return_tensors="pt")
    # Autoregressive decoding conditioned on the image only. A plain forward
    # pass would additionally require ``input_ids`` and yields no caption.
    generated_ids = model.generate(
        pixel_values=inputs.pixel_values,
        max_length=max_length,
    )
    captions = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return captions[0]
def compute_textual_similarity(caption, statement):
    """Return the inner product of the sentence embeddings of two texts.

    Args:
        caption: First text (the generated image caption).
        statement: Second text to compare against the caption.

    Returns:
        The similarity score as a plain Python ``float``.
    """
    # Each call embeds a one-element batch; ``[0]`` selects that single
    # embedding, giving one 1-D vector per text.
    caption_embedding = use_model([caption])[0].numpy()
    statement_embedding = use_model([statement])[0].numpy()
    # The inner product of two 1-D vectors is already a scalar, so it must not
    # be indexed (indexing a NumPy scalar raises IndexError).
    return float(np.inner(caption_embedding, statement_embedding))
def compute_itm_score(image, statement):
    """Score how well an image matches a statement.

    The image is passed through ``itm_model`` and the statement through
    ``use_model``; the score is read from the inner product of the two
    feature arrays.

    Args:
        image: Image input forwarded unchanged to ``itm_model``.
        statement: Text to match against the image.

    Returns:
        Element ``[0][0]`` of the inner product of image and text features.
    """
    img_feats = itm_model(image)
    text_feats = use_model([statement])[0].numpy()
    # NOTE(review): if text_feats is 1-D (presumably, since one embedding is
    # selected with [0]), np.inner drops the last axis of img_feats, so the
    # [0][0] lookup below needs img_feats to have rank >= 3 -- confirm against
    # the actual output of itm_model.
    scores = np.inner(img_feats, text_feats)
    return scores[0][0]
def save_dataframe_to_csv(df):
    """Serialize a DataFrame to CSV text without the index column.

    Args:
        df: The DataFrame to serialize.

    Returns:
        The CSV content as a string (nothing is written to disk).
    """
    return df.to_csv(index=False)
def process_image_and_statements(image, file_name=""):
    """Caption an image and score it against every entry in ``statements``.

    For each statement, the textual similarity between the generated caption
    and the statement, and the image-text matching score, are scaled to
    percentages and averaged with equal weight.

    Args:
        image: The uploaded image.
        file_name: Name recorded in the 'Image File Name' column. Optional so
            the function also works when only the image is supplied.

    Returns:
        A tuple ``(results_df, csv_path)``: the results table and the path of
        a temporary CSV file holding the same data, suitable for a file
        download output.
    """
    # Fixed column order so the table and CSV keep their header even when
    # ``statements`` is empty.
    result_columns = [
        'Image File Name',
        'Statement',
        'Generated Caption',
        'Textual Similarity Score',
        'ITM Score',
        'Final Combined Score',
    ]

    # The caption does not depend on the statement, so compute it once.
    caption = generate_caption(git_processor_large_textcaps, git_model_large_textcaps, image)

    all_results_list = []
    for statement in statements:
        textual_similarity_score = compute_textual_similarity(caption, statement) * 100
        itm_score_statement = compute_itm_score(image, statement) * 100
        final_score = 0.5 * textual_similarity_score + 0.5 * itm_score_statement
        all_results_list.append({
            'Image File Name': file_name,  # Include the image file name
            'Statement': statement,
            'Generated Caption': caption,
            'Textual Similarity Score': f"{textual_similarity_score:.2f}%",
            'ITM Score': f"{itm_score_statement:.2f}%",
            'Final Combined Score': f"{final_score:.2f}%",
        })

    results_df = pd.DataFrame(all_results_list, columns=result_columns)
    csv_text = save_dataframe_to_csv(results_df)

    # A file output needs a path on disk, not the CSV text itself.
    # delete=False keeps the file available after this function returns;
    # newline="" stops the line endings already present in csv_text from being
    # translated a second time.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".csv", delete=False, encoding="utf-8", newline=""
    ) as csv_file:
        csv_file.write(csv_text)

    return results_df, csv_file.name
# Gradio interface: receives an image and its file name, and returns the
# results table together with a downloadable CSV file.
# Components come from the top-level ``gr`` namespace; the ``gr.inputs`` /
# ``gr.outputs`` namespaces are deprecated. The image ``source`` argument is
# left at its default because its name differs between Gradio major versions.
image_input = gr.Image(label="Upload Image", image_mode="RGB")
file_name_input = gr.Textbox(label="Image File Name")
output_df = gr.Dataframe(label="Results")
output_csv = gr.File(label="Download CSV")

# Inputs are passed positionally to ``fn`` in the order listed here.
iface = gr.Interface(
    fn=process_image_and_statements,
    inputs=[image_input, file_name_input],
    outputs=[output_df, output_csv],
    title="Image Captioning and Image-Text Matching",
    theme="sudeepshouche/minimalist",
    css=".output { flex-direction: column; } .output .outputs { width: 100%; }",  # Custom CSS
)

iface.launch(debug=True)