Spaces:
Build error
Build error
File size: 3,906 Bytes
8e34f80 c19e7bb 35e0000 c19e7bb 35e0000 c19e7bb 8e34f80 c19e7bb 35e0000 c19e7bb 35e0000 c19e7bb 8e34f80 c19e7bb 35e0000 c19e7bb 8e34f80 c19e7bb 35e0000 c19e7bb 35e0000 c19e7bb 35e0000 c19e7bb 35e0000 c19e7bb 35e0000 c19e7bb e981e7f c19e7bb 15b96ac c19e7bb 35e0000 c19e7bb 0b76391 080099f 8e34f80 ee4f4a6 c19e7bb ee4f4a6 c19e7bb ee4f4a6 e17785f c19e7bb ee4f4a6 4c0fb4c c19e7bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import tempfile

import gradio as gr
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from PIL import Image
from transformers import GitConfig, GitForCausalLM, GitModel, GitProcessor
# Load models and processors once at import time so every request reuses them.
# NOTE(review): the checkpoint names and hub handles below were not verified
# here -- confirm that each one resolves before deploying.
git_config = GitConfig.from_pretrained("microsoft/git-large-r")
git_processor_large_textcaps = GitProcessor.from_pretrained("microsoft/git-large-r")
# Caption generation needs the language-modelling head, which GitForCausalLM
# provides; the bare GitModel only returns hidden states.
git_model_large_textcaps = GitForCausalLM.from_pretrained("microsoft/git-large-r")
itm_model = hub.load("https://tfhub.dev/google/LaViT/1")
use_model = hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5")

# List of statements for Image-Text Matching
statements = [
    # (List of statements as provided in the original code)
]


def generate_caption(processor, model, image, max_length=50):
    """Generate a text caption for a single image.

    Args:
        processor: Processor used to turn the image into ``pixel_values`` and
            to decode generated token ids back into text.
        model: Generation-capable captioning model (it must expose
            ``generate``), e.g. a ``GitForCausalLM`` instance.
        image: The image to caption, in any form the processor accepts.
        max_length: Upper bound on the generated sequence length.

    Returns:
        The decoded caption for the image, with special tokens removed.
    """
    pixel_values = processor(images=image, return_tensors="pt")["pixel_values"]
    # Autoregressive decoding: a single forward pass followed by argmax over
    # the logits does not yield a caption when no text prompt is supplied.
    generated_ids = model.generate(pixel_values=pixel_values, max_length=max_length)
    captions = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return captions[0]
def compute_textual_similarity(caption, statement):
    """Return the inner product of the sentence embeddings of two texts.

    Each text is embedded on its own with the module-level ``use_model`` and
    the first row of each result is compared.

    Args:
        caption: Caption text.
        statement: Statement text to compare the caption against.

    Returns:
        The inner product of the two embeddings as a plain ``float``.
    """
    # assumes the first row of each embedding result is a 1-D vector -- TODO confirm
    caption_embedding = use_model([caption])[0].numpy()
    statement_embedding = use_model([statement])[0].numpy()
    # np.inner of two 1-D vectors is already a scalar; indexing it with [0]
    # raises IndexError, so convert the result directly instead.
    return float(np.inner(caption_embedding, statement_embedding))
def compute_itm_score(image, statement):
    """Return an image-text matching score for one image and one statement.

    The image is passed through the module-level ``itm_model`` and the
    statement through ``use_model``; the score is element ``[0][0]`` of the
    inner product of the two results.

    Args:
        image: Image input handed to ``itm_model`` unchanged.
        statement: Statement text to match against the image.

    Returns:
        Element ``[0][0]`` of the inner-product result.
    """
    visual_features = itm_model(image)
    text_vector = use_model([statement])[0].numpy()
    # NOTE(review): this presumes both models emit features of the same
    # dimensionality -- confirm against the image model's output.
    scores = np.inner(visual_features, text_vector)
    # NOTE(review): the double index assumes the result has at least two
    # axes -- verify the shape returned by the image model.
    first_row = scores[0]
    return first_row[0]
def save_dataframe_to_csv(df):
    """Serialize a DataFrame to CSV text without the index column.

    Args:
        df: The DataFrame to serialize.

    Returns:
        The CSV content as a string (nothing is written to disk).
    """
    return df.to_csv(index=False)
def process_image_and_statements(image, file_name=""):
    """Caption an image and score it against every configured statement.

    For each entry in the module-level ``statements`` list, the caption-to-
    statement similarity and the image-to-statement matching score are
    computed, scaled to percentages, and averaged with equal weight.

    Args:
        image: The uploaded image.
        file_name: Name recorded in the ``Image File Name`` column. It is
            optional because the interface in this file registers only the
            image as an input, so the function is invoked with one argument.

    Returns:
        A ``(results_df, csv_path)`` tuple: the results table and the path of
        a CSV file holding the same data.
    """
    caption = generate_caption(
        git_processor_large_textcaps, git_model_large_textcaps, image
    )

    rows = []
    for statement in statements:
        textual_similarity_score = compute_textual_similarity(caption, statement) * 100
        itm_score_statement = compute_itm_score(image, statement) * 100
        final_score = 0.5 * textual_similarity_score + 0.5 * itm_score_statement
        rows.append({
            'Image File Name': file_name,
            'Statement': statement,
            'Generated Caption': caption,
            'Textual Similarity Score': f"{textual_similarity_score:.2f}%",
            'ITM Score': f"{itm_score_statement:.2f}%",
            'Final Combined Score': f"{final_score:.2f}%",
        })
    results_df = pd.DataFrame(rows)

    # The file-download output takes a path on disk rather than raw CSV text,
    # so the CSV is written to a temporary file. delete=False keeps the file
    # available after this function returns; nothing here removes it later.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".csv", delete=False, newline="", encoding="utf-8"
    ) as csv_file:
        csv_file.write(save_dataframe_to_csv(results_df))
    return results_df, csv_file.name
# Gradio interface: one uploaded image in, a results table and a CSV download
# out. Only the image is registered as an input; no file-name component is
# wired in.
# NOTE(review): gr.inputs / gr.outputs look like legacy Gradio namespaces --
# confirm that the installed Gradio version still provides them.
image_input = gr.inputs.Image(label="Upload Image", image_mode='RGB', source="upload")
output_df = gr.outputs.Dataframe(label="Results")
output_csv = gr.outputs.File(label="Download CSV")

# A single, fully closed Interface definition. The earlier duplicate opened
# gr.Interface( without closing it, which made the module unparseable.
iface = gr.Interface(
    fn=process_image_and_statements,
    inputs=image_input,
    outputs=[output_df, output_csv],
    title="Image Captioning and Image-Text Matching",
    theme='sudeepshouche/minimalist',
    css=".output { flex-direction: column; } .output .outputs { width: 100%; }",  # Custom CSS
    # NOTE(review): presumably a legacy option -- confirm that the installed
    # Gradio version still accepts it.
    capture_session=True,
)
iface.launch(debug=True)