File size: 3,906 Bytes
8e34f80
 
 
c19e7bb
 
 
 
35e0000
c19e7bb
 
 
 
 
 
35e0000
c19e7bb
 
 
 
8e34f80
c19e7bb
 
 
 
 
 
35e0000
c19e7bb
35e0000
c19e7bb
 
 
 
8e34f80
c19e7bb
35e0000
c19e7bb
 
 
 
8e34f80
c19e7bb
35e0000
c19e7bb
 
35e0000
 
c19e7bb
 
35e0000
 
c19e7bb
 
 
 
 
35e0000
c19e7bb
 
 
 
35e0000
c19e7bb
e981e7f
c19e7bb
15b96ac
c19e7bb
 
 
 
35e0000
c19e7bb
 
 
 
 
 
 
 
0b76391
080099f
8e34f80
ee4f4a6
c19e7bb
ee4f4a6
c19e7bb
ee4f4a6
e17785f
c19e7bb
 
ee4f4a6
4c0fb4c
c19e7bb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import gradio as gr
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from PIL import Image
from transformers import GitConfig, GitForCausalLM, GitModel, GitProcessor

# Load models and processors.
# The captioning checkpoint is loaded with its language-modelling head
# (GitForCausalLM) because generate_caption() decodes text with
# ``model.generate``; the bare GitModel has no such head.
git_config = GitConfig.from_pretrained("microsoft/git-large-r")
git_processor_large_textcaps = GitProcessor.from_pretrained("microsoft/git-large-r")
git_model_large_textcaps = GitForCausalLM.from_pretrained("microsoft/git-large-r")
# NOTE(review): the output format of this TF-Hub module is not visible in
# this file; compute_itm_score() assumes it yields an embedding comparable
# with the sentence encoder below -- confirm.
itm_model = hub.load("https://tfhub.dev/google/LaViT/1")
use_model = hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5")

# List of statements for Image-Text Matching
statements = [
    # (List of statements as provided in the original code)
]

# Function to generate image caption
def generate_caption(processor, model, image, max_length=50):
    """Generate a caption for ``image`` with a generative vision-language model.

    Args:
        processor: Callable pre-processor; invoked as
            ``processor(images=image, return_tensors="pt")`` and later used
            to decode the generated token ids via ``batch_decode``.
        model: Model exposing ``generate(pixel_values=..., max_length=...)``.
        image: Image accepted by ``processor``.
        max_length: Upper bound on the generated sequence length.

    Returns:
        The first decoded caption, with special tokens removed.
    """
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    # Autoregressive decoding: a single forward pass followed by argmax does
    # not produce a caption, so let the model generate the token sequence.
    generated_ids = model.generate(pixel_values=pixel_values, max_length=max_length)
    captions = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return captions[0]

# Function to compute textual similarity
def compute_textual_similarity(caption, statement):
    """Return the inner product between the sentence embeddings of two texts.

    Both texts are embedded separately with the module-level ``use_model``.

    Args:
        caption: First text to embed.
        statement: Second text to embed.

    Returns:
        float: Inner product of the two embeddings.
    """
    caption_embedding = use_model([caption])[0].numpy()
    statement_embedding = use_model([statement])[0].numpy()
    similarity = np.inner(caption_embedding, statement_embedding)
    # np.inner of two 1-D vectors is a 0-d scalar, which cannot be indexed
    # with [0]; flattening first handles both scalar and length-1 results.
    return float(np.ravel(similarity)[0])

# Function to compute ITM score
def compute_itm_score(image, statement):
    """Return an image-text matching score for ``image`` and ``statement``.

    The image is passed to the module-level ``itm_model`` and the statement
    is embedded with ``use_model``; the score is the inner product of the two.

    Args:
        image: Image forwarded unchanged to ``itm_model``.
        statement: Text to compare against the image.

    Returns:
        float: First element of the inner-product result.
    """
    # NOTE(review): assumes itm_model returns features whose last dimension
    # matches the sentence-embedding size -- confirm against the model.
    image_features = itm_model(image)
    statement_features = use_model([statement])[0].numpy()
    similarity = np.inner(image_features, statement_features)
    # The statement embedding is 1-D, so the result has one axis fewer than
    # image_features and ``[0][0]`` fails for a (1, D) image embedding.
    # Taking the first flattened element works for any result rank.
    return float(np.ravel(similarity)[0])

# Function to save DataFrame to CSV
def save_dataframe_to_csv(df):
    """Serialise ``df`` to CSV text.

    Despite the name, nothing is written to disk: the CSV content (header
    row included, index column omitted) is returned as a string.
    """
    return df.to_csv(path_or_buf=None, index=False)

# Main function to perform image captioning and image-text matching
def process_image_and_statements(image, file_name=None):
    """Caption ``image`` and score it against every entry in ``statements``.

    For each statement the caption/statement textual similarity and the
    image/statement ITM score are computed, scaled to percentages, and
    averaged with equal weights into a final score.

    Args:
        image: Uploaded image; forwarded unchanged to ``generate_caption``
            and ``compute_itm_score``.
        file_name: Optional label stored in the 'Image File Name' column.
            Defaults to None so the function can be called with the image
            alone.

    Returns:
        tuple: ``(results_df, csv_path)`` where ``results_df`` is a pandas
        DataFrame with one row per statement and ``csv_path`` is the path of
        a temporary CSV file holding the same rows.
    """
    import tempfile  # local import: only needed for the downloadable file

    columns = [
        'Image File Name',
        'Statement',
        'Generated Caption',
        'Textual Similarity Score',
        'ITM Score',
        'Final Combined Score',
    ]

    # The caption does not depend on the statement, so compute it once.
    caption = generate_caption(git_processor_large_textcaps, git_model_large_textcaps, image)

    rows = []
    for statement in statements:
        textual_similarity_score = compute_textual_similarity(caption, statement) * 100
        itm_score_statement = compute_itm_score(image, statement) * 100
        final_score = 0.5 * textual_similarity_score + 0.5 * itm_score_statement
        rows.append({
            'Image File Name': file_name,
            'Statement': statement,
            'Generated Caption': caption,
            'Textual Similarity Score': f"{textual_similarity_score:.2f}%",
            'ITM Score': f"{itm_score_statement:.2f}%",
            'Final Combined Score': f"{final_score:.2f}%",
        })

    # Explicit columns keep the header intact when ``statements`` is empty.
    results_df = pd.DataFrame(rows, columns=columns)

    # A file-download output needs a path on disk rather than raw CSV text,
    # so persist the text to a temporary file and return its path.
    # newline="" avoids translating line endings already present in the text.
    csv_text = save_dataframe_to_csv(results_df)
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".csv", delete=False, newline="", encoding="utf-8"
    ) as handle:
        handle.write(csv_text)
        csv_path = handle.name

    return results_df, csv_path

# Gradio interface: a single Image input, so the callback receives only the
# uploaded image. Outputs are the results table and a downloadable CSV file.
image_input = gr.inputs.Image(label="Upload Image", image_mode='RGB', source="upload")
output_df = gr.outputs.Dataframe(label="Results")
output_csv = gr.outputs.File(label="Download CSV")

# A single Interface definition; an earlier, unterminated copy of this call
# made the whole file a SyntaxError and has been removed.
iface = gr.Interface(
    fn=process_image_and_statements,
    inputs=image_input,
    outputs=[output_df, output_csv],
    title="Image Captioning and Image-Text Matching",
    theme='sudeepshouche/minimalist',
    css=".output { flex-direction: column; } .output .outputs { width: 100%; }",  # Custom CSS
    # NOTE(review): legacy Gradio option; confirm the installed version
    # still accepts it.
    capture_session=True,
)

iface.launch(debug=True)