"""Gradio demo that scores an image against a text prompt with VQAScore."""

import gradio as gr
import spaces
import torch

# Replace torch.jit.script with an identity function so that any later call to
# it returns its argument unchanged.  This must run before t2v_metrics is
# imported below.
# NOTE(review): presumably this sidesteps TorchScript compilation inside the
# imported model code -- confirm before removing.
torch.jit.script = lambda f: f

from t2v_metrics import VQAScore, list_all_vqascore_models

# Print whatever model list the library reports at start-up.
print(list_all_vqascore_models())

# Module-level holder for the scoring model; populated once at start-up.
model_pipe = None


def initialize_model(model_name):
    """Return the shared scoring model, building it on the first call.

    Args:
        model_name: Name passed to ``VQAScore(model=...)`` when the model has
            not been created yet.  Ignored once ``model_pipe`` is set.

    Returns:
        The module-level ``model_pipe`` object.
    """
    global model_pipe
    print("Initializing model...")
    if model_pipe is None:
        # The original author marks this as the recommended scoring model.
        model_pipe = VQAScore(model=model_name)
    print("Model initialized!")
    return model_pipe


model_pipe = initialize_model("clip-flant5-xl")


@spaces.GPU
def generate(image, text):
    """Run the shared model on one image/prompt pair and return its result.

    Args:
        image: Image supplied by the Gradio ``Image`` input (configured with
            ``type="pil"``).
        text: Prompt supplied by the Gradio ``Textbox`` input.

    Returns:
        Whatever ``model_pipe(image, text)`` returns; the interface displays
        it through a "number" output.
    """
    score = model_pipe(image, text)
    return score


# Build the UI and start serving.  ``iface`` receives the return value of
# ``launch()``, exactly as in the original chained expression.
# A model-selection Dropdown ("clip-flant5-xl" / "clip-flant5-xxl") was
# previously sketched as an extra first input but is disabled; the model is
# fixed at start-up instead.
iface = gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(label="Prompt"),
    ],
    outputs="number",
    title="VQAScore",
    description="This model evaluates the similarity between an image and a text prompt.",
).launch()