# VQAScore demo — app.py
# (Hugging Face Space: zhiqiulin/VQAScore, commit 58ed5ef; the original
# file-viewer header lines were converted to this comment so the file parses.)
import gradio as gr
import spaces
import torch
torch.jit.script = lambda f: f
# torch.autocast = lambda device_type, dtype: torch.autocast(device_type, torch.float)
# Initialize the model only once
# if torch.cuda.is_available():
# model_pipe = VQAScore(model="clip-flant5-xl", device="cpu") # our recommended scoring model
# print("Model initialized!")
@spaces.GPU
def generate(model_name, image, text):
from t2v_metrics import VQAScore, list_all_vqascore_models
print(list_all_vqascore_models())
# print("Model_name:", model_name)
print("Image:", image)
print("Text:", text)
model_pipe = VQAScore(model="clip-flant5-xl") # our recommended scoring model
# print("Model initialized, now moving to cuda")
model_pipe.to("cuda")
print("Generating!")
# with torch.autocast(device_type='cuda'):
# with torch.autocast(device_type='cuda', dtype=torch.float):
result = model_pipe(images=[image], texts=[text])
return result
iface = gr.Interface(
fn=generate, # function to call
inputs=[gr.Dropdown(["clip-flant5-xl", "clip-flant5-xxl"], label="Model Name"), gr.Image(type="filepath"), gr.Textbox(label="Prompt")], # define the types of inputs
# inputs=[gr.Image(type="filepath"), gr.Textbox(label="Prompt")], # define the types of inputs
outputs="number", # define the type of output
title="VQAScore", # title of the app
description="This model evaluates the similarity between an image and a text prompt."
).launch()