Spaces:

Futuretop
/

Image-to-Text

Runtime error

App Files Files Community

Image-to-Text / app.py

Futuretop

Update app.py

a8150fc verified 3 months ago

raw

history blame

2.46 kB

	import gradio as gr
	import subprocess
	import torch
	from PIL import Image
	from transformers import AutoProcessor, AutoConfig
	import importlib.util, sys, os

	# Upgrade transformers at startup.
	# The requirement is passed as a single argv element (no shell): with
	# shell=True the ">" in "transformers>=4.50.0" is parsed as an output
	# redirection, so pip would install an unconstrained "transformers" and
	# write its output to a file named "=4.50.0".
	# sys.executable -m pip targets the interpreter that is running this app.
	# NOTE(review): transformers is already imported above, so a newly installed
	# version is only picked up by a fresh process; pinning the version in the
	# Space's requirements.txt would be more reliable -- confirm and migrate.
	subprocess.run(
	    [sys.executable, "-m", "pip", "install", "--upgrade", "transformers>=4.50.0"],
	    check=True,
	)

	# Hugging Face Hub id of the checkpoint used for captioning.
	model_id = "microsoft/Florence-2-base-ft"

	# Prefer the GPU when one is available, otherwise run on the CPU.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	# Loading the config with trust_remote_code=True imports the checkpoint's
	# custom configuration module; its location on disk is used below to find
	# the sibling modeling file.
	config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)

	config_mod_name = config.__class__.__module__
	config_mod = sys.modules[config_mod_name]
	code_dir = os.path.dirname(config_mod.__file__)

	modeling_path = os.path.join(code_dir, "modeling_florence2.py")
	if not os.path.exists(modeling_path):
	    raise FileNotFoundError(f"Couldn’t find {modeling_path}")

	# Import the modeling file manually under a fixed module name so its classes
	# can be referenced directly.
	spec = importlib.util.spec_from_file_location("florence2_modeling", modeling_path)
	flor_mod = importlib.util.module_from_spec(spec)
	sys.modules["florence2_modeling"] = flor_mod
	spec.loader.exec_module(flor_mod)

	# NOTE(review): generate_caption passes pixel_values to generate(); this
	# class name suggests a text-only model -- confirm whether
	# Florence2ForConditionalGeneration is the class that should be loaded here.
	FlorenceLM = flor_mod.Florence2LanguageForConditionalGeneration
	florence_model = FlorenceLM.from_pretrained(
	    model_id,
	    trust_remote_code=True,
	).to(device).eval()
	# Fixed: the processor was loaded from the undefined name `model`, which
	# raised NameError at import time; it must use the same checkpoint id.
	florence_processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

	def generate_caption(image):
	    """Generate a detailed caption for ``image`` with the Florence model.

	    Args:
	        image: A PIL image, or any object accepted by ``Image.fromarray``
	            (converted to a PIL image before processing).

	    Returns:
	        The value stored under the "<MORE_DETAILED_CAPTION>" key of the
	        processor's post-processed output.
	    """
	    task_token = "<MORE_DETAILED_CAPTION>"

	    # Normalise the input to a PIL image.
	    pil_image = image if isinstance(image, Image.Image) else Image.fromarray(image)

	    # Build the model inputs and move every tensor to the target device.
	    encoded = florence_processor(text=task_token, images=pil_image, return_tensors="pt")
	    batch = {name: tensor.to(device) for name, tensor in encoded.items()}

	    # Deterministic beam search (no sampling).
	    output_ids = florence_model.generate(
	        input_ids=batch["input_ids"],
	        pixel_values=batch["pixel_values"],
	        max_new_tokens=1024,
	        early_stopping=False,
	        do_sample=False,
	        num_beams=3,
	    )

	    # Special tokens are kept in the decoded text that is handed to the
	    # processor's post-processing step.
	    decoded = florence_processor.batch_decode(output_ids, skip_special_tokens=False)
	    raw_text = decoded[0]

	    width, height = pil_image.width, pil_image.height
	    parsed = florence_processor.post_process_generation(
	        raw_text,
	        task=task_token,
	        image_size=(width, height),
	    )

	    caption = parsed[task_token]
	    print("\n\nGeneration completed!:" + caption)
	    return caption

	# Build the web UI: one image input, one copyable text output.
	image_input = gr.Image(label="Input Image")
	caption_output = gr.Textbox(label="Output Prompt", lines=3, show_copy_button=True)

	demo = gr.Interface(
	    fn=generate_caption,
	    inputs=[image_input],
	    outputs=[caption_output],
	    theme="Yntec/HaleyCH_Theme_Orange",
	)

	# Start the app with debug output enabled.
	demo.launch(debug=True)