Spaces:
Runtime error
| from typing import Tuple | |
| import gradio as gr | |
| import supervision as sv | |
| import torch | |
| from PIL import Image | |
| from utils.florence import load_model, run_inference, FLORENCE_DETAILED_CAPTION_TASK, \ | |
| FLORENCE_CAPTION_TO_PHRASE_GROUNDING_TASK | |
| MARKDOWN = """ | |
| # Florence-2 + SAM2 🔥 | |
| """ | |
| DEVICE = torch.device("cuda") | |
| FLORENCE_MODEL, FLORENCE_PROCESSOR = load_model(device=DEVICE) | |
| BOX_ANNOTATOR = sv.BoxAnnotator(color_lookup=sv.ColorLookup.INDEX) | |
| LABEL_ANNOTATOR = sv.LabelAnnotator(color_lookup=sv.ColorLookup.INDEX) | |
def process(
    image_input,
) -> Tuple[Image.Image, str]:
    """Caption an image with Florence-2, then ground caption phrases as boxes.

    Runs two Florence-2 passes: first a detailed-caption pass to produce a
    natural-language description, then a caption-to-phrase-grounding pass
    that turns each phrase of that caption into a bounding box. The boxes
    and labels are drawn onto a copy of the input image.

    Args:
        image_input: PIL image from the Gradio upload component. May be
            ``None`` when the user clicks Submit without uploading.

    Returns:
        Tuple of (annotated image, generated caption). Returns
        ``(None, "")`` when no image was provided.
    """
    # Guard: Gradio passes None when the image component is empty; the
    # original code crashed here on .copy()/.size.
    if image_input is None:
        return None, ""

    # Pass 1: generate a detailed caption for the whole image.
    _, result = run_inference(
        model=FLORENCE_MODEL,
        processor=FLORENCE_PROCESSOR,
        device=DEVICE,
        image=image_input,
        task=FLORENCE_DETAILED_CAPTION_TASK
    )
    caption = result[FLORENCE_DETAILED_CAPTION_TASK]

    # Pass 2: ground each caption phrase to a bounding box.
    _, result = run_inference(
        model=FLORENCE_MODEL,
        processor=FLORENCE_PROCESSOR,
        device=DEVICE,
        image=image_input,
        task=FLORENCE_CAPTION_TO_PHRASE_GROUNDING_TASK,
        text=caption
    )
    detections = sv.Detections.from_lmm(
        lmm=sv.LMM.FLORENCE_2,
        result=result,
        resolution_wh=image_input.size
    )

    # Annotate a copy so the user's uploaded image is not mutated in place.
    output_image = image_input.copy()
    output_image = BOX_ANNOTATOR.annotate(output_image, detections)
    output_image = LABEL_ANNOTATOR.annotate(output_image, detections)
    return output_image, caption
# --- Gradio UI -----------------------------------------------------------
# Two-column layout: upload + submit on the left, annotated image and
# caption text on the right. The Space entry point must be named `demo`.
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        with gr.Column():
            # Input side: image upload and the submit trigger.
            uploaded_image = gr.Image(
                type='pil', label='Upload image')
            submit_button = gr.Button(value='Submit', variant='primary')
        with gr.Column():
            # Output side: annotated image and the generated caption.
            annotated_image = gr.Image(type='pil', label='Image output')
            caption_box = gr.Textbox(label='Caption output')

    # Wire the button to the inference pipeline.
    submit_button.click(
        fn=process,
        inputs=[uploaded_image],
        outputs=[annotated_image, caption_box]
    )

demo.launch(debug=False, show_error=True, max_threads=1)