IZERE HIRWA Roger committed
Commit e4d2e2a · 1 Parent(s): 21a3c55
Files changed (1):
  1. app.py +20 -35
app.py CHANGED
@@ -1,36 +1,14 @@
 import gradio as gr
 from transformers import AutoProcessor, AutoModelForCausalLM
-import spaces
-
 import requests
 import copy
-
-from PIL import Image, ImageDraw, ImageFont
+from PIL import Image, ImageDraw
 import io
 import matplotlib.pyplot as plt
 import matplotlib.patches as patches
-
 import random
 import numpy as np
 
-import subprocess
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-
-models = {
-    'microsoft/Florence-2-large-ft': AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-large-ft', trust_remote_code=True).to("cpu").eval(),
-    'microsoft/Florence-2-large': AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-large', trust_remote_code=True).to("cpu").eval(),
-    'microsoft/Florence-2-base-ft': AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-base-ft', trust_remote_code=True).to("cpu").eval(),
-    'microsoft/Florence-2-base': AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True).to("cpu").eval(),
-}
-
-processors = {
-    'microsoft/Florence-2-large-ft': AutoProcessor.from_pretrained('microsoft/Florence-2-large-ft', trust_remote_code=True),
-    'microsoft/Florence-2-large': AutoProcessor.from_pretrained('microsoft/Florence-2-large', trust_remote_code=True),
-    'microsoft/Florence-2-base-ft': AutoProcessor.from_pretrained('microsoft/Florence-2-base-ft', trust_remote_code=True),
-    'microsoft/Florence-2-base': AutoProcessor.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True),
-}
-
-
 DESCRIPTION = "# [Florence-2 Demo](https://huggingface.co/microsoft/Florence-2-large)"
 
 colormap = ['blue','orange','green','purple','brown','pink','gray','olive','cyan','red',
@@ -42,7 +20,6 @@ def fig_to_pil(fig):
     buf.seek(0)
     return Image.open(buf)
 
-@spaces.GPU
 def run_example(task_prompt, image, text_input=None, model_id='microsoft/Florence-2-large'):
     model = models[model_id]
     processor = processors[model_id]
@@ -50,7 +27,7 @@ def run_example(task_prompt, image, text_input=None, model_id='microsoft/Florenc
         prompt = task_prompt
     else:
         prompt = task_prompt + text_input
-    inputs = processor(text=prompt, images=image, return_tensors="pt").to("cpu")
+    inputs = processor(text=prompt, images=image, return_tensors="pt")
     generated_ids = model.generate(
         input_ids=inputs["input_ids"],
         pixel_values=inputs["pixel_values"],
@@ -64,7 +41,6 @@ def run_example(task_prompt, image, text_input=None, model_id='microsoft/Florenc
         generated_text,
         task=task_prompt,
         image_size=(image.width, image.height)
-    )
     return parsed_answer
 
 def plot_bbox(image, data):
@@ -79,7 +55,6 @@ def plot_bbox(image, data):
     return fig
 
 def draw_polygons(image, prediction, fill_mask=False):
-
     draw = ImageDraw.Draw(image)
     scale = 1
     for polygons, label in zip(prediction['polygons'], prediction['labels']):
@@ -219,7 +194,22 @@ def process_image(image, task_prompt, text_input=None, model_id='microsoft/Flore
         output_image = draw_ocr_bboxes(output_image, results['<OCR_WITH_REGION>'])
         return results, output_image
     else:
-        return "", None  # Return empty string and None for unknown task prompts
+        return "", None
+
+# Load models for CPU
+models = {
+    'microsoft/Florence-2-large-ft': AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-large-ft', trust_remote_code=True).eval(),
+    'microsoft/Florence-2-large': AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-large', trust_remote_code=True).eval(),
+    'microsoft/Florence-2-base-ft': AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-base-ft', trust_remote_code=True).eval(),
+    'microsoft/Florence-2-base': AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True).eval(),
+}
+
+processors = {
+    'microsoft/Florence-2-large-ft': AutoProcessor.from_pretrained('microsoft/Florence-2-large-ft', trust_remote_code=True),
+    'microsoft/Florence-2-large': AutoProcessor.from_pretrained('microsoft/Florence-2-large', trust_remote_code=True),
+    'microsoft/Florence-2-base-ft': AutoProcessor.from_pretrained('microsoft/Florence-2-base-ft', trust_remote_code=True),
+    'microsoft/Florence-2-base': AutoProcessor.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True),
+}
 
 css = """
 #output {
@@ -229,7 +219,6 @@ css = """
 }
 """
 
-
 single_task_list =[
     'Caption', 'Detailed Caption', 'More Detailed Caption', 'Object Detection',
     'Dense Region Caption', 'Region Proposal', 'Caption to Phrase Grounding',
@@ -242,15 +231,12 @@ cascased_task_list =[
     'Caption + Grounding', 'Detailed Caption + Grounding', 'More Detailed Caption + Grounding'
 ]
 
-
 def update_task_dropdown(choice):
     if choice == 'Cascased task':
-        return gr.Dropdown(choices=cascaded_task_list, value='Caption + Grounding')
+        return gr.Dropdown(choices=cascased_task_list, value='Caption + Grounding')
     else:
         return gr.Dropdown(choices=single_task_list, value='Caption')
 
-
-
 with gr.Blocks(css=css) as demo:
     gr.Markdown(DESCRIPTION)
     with gr.Tab(label="Florence-2 Image Captioning"):
@@ -258,7 +244,7 @@ with gr.Blocks(css=css) as demo:
             with gr.Column():
                 input_img = gr.Image(label="Input Picture")
                 model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value='microsoft/Florence-2-large')
-                task_type = gr.Radio(choices=['Single task', 'Cascaded task'], label='Task type selector', value='Single task')
+                task_type = gr.Radio(choices=['Single task', 'Cascased task'], label='Task type selector', value='Single task')
                 task_prompt = gr.Dropdown(choices=single_task_list, label="Task Prompt", value="Caption")
                 task_type.change(fn=update_task_dropdown, inputs=task_type, outputs=task_prompt)
                 text_input = gr.Textbox(label="Text Input (optional)")
@@ -281,5 +267,4 @@ with gr.Blocks(css=css) as demo:
 
         submit_btn.click(process_image, [input_img, task_prompt, text_input, model_selector], [output_text, output_img])
 
-# launch the demo
 demo.launch(debug=True)
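For context, the inference path this commit switches to (plain CPU model loads, no flash-attn install, no `.to("cpu")` on the processor output) can be exercised outside the Gradio UI roughly as below. This is only a sketch mirroring `run_example` from the diff; the `'<CAPTION>'` task token, the sample image URL, and the generation settings beyond `input_ids`/`pixel_values` are illustrative assumptions, not part of this commit.

# Standalone sketch of the CPU inference path used after this commit (illustrative only).
import requests
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

model_id = 'microsoft/Florence-2-base'
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).eval()
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# Illustrative sample image; any RGB PIL image works.
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg"
image = Image.open(requests.get(url, stream=True).raw).convert("RGB")

prompt = '<CAPTION>'  # one of the Florence-2 task tokens handled by process_image()
inputs = processor(text=prompt, images=image, return_tensors="pt")  # stays on CPU, as in the new run_example()
generated_ids = model.generate(
    input_ids=inputs["input_ids"],
    pixel_values=inputs["pixel_values"],
    max_new_tokens=1024,  # assumed generation settings; not shown in this diff
    num_beams=3,
)
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
parsed_answer = processor.post_process_generation(
    generated_text, task=prompt, image_size=(image.width, image.height)
)
print(parsed_answer)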
 