cubuvl committed on
Commit
416819d
·
verified ·
1 Parent(s): 06a7136

anh trong suot

Browse files
Files changed (1) hide show
  1. app.py +18 -10
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 
3
 
4
  name = "chanelcolgate/trocr-base-printed_captcha_ocr"
5
  model = VisionEncoderDecoderModel.from_pretrained(name)
@@ -7,13 +8,16 @@ processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
7
 
8
 
9
  def process_image(image):
10
- # prepare image
 
 
 
 
 
11
  pixel_values = processor(image, return_tensors="pt").pixel_values
12
 
13
- # generate (no beam search)
14
  generated_ids = model.generate(pixel_values)
15
-
16
- # decode
17
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
18
  return generated_text
19
 
@@ -21,9 +25,13 @@ def process_image(image):
21
  title = "Interactive demo: Captcha OCR"
22
  description = "Demo tracuumasothue captcha"
23
 
24
- interface = gr.Interface(fn=process_image,
25
- inputs="image",
26
- examples=[f"examples/captcha-{i}.png" for i in range(10)],
27
- outputs="text",
28
- title=title,
29
- description=description).launch()
 
 
 
 
 
1
  import gradio as gr
2
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
3
+ from PIL import Image
4
 
5
  name = "chanelcolgate/trocr-base-printed_captcha_ocr"
6
  model = VisionEncoderDecoderModel.from_pretrained(name)
 
8
 
9
 
def process_image(image):
    """Run captcha OCR on one image and return the recognized text.

    Args:
        image: The captcha to read, either a ``PIL.Image.Image`` or an
            array that ``Image.fromarray`` accepts. Gradio's ``"image"``
            input shortcut hands the callback a NumPy array unless the
            component is configured with ``type="pil"``. ``None`` is
            tolerated for the case where nothing was uploaded.

    Returns:
        The decoded text of the first generated sequence, or an empty
        string when ``image`` is ``None``.
    """
    if image is None:
        return ""

    # Arrays have no ``convert`` method, so wrap them in a PIL image first.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # Flatten any transparency onto an opaque white background, then drop
    # the alpha channel so the processor receives a plain RGB image.
    rgba = image.convert("RGBA")
    background = Image.new("RGBA", rgba.size, (255, 255, 255, 255))
    image = Image.alpha_composite(background, rgba).convert("RGB")

    # Prepare the image for the model.
    pixel_values = processor(image, return_tensors="pt").pixel_values

    # Generate token ids with the model's default generation settings.
    generated_ids = model.generate(pixel_values)

    # Decode the first (only) sequence back into text.
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return generated_text
23
 
 
25
  title = "Interactive demo: Captcha OCR"
26
  description = "Demo tracuumasothue captcha"
27
 
# Build the demo UI. The input is requested as an RGBA PIL image so that
# process_image receives an object that has ``convert`` and the alpha
# channel is still present when it is composited onto a white background.
interface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil", image_mode="RGBA"),
    examples=[f"examples/captcha-{i}.png" for i in range(10)],
    outputs="text",
    title=title,
    description=description,
)

interface.launch()