File size: 860 Bytes
56f8e03
8253fcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import gradio as gr
from transformers import DonutProcessor, VisionEncoderDecoderModel
from PIL import Image

# Hub repository that supplies both the model weights and the processor config.
_REPO_ID = "AdamCodd/donut-receipts-extract"
# Keyword arguments shared by both from_pretrained calls below.
_HUB_KWARGS = {"use_auth_token": True}

# Loaded once at import time and reused by every request.
model = VisionEncoderDecoderModel.from_pretrained(_REPO_ID, **_HUB_KWARGS)
processor = DonutProcessor.from_pretrained(_REPO_ID, **_HUB_KWARGS)

def extract_info(image):
    """Run the receipt model on one image and return the decoded text.

    Args:
        image: A PIL image (the Gradio input is declared with ``type="pil"``),
            or ``None`` when no image was supplied.

    Returns:
        The first generated sequence decoded to a string with special tokens
        skipped, or an empty string when ``image`` is ``None``.
    """
    # Submitting the form without an upload yields None; return early rather
    # than failing with AttributeError on ``None.convert``.
    if image is None:
        return ""

    # Normalise to 3-channel RGB before handing the image to the processor.
    image = image.convert("RGB")
    pixel_values = processor(images=image, return_tensors="pt").pixel_values

    # The task prompt seeds the decoder. add_special_tokens=False stops the
    # tokenizer from adding its own special tokens (e.g. an end-of-sequence
    # marker) to the prompt, which would put them in front of generation.
    task_prompt = "<s_receipt>"
    decoder_input_ids = processor.tokenizer(
        task_prompt, add_special_tokens=False, return_tensors="pt"
    ).input_ids

    outputs = model.generate(
        pixel_values,
        decoder_input_ids=decoder_input_ids,
        max_length=512,
    )
    # A single image was passed in, so take the first decoded sequence.
    generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0]
    return generated_text

# Web UI: one PIL image in, the text returned by extract_info out.
demo = gr.Interface(
    fn=extract_info,
    inputs=gr.Image(type="pil"),
    outputs="text",
)
demo.launch()