Spaces:
Sleeping
Sleeping
import gradio as gr | |
from transformers import DonutProcessor, VisionEncoderDecoderModel | |
from PIL import Image | |
# Hub checkpoint id shared by the processor and the model, so the two can
# never drift apart (name suggests Donut fine-tuned for DocVQA).
_CHECKPOINT = "naver-clova-ix/donut-base-finetuned-docvqa"

# Loaded once at module import; both objects are reused by every request.
processor = DonutProcessor.from_pretrained(_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
def extract_info(image, question="What is the total amount?"):
    """Answer a question about a document image with the Donut DocVQA model.

    Args:
        image: A PIL image of the document (e.g. a receipt), or ``None``
            when no image was submitted.
        question: Natural-language question to ask about the document.
            Defaults to asking for the total amount.

    Returns:
        The model's answer as a string. An empty string is returned when
        ``image`` is ``None``. If the generated sequence cannot be parsed
        into an ``answer`` field, the cleaned raw sequence is returned.
    """
    import re  # local import: keeps the file's top-level import block untouched

    # Gradio passes None when the user submits without uploading an image.
    if image is None:
        return ""

    tokenizer = processor.tokenizer
    pixel_values = processor(
        images=image.convert("RGB"), return_tensors="pt"
    ).pixel_values

    # The DocVQA checkpoint expects the question wrapped in its task template;
    # a bare question is not a prompt the decoder was fine-tuned on.
    # add_special_tokens=False stops the tokenizer from adding BOS/EOS around
    # a prompt that already starts with the task start token.
    prompt = f"<s_docvqa><s_question>{question}</s_question><s_answer>"
    decoder_input_ids = tokenizer(
        prompt, add_special_tokens=False, return_tensors="pt"
    ).input_ids

    outputs = model.generate(
        pixel_values,
        decoder_input_ids=decoder_input_ids,
        # Bound generation by the decoder's own position-embedding size
        # instead of a hard-coded length that may exceed it.
        max_length=model.decoder.config.max_position_embeddings,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        use_cache=True,
        bad_words_ids=[[tokenizer.unk_token_id]],  # never emit <unk>
        return_dict_in_generate=True,
    )

    # Keep the structural tokens while decoding: token2json needs
    # <s_answer>...</s_answer> to locate the answer field.
    sequence = processor.batch_decode(outputs.sequences)[0]
    sequence = sequence.replace(tokenizer.eos_token, "")
    sequence = sequence.replace(tokenizer.pad_token, "")
    # Drop only the first tag, i.e. the task start token (<s_docvqa>).
    sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()

    parsed = processor.token2json(sequence)
    answer = parsed.get("answer") if isinstance(parsed, dict) else None
    # Fall back to the cleaned sequence if no answer field was produced.
    return answer if isinstance(answer, str) else sequence
# Bind the interface to a module-level name and only start the server when
# this file is run as a script, so importing the module (tests, tooling)
# does not launch a web server as a side effect.
demo = gr.Interface(
    fn=extract_info,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="Receipt Total Extractor",
)

if __name__ == "__main__":
    demo.launch()