neoBIT656 committed on
Commit
98b1c09
verified
1 Parent(s): 5bd833b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -7
app.py CHANGED
@@ -1,7 +1,49 @@
1
- torch
2
- transformers
3
- pytorch-lightning
4
- timm
5
- pdf2image
6
- pillow
7
- gradio
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import VisionEncoderDecoderModel, DonutProcessor
3
+ from PIL import Image
4
+ from pdf2image import convert_from_bytes
5
+ import gradio as gr
6
+
7
+ # Configuración del modelo Donut
8
# Hub id of the Donut checkpoint to load; change it to use another model.
MODEL_ID = "mychen76/invoice-and-receipts_donut_v1"

print("Cargando modelo Donut...")

# The processor and the model are loaded independently from the same checkpoint.
processor = DonutProcessor.from_pretrained(MODEL_ID)

# eval() switches the module to inference mode and returns the module itself,
# so it can be chained onto the load.
model = VisionEncoderDecoderModel.from_pretrained(MODEL_ID).eval()
13
+
14
+ # Función para procesar documentos
15
def process_document(file):
    """Run the Donut model over an uploaded document, one result per page.

    Args:
        file: The upload handed over by the UI. Either a filesystem path
            (``str`` or ``os.PathLike``) or an object whose ``name``
            attribute holds the path of the uploaded file. ``None``
            (nothing uploaded) is accepted.

    Returns:
        A list of decoded strings, one per page. A non-PDF input yields a
        single-element list; ``None`` yields an empty list.
    """
    import os

    if file is None:
        return []

    # Work from the path rather than the wrapper's own handle: this covers
    # both a plain path string and a file-like wrapper, and does not depend
    # on where (or whether) the wrapper's handle points at the upload.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    # Case-insensitive so that "SCAN.PDF" is treated as a PDF too.
    if path.lower().endswith(".pdf"):
        with open(path, "rb") as handle:
            pages = convert_from_bytes(handle.read(), dpi=300)
    else:
        # convert() returns a new, fully loaded image, so the source file
        # can be closed as soon as the conversion is done.
        with Image.open(path) as source:
            pages = [source.convert("RGB")]

    results = []
    for page in pages:
        # `max_patches` was dropped here: it is a Pix2Struct image-processor
        # argument, not one the Donut processor defines.
        inputs = processor(page, return_tensors="pt")

        # NOTE(review): Donut checkpoints are normally prompted with a task
        # start token passed as `decoder_input_ids`; confirm whether this
        # checkpoint needs one, and whether the default generation length
        # is long enough for a full invoice.
        with torch.no_grad():
            outputs = model.generate(**inputs)

        results.append(
            processor.batch_decode(outputs, skip_special_tokens=True)[0]
        )

    return results
37
+
38
+ # Interfaz Gradio
39
# Gradio UI: one file upload in, JSON out.
iface = gr.Interface(
    fn=process_document,
    # `type` is left at the component default. Gradio 3 defaults to the
    # "file" wrapper that was requested explicitly before, while Gradio 4+
    # only accepts "filepath"/"binary" and rejects type="file".
    # NOTE(review): on Gradio 4+ the callback then receives a path string;
    # confirm process_document accepts that.
    inputs=gr.File(label="Sube tu factura o recibo (PDF o imagen)"),
    outputs="json",
    # Accented characters restored from mis-encoded text.
    title="Donut OCR - Extracción de datos de facturas",
    description=(
        "Sube un PDF o imagen y extrae información estructurada "
        "(número de factura, fecha, monto, etc.) utilizando Donut OCR."
    ),
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()