GAS17 committed on
Commit
41311bb
·
verified ·
1 Parent(s): 08eaeab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -37
app.py CHANGED
@@ -1,52 +1,48 @@
1
  import gradio as gr
2
- import pytesseract
3
- from PIL import Image
4
- import os
5
- from pdf2image import convert_from_bytes
6
  import io
 
7
 
8
- # Configurar Tesseract para usar el modelo entrenado en Hugging Face Spaces
9
- tessdata_dir = "/home/user/.apt/usr/share/tesseract-ocr/4.00/tessdata"
10
- if os.path.exists(tessdata_dir):
11
- pytesseract.pytesseract.tesseract_cmd = '/home/user/.apt/usr/bin/tesseract'
12
- os.environ["TESSDATA_PREFIX"] = tessdata_dir
13
 
14
  def perform_ocr(file):
15
- if file is None:
16
- return "Por favor, sube un archivo."
17
-
18
- # Verificar si el archivo es un PDF
19
  if file.name.lower().endswith('.pdf'):
20
- # Convertir PDF a imágenes
21
- try:
22
- images = convert_from_bytes(file.read() if hasattr(file, 'read') else file)
23
- except Exception as e:
24
- return f"Error al procesar el PDF: {str(e)}"
25
-
26
  text = ""
27
- for image in images:
28
- text += pytesseract.image_to_string(image) + "\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  else:
30
- # Procesar como imagen
31
- try:
32
- if hasattr(file, 'read'):
33
- image = Image.open(io.BytesIO(file.read()))
34
- else:
35
- image = Image.open(file)
36
- text = pytesseract.image_to_string(image)
37
- except Exception as e:
38
- return f"Error al procesar la imagen: {str(e)}"
39
-
40
- return text
41
 
42
- # Crear la interfaz de Gradio
43
  iface = gr.Interface(
44
  fn=perform_ocr,
45
- inputs=gr.File(label="Sube una imagen o PDF"),
46
  outputs="text",
47
- title="Tesseract OCR para Imágenes y PDFs",
48
- description="Sube una imagen o un archivo PDF para extraer texto usando Tesseract OCR."
49
  )
50
 
51
- # Lanzar la interfaz
52
  iface.launch()
 
 
1
  import gradio as gr
2
+ from doctr.io import DocumentFile
3
+ from doctr.models import ocr_predictor
4
+ import fitz # PyMuPDF
 
5
  import io
6
+ from PIL import Image
7
 
8
+ # Initialize the OCR model
9
+ model = ocr_predictor(pretrained=True)
 
 
 
10
 
11
  def perform_ocr(file):
 
 
 
 
12
  if file.name.lower().endswith('.pdf'):
13
+ # Process PDF
 
 
 
 
 
14
  text = ""
15
+ pdf_document = fitz.open(file.name)
16
+ for page_num in range(pdf_document.page_count):
17
+ page = pdf_document[page_num]
18
+ pix = page.get_pixmap()
19
+ img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
20
+
21
+ # Convert PIL Image to bytes
22
+ img_byte_arr = io.BytesIO()
23
+ img.save(img_byte_arr, format='PNG')
24
+ img_byte_arr = img_byte_arr.getvalue()
25
+
26
+ # Perform OCR on the image
27
+ doc = DocumentFile.from_images(img_byte_arr)
28
+ result = model(doc)
29
+ text += result.render() + "\n\n" # Add newlines between pages
30
+ return text.strip()
31
  else:
32
+ # Process image
33
+ doc = DocumentFile.from_images(file.name)
34
+ result = model(doc)
35
+ return result.render()
 
 
 
 
 
 
 
36
 
37
+ # Create Gradio interface
38
  iface = gr.Interface(
39
  fn=perform_ocr,
40
+ inputs=gr.File(label="Upload PDF or Image"),
41
  outputs="text",
42
+ title="OCR with doctr (PDF and Images)",
43
+ description="Upload a PDF file or an image to extract text using OCR."
44
  )
45
 
46
+ # Launch the interface
47
  iface.launch()
48
+