Biifruu committed on
Commit
bf60276
·
verified ·
1 Parent(s): f9fc4b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -16
app.py CHANGED
@@ -1,10 +1,11 @@
 
 
1
  import fitz # PyMuPDF
2
  from PIL import Image
3
  import pytesseract
4
  import os
5
  import numpy as np
6
  import cv2
7
- import gradio as gr
8
 
9
  def clean_ocr_text(text):
10
  lines = text.splitlines()
@@ -68,8 +69,14 @@ def extract_text_markdown(doc, image_paths, page_index, seen_xrefs):
68
  markdown_output += "\n---\n\n"
69
  return markdown_output.strip()
70
 
71
- def convert(pdf_file_path):
72
- doc = fitz.open(pdf_file_path)
 
 
 
 
 
 
73
  markdown_output = ""
74
  image_paths = []
75
  seen_xrefs = set()
@@ -128,16 +135,16 @@ def convert(pdf_file_path):
128
 
129
  return markdown_output.strip(), image_paths, markdown_path
130
 
131
- # --- Interfaz Gradio API-compatible ---
132
- iface = gr.Interface(
133
- fn=convert,
134
- inputs=gr.File(type="filepath", label="Archivo PDF"),
135
- outputs=[
136
- gr.Textbox(label="Markdown generado", lines=25),
137
- gr.Gallery(label="Imágenes extraídas", type="file"),
138
- gr.File(label="Descargar Markdown")
139
- ],
140
- title="Conversor PDF a Markdown"
141
- )
142
-
143
- iface.launch()
 
1
+ import spaces
2
+ import gradio as gr
3
  import fitz # PyMuPDF
4
  from PIL import Image
5
  import pytesseract
6
  import os
7
  import numpy as np
8
  import cv2
 
9
 
10
  def clean_ocr_text(text):
11
  lines = text.splitlines()
 
69
  markdown_output += "\n---\n\n"
70
  return markdown_output.strip()
71
 
72
+ @spaces.GPU
73
+ def convert(inputs):
74
+ # <- inputs es una lista, aunque solo tenga 1 archivo
75
+ pdf_file = inputs[0]
76
+ if not os.path.exists(pdf_file):
77
+ return "Archivo no encontrado", [], ""
78
+
79
+ doc = fitz.open(pdf_file)
80
  markdown_output = ""
81
  image_paths = []
82
  seen_xrefs = set()
 
135
 
136
  return markdown_output.strip(), image_paths, markdown_path
137
 
138
+ # Interfaz Gradio
139
+ with gr.Blocks() as demo:
140
+ with gr.Row():
141
+ pdf_input = gr.File(label="Sube tu PDF", type="filepath")
142
+ submit_btn = gr.Button("Procesar PDF")
143
+
144
+ markdown_output = gr.Textbox(label="Markdown estructurado", lines=25, interactive=True)
145
+ gallery_output = gr.Gallery(label="Imágenes extraídas", type="file")
146
+ download_md = gr.File(label="Descargar .md")
147
+
148
+ submit_btn.click(fn=convert, inputs=[pdf_input], outputs=[markdown_output, gallery_output, download_md])
149
+
150
+ demo.launch()