Biifruu committed on
Commit
f9fc4b4
verified
1 Parent(s): 7ad1608

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -22
app.py CHANGED
@@ -1,11 +1,10 @@
1
- import spaces
2
- import gradio as gr
3
  import fitz # PyMuPDF
4
  from PIL import Image
5
  import pytesseract
6
  import os
7
  import numpy as np
8
  import cv2
 
9
 
10
  def clean_ocr_text(text):
11
  lines = text.splitlines()
@@ -22,7 +21,6 @@ def extract_text_markdown(doc, image_paths, page_index, seen_xrefs):
22
  elements = []
23
 
24
  page = doc[0]
25
-
26
  blocks = page.get_text("dict")["blocks"]
27
 
28
  for b in blocks:
@@ -70,9 +68,8 @@ def extract_text_markdown(doc, image_paths, page_index, seen_xrefs):
70
  markdown_output += "\n---\n\n"
71
  return markdown_output.strip()
72
 
73
- @spaces.GPU
74
- def convert(pdf_file):
75
- doc = fitz.open(pdf_file)
76
  markdown_output = ""
77
  image_paths = []
78
  seen_xrefs = set()
@@ -131,19 +128,16 @@ def convert(pdf_file):
131
 
132
  return markdown_output.strip(), image_paths, markdown_path
133
 
134
- # Interfaz Gradio compatible
135
- with gr.Blocks() as demo:
136
- with gr.Row():
137
- pdf_input = gr.File(label="Sube tu PDF", type="filepath")
138
- submit_btn = gr.Button("Procesar PDF")
139
-
140
- # 🔄 Botón refrescar eliminado
141
-
142
- markdown_output = gr.Textbox(label="Markdown estructurado", lines=25, interactive=True)
143
- gallery_output = gr.Gallery(label="Imágenes extraídas", type="file")
144
- download_md = gr.File(label="Descargar .md")
145
-
146
- submit_btn.click(fn=convert, inputs=[pdf_input], outputs=[markdown_output, gallery_output, download_md])
147
-
148
- demo.launch()
149
-
 
 
 
1
  import fitz # PyMuPDF
2
  from PIL import Image
3
  import pytesseract
4
  import os
5
  import numpy as np
6
  import cv2
7
+ import gradio as gr
8
 
9
  def clean_ocr_text(text):
10
  lines = text.splitlines()
 
21
  elements = []
22
 
23
  page = doc[0]
 
24
  blocks = page.get_text("dict")["blocks"]
25
 
26
  for b in blocks:
 
68
  markdown_output += "\n---\n\n"
69
  return markdown_output.strip()
70
 
71
+ def convert(pdf_file_path):
72
+ doc = fitz.open(pdf_file_path)
 
73
  markdown_output = ""
74
  image_paths = []
75
  seen_xrefs = set()
 
128
 
129
  return markdown_output.strip(), image_paths, markdown_path
130
 
131
+ # --- Interfaz Gradio API-compatible ---
132
+ iface = gr.Interface(
133
+ fn=convert,
134
+ inputs=gr.File(type="filepath", label="Archivo PDF"),
135
+ outputs=[
136
+ gr.Textbox(label="Markdown generado", lines=25),
137
+ gr.Gallery(label="Imágenes extraídas", type="file"),
138
+ gr.File(label="Descargar Markdown")
139
+ ],
140
+ title="Conversor PDF a Markdown"
141
+ )
142
+
143
+ iface.launch()