Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -103,14 +103,12 @@ def convert(pdf_file):
|
|
103 |
if ocr_text.strip():
|
104 |
markdown_output += ocr_text + "\n"
|
105 |
|
106 |
-
# ✅ Detección limitada de imágenes embebidas (hasta 5 contornos grandes)
|
107 |
try:
|
108 |
img_cv = np.array(img)
|
109 |
gray = cv2.cvtColor(img_cv, cv2.COLOR_RGB2GRAY)
|
110 |
_, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
|
111 |
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
112 |
|
113 |
-
# Ordenar por área y limitar a 5 regiones grandes
|
114 |
contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
|
115 |
|
116 |
for i, cnt in enumerate(contours):
|
@@ -131,27 +129,21 @@ def convert(pdf_file):
|
|
131 |
with open(markdown_path, "w", encoding="utf-8") as f:
|
132 |
f.write(markdown_output)
|
133 |
|
134 |
-
return markdown_output.strip(),
|
135 |
|
|
|
136 |
with gr.Blocks() as demo:
|
137 |
with gr.Row():
|
138 |
pdf_input = gr.File(label="Sube tu PDF", type="filepath")
|
139 |
submit_btn = gr.Button("Procesar PDF")
|
140 |
|
141 |
-
markdown_output = gr.Textbox(label="Markdown estructurado", lines=
|
142 |
-
|
143 |
gallery_output = gr.Gallery(label="Imágenes extraídas", type="file")
|
144 |
download_md = gr.File(label="Descargar .md")
|
145 |
|
146 |
-
with gr.Row():
|
147 |
-
reload_btn = gr.Button("🔄 Refrescar app")
|
148 |
-
|
149 |
-
# Procesar PDF
|
150 |
submit_btn.click(fn=convert, inputs=[pdf_input], outputs=[markdown_output, gallery_output, download_md])
|
151 |
|
152 |
-
#
|
153 |
-
|
154 |
|
155 |
demo.launch()
|
156 |
-
|
157 |
-
|
|
|
103 |
if ocr_text.strip():
|
104 |
markdown_output += ocr_text + "\n"
|
105 |
|
|
|
106 |
try:
|
107 |
img_cv = np.array(img)
|
108 |
gray = cv2.cvtColor(img_cv, cv2.COLOR_RGB2GRAY)
|
109 |
_, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
|
110 |
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
111 |
|
|
|
112 |
contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
|
113 |
|
114 |
for i, cnt in enumerate(contours):
|
|
|
129 |
with open(markdown_path, "w", encoding="utf-8") as f:
|
130 |
f.write(markdown_output)
|
131 |
|
132 |
+
return markdown_output.strip(), image_paths, markdown_path
|
133 |
|
134 |
+
# Interfaz compatible con Gradio 3.x
|
135 |
with gr.Blocks() as demo:
|
136 |
with gr.Row():
|
137 |
pdf_input = gr.File(label="Sube tu PDF", type="filepath")
|
138 |
submit_btn = gr.Button("Procesar PDF")
|
139 |
|
140 |
+
markdown_output = gr.Textbox(label="Markdown estructurado", lines=25, interactive=True)
|
|
|
141 |
gallery_output = gr.Gallery(label="Imágenes extraídas", type="file")
|
142 |
download_md = gr.File(label="Descargar .md")
|
143 |
|
|
|
|
|
|
|
|
|
144 |
submit_btn.click(fn=convert, inputs=[pdf_input], outputs=[markdown_output, gallery_output, download_md])
|
145 |
|
146 |
+
# Recargar app (link HTML)
|
147 |
+
gr.HTML('<a href="" style="display:inline-block;padding:0.5em 1em;background:#eee;border-radius:4px;text-decoration:none;">🔄 Refrescar app</a>')
|
148 |
|
149 |
demo.launch()
|
|
|
|