import spaces
import gradio as gr
import fitz  # PyMuPDF
import os


def _save_image_block(block, image_dir, index):
    """Save the image carried by a text-dict image block as a PNG; return its path."""
    # Use the bytes embedded in this block rather than the page's image list,
    # so each image block is written with its own picture.
    pix = fitz.Pixmap(block["image"])
    # PNG cannot store CMYK. Subtract the alpha channel before counting
    # components so that CMYK without alpha (n == 4) is converted as well.
    if pix.n - pix.alpha >= 4:
        pix = fitz.Pixmap(fitz.csRGB, pix)
    img_path = os.path.join(image_dir, f"imagen_{index}.png")
    pix.save(img_path)
    return img_path


def _page_elements(page, image_dir, first_image_index):
    """Return (markdown fragments of one page in top-to-bottom order, next image index)."""
    elements = []
    image_index = first_image_index
    for block in page.get_text("dict")["blocks"]:
        if block["type"] == 0:  # text block
            for line in block["lines"]:
                for span in line["spans"]:
                    text = span["text"].strip()
                    # Whitespace-only spans would only add runs of blank lines.
                    if text:
                        elements.append((span["bbox"][1], text))
        elif block["type"] == 1:  # image block
            img_path = _save_image_block(block, image_dir, image_index)
            elements.append((block["bbox"][1], f"![imagen]({img_path})"))
            image_index += 1

    # Order by the top y coordinate. The sort is stable, so fragments sharing
    # the same y keep their extraction order.
    elements.sort(key=lambda item: item[0])
    return [content for _, content in elements], image_index


@spaces.GPU
def convert(pdf_file: str) -> tuple:
    """Convert a PDF into Markdown text, extracting embedded images to disk.

    Every text span becomes its own paragraph and every image block becomes a
    Markdown image link pointing at a PNG written to ``extracted_images/``.
    Within each page the fragments are ordered by vertical position; pages are
    emitted in document order.

    Args:
        pdf_file: Path of the PDF file to convert.

    Returns:
        A ``(markdown, metadata)`` tuple. ``metadata`` is always an empty dict.
    """
    image_dir = "extracted_images"
    os.makedirs(image_dir, exist_ok=True)

    parts = []
    image_counter = 0
    # The context manager closes the document even if extraction fails.
    with fitz.open(pdf_file) as doc:
        for page in doc:
            contents, image_counter = _page_elements(page, image_dir, image_counter)
            parts.extend(contents)

    return "\n\n".join(parts), {}


gr.Interface(
    convert,
    inputs=[
        gr.File(label="Upload PDF", type="filepath"),
    ],
    outputs=[
        gr.Text(label="Markdown"),
        gr.JSON(label="Metadata"),
    ],
).launch()