# `spaces` is deliberately kept as the first import, matching the original order.
import spaces
import gradio as gr
from pdf2image import convert_from_path
import pytesseract
from PIL import Image
import os


# NOTE(review): pytesseract OCR itself does not appear to need a GPU; the
# decorator is preserved as-is because removing it may affect how the Space
# starts up. Confirm against the hosting configuration before changing it.
@spaces.GPU
def convert(pdf_file):
    """Convert a PDF into Markdown-ish plain text by running OCR on each page.

    Each page of the PDF is rasterised with ``pdf2image`` and passed through
    Tesseract. The stripped text of every page becomes one section of the
    output; sections are separated by a blank line. A page on which OCR finds
    no text is represented by the placeholder ``[imagen]()``.

    Args:
        pdf_file: Filesystem path of the uploaded PDF, as supplied by the
            ``gr.File(type="filepath")`` input. May be ``None`` (or empty)
            when the form is submitted without a file.

    Returns:
        A ``(markdown, metadata)`` tuple. ``markdown`` is the concatenated
        page text; ``metadata`` is currently always an empty dict (it is a
        hook for document metadata, e.g. extracted with PyMuPDF).
    """
    metadata = {}

    # Submitting without an upload yields no path; return an empty result
    # rather than letting the PDF rasteriser fail on a missing path.
    if not pdf_file:
        return "", metadata

    pages = convert_from_path(pdf_file)

    sections = []
    for page_image in pages:
        text = pytesseract.image_to_string(page_image).strip()
        # Pages with no recognisable text get an empty-link placeholder so
        # the page is still visible in the output.
        sections.append(text or "[imagen]()")

    # Every section is non-empty and already stripped, so joining with a
    # blank line needs no further trimming.
    return "\n\n".join(sections), metadata


gr.Interface(
    convert,
    inputs=[
        gr.File(label="Upload PDF", type="filepath"),
    ],
    outputs=[
        gr.Text(label="Markdown"),
        gr.JSON(label="Metadata"),
    ],
).launch()