File size: 1,695 Bytes
145d936
 
d4b4544
beb65ba
b697ac0
145d936
312add7
d4b4544
3920f3b
beb65ba
 
 
c1d7645
beb65ba
d4b4544
 
145d936
d4b4544
beb65ba
d4b4544
 
beb65ba
 
 
 
 
 
 
 
 
 
145d936
beb65ba
 
 
 
 
 
 
 
 
d4b4544
 
 
beb65ba
d4b4544
 
145d936
 
 
c1d7645
 
 
 
 
 
 
145d936
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import spaces
import gradio as gr
import fitz  # PyMuPDF
import os

@spaces.GPU
def convert(pdf_file):
    """Convert a PDF into Markdown, saving embedded images as PNG files.

    Each page is read with ``page.get_text("dict")``. Text lines and image
    blocks are ordered top-to-bottom by the ``y`` coordinate of their
    bounding box and emitted as paragraphs separated by blank lines. Images
    are written to the ``extracted_images`` directory and referenced from
    the Markdown with ``![imagen](path)``.

    Args:
        pdf_file: Path of the PDF to convert (passed to ``fitz.open``).

    Returns:
        A ``(markdown, metadata)`` tuple: the Markdown string and an empty
        dict for the metadata output.
    """
    image_dir = "extracted_images"
    os.makedirs(image_dir, exist_ok=True)
    image_counter = 0
    paragraphs = []

    # The context manager closes the document even if extraction fails.
    with fitz.open(pdf_file) as doc:
        for page in doc:
            elements = []

            for block in page.get_text("dict")["blocks"]:
                if block["type"] == 0:  # text block
                    for line in block["lines"]:
                        # Join the spans of one line so that a line with
                        # mixed fonts stays a single paragraph and its
                        # pieces cannot be reordered by the sort below.
                        text = "".join(
                            span["text"] for span in line["spans"]
                        ).strip()
                        if text:  # skip whitespace-only lines
                            elements.append((line["bbox"][1], text))
                elif block["type"] == 1:  # image block
                    # Use the bytes carried by this block rather than the
                    # first image listed for the page, so every image block
                    # yields its own picture.
                    image_bytes = block.get("image")
                    if not image_bytes:
                        continue
                    pix = fitz.Pixmap(image_bytes)
                    if pix.n - pix.alpha >= 4:
                        # CMYK (with or without alpha) cannot be saved as
                        # PNG; convert to RGB first.
                        pix = fitz.Pixmap(fitz.csRGB, pix)
                    img_path = os.path.join(
                        image_dir, f"imagen_{image_counter}.png"
                    )
                    pix.save(img_path)
                    pix = None  # release the pixmap's memory promptly

                    elements.append((block["bbox"][1], f"![imagen]({img_path})"))
                    image_counter += 1

            # Order by vertical position; the sort is stable, so elements
            # sharing the same y keep their extraction order.
            elements.sort(key=lambda item: item[0])
            paragraphs.extend(content for _, content in elements)

    return "\n\n".join(paragraphs).strip(), {}

# Build the web UI: a single PDF upload in; Markdown text and a JSON
# metadata panel out. Then start serving it.
demo = gr.Interface(
    fn=convert,
    inputs=[gr.File(label="Upload PDF", type="filepath")],
    outputs=[gr.Text(label="Markdown"), gr.JSON(label="Metadata")],
)
demo.launch()