Biifruu committed on
Commit
f2c9885
·
verified ·
1 Parent(s): 0c78c99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -25
app.py CHANGED
@@ -18,12 +18,7 @@ def extract_text_markdown(doc):
18
  for line in b["lines"]:
19
  line_y = line["bbox"][1]
20
  spans = line["spans"]
21
-
22
- if len(spans) > 1:
23
- line_text = " | ".join(span["text"].strip() for span in spans)
24
- else:
25
- line_text = " ".join(span["text"].strip() for span in spans)
26
-
27
  if line_text:
28
  elements.append((line_y, line_text))
29
  elif b["type"] == 1: # Imagen
@@ -56,34 +51,24 @@ def convert(pdf_file):
56
  doc = original_doc
57
 
58
  markdown = extract_text_markdown(doc)
59
- metadata = {} # Puedes rellenar si quieres
60
  return markdown, metadata
61
 
62
  # Gradio Blocks UI
63
- with gr.Blocks(title="Extractor PDF a Markdown") as demo:
64
- gr.Markdown("### PDF → Markdown con imágenes como enlaces y botón de copiar")
65
 
66
  pdf_input = gr.File(label="Sube tu PDF", type="filepath")
67
- markdown_output = gr.Textbox(label="Markdown generado", lines=25)
68
  metadata_output = gr.JSON(label="Metadata")
69
 
70
- hidden_textarea = gr.Textbox(visible=False)
71
 
72
- with gr.Row():
73
- convert_btn = gr.Button("Convertir PDF")
74
- copy_btn = gr.Button("📋 Copiar Markdown")
 
75
 
76
  convert_btn.click(fn=convert, inputs=pdf_input, outputs=[markdown_output, metadata_output])
77
- # Al hacer clic en copiar, movemos el contenido visible al invisible y ejecutamos JS
78
- copy_btn.click(lambda text: text, inputs=markdown_output, outputs=hidden_textarea).then(
79
- None,
80
- _js="""
81
- () => {
82
- const text = document.querySelectorAll("textarea")[1].value;
83
- navigator.clipboard.writeText(text);
84
- alert("¡Markdown copiado al portapapeles!");
85
- }
86
- """
87
- )
88
 
89
  demo.launch()
 
18
  for line in b["lines"]:
19
  line_y = line["bbox"][1]
20
  spans = line["spans"]
21
+ line_text = " ".join(span["text"].strip() for span in spans)
 
 
 
 
 
22
  if line_text:
23
  elements.append((line_y, line_text))
24
  elif b["type"] == 1: # Imagen
 
51
  doc = original_doc
52
 
53
  markdown = extract_text_markdown(doc)
54
+ metadata = {} # Puedes personalizarlo si quieres
55
  return markdown, metadata
56
 
57
  # Gradio Blocks UI
58
+ with gr.Blocks(title="PDF Markdown") as demo:
59
+ gr.Markdown("### PDF → Markdown con enlaces de imagen y botón copiar")
60
 
61
  pdf_input = gr.File(label="Sube tu PDF", type="filepath")
62
+ markdown_output = gr.Textbox(label="Markdown generado", lines=25, elem_id="markdown-box")
63
  metadata_output = gr.JSON(label="Metadata")
64
 
65
+ convert_btn = gr.Button("Convertir PDF")
66
 
67
+ # HTML manual con botón copiar
68
+ gr.HTML("""
69
+ <button onclick="navigator.clipboard.writeText(document.getElementById('markdown-box').value)">📋 Copiar Markdown</button>
70
+ """)
71
 
72
  convert_btn.click(fn=convert, inputs=pdf_input, outputs=[markdown_output, metadata_output])
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  demo.launch()