File size: 3,968 Bytes
7b19e66 7888f8c bb2ddfa 72423ad 1400040 579e5f2 7b19e66 df1c5ff 1400040 de6b2de 579e5f2 de6b2de e40d876 df1c5ff e40d876 df1c5ff e40d876 de6b2de df1c5ff 7b19e66 de6b2de e40d876 bb2ddfa de6b2de c52be9c bb2ddfa de6b2de 6f72e4f de6b2de 6f72e4f 1400040 7b19e66 579e5f2 7b19e66 df1c5ff 7b19e66 df1c5ff de6b2de 7b19e66 6f72e4f df1c5ff de6b2de bb2ddfa de6b2de 7b19e66 6f72e4f de6b2de 6f72e4f 579e5f2 7b19e66 579e5f2 7b19e66 de6b2de |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import io
import os
from typing import List, Optional

import fitz
import gradio as gr
import pdfplumber
from docx import Document
from loadimg import load_img
from markitdown import MarkItDown
from pdfitdown.pdfconversion import Converter
from PIL import Image
# Shared PDF converter instance, used by convert_file_to_pdf and convert_file_to_img.
converter = Converter()
# Shared MarkItDown instance, used by convert_file_to_markdown.
md = MarkItDown()
def convert_file_to_pdf(filename: str) -> str:
    """
    Converts a markdown file to PDF format.

    Args:
        filename: str
            The path to the markdown file to be converted. An upload object
            that exposes its path through a ``name`` attribute is accepted
            as well.

    Returns:
        str: The file path of the generated PDF file (the input path with
        its extension replaced by ``.pdf``).
    """
    # Plain paths (what the annotation promises) are used as-is; any other
    # object is expected to carry its path in ``.name``.
    if isinstance(filename, (str, os.PathLike)):
        source_path = os.fspath(filename)
    else:
        source_path = filename.name
    # splitext only inspects the final path component, so a dot inside a
    # directory name cannot truncate the output path.
    output_path = os.path.splitext(source_path)[0] + ".pdf"
    converter.convert(source_path, output_path)
    return output_path
def convert_file_to_img(image_file: Optional[str] = None, txt: str = "") -> List[Image.Image]:
    """
    Collect images from a text reference and/or from an uploaded file.

    Args:
        image_file: Optional path of the file to convert (an upload object
            exposing its path through a ``name`` attribute is accepted as
            well). The file is first converted to PDF, then every page of
            that PDF is rendered to an RGB image.
        txt: Optional image reference passed to ``load_img`` (the UI labels
            this field "base64, url"). Ignored when empty.

    Returns:
        List[Image.Image]: The image loaded from ``txt`` (if given) followed
        by one image per page of the converted file (if given). Empty when
        neither input is provided.
    """
    img_list = []
    # Truthiness check so that both "" and None mean "no text input".
    if txt:
        img_list.append(load_img(txt, output_type="pil"))
    if image_file is not None:
        # Plain paths are used as-is; any other object is expected to carry
        # its path in ``.name``.
        if isinstance(image_file, (str, os.PathLike)):
            source_path = os.fspath(image_file)
        else:
            source_path = image_file.name
        # splitext only inspects the final path component, so a dot inside a
        # directory name cannot truncate the output path.
        output_path = os.path.splitext(source_path)[0] + ".pdf"
        converter.convert(source_path, output_path)
        # The context manager closes the document even if rendering fails.
        with fitz.open(output_path) as doc:
            for page in doc:
                page_bytes = page.get_pixmap().tobytes("png")
                page_image = load_img(Image.open(io.BytesIO(page_bytes)))
                img_list.append(page_image.convert("RGB"))
    return img_list
def convert_file_to_markdown(filename: str) -> str:
    """
    Converts a file to markdown format using markitdown.

    Args:
        filename: str
            The path to the file to be converted. An upload object that
            exposes its path through a ``name`` attribute is accepted as
            well.

    Returns:
        str: The markdown representation of the file.
    """
    # Plain paths (what the annotation promises) are used as-is; any other
    # object is expected to carry its path in ``.name``.
    if isinstance(filename, (str, os.PathLike)):
        source_path = os.fspath(filename)
    else:
        source_path = filename.name
    return md.convert(source_path).text_content
def convert_pdf_to_word(filename: str) -> str:
    """
    Converts a PDF file to Word format.

    Only the extractable text is carried over: each PDF page that yields
    text becomes one paragraph in the Word document.

    Args:
        filename: str
            The path to the PDF file to be converted. An upload object that
            exposes its path through a ``name`` attribute is accepted as
            well.

    Returns:
        str: The file path of the generated Word file (the input path with
        its extension replaced by ``.docx``).
    """
    # Plain paths (what the annotation promises) are used as-is; any other
    # object is expected to carry its path in ``.name``.
    if isinstance(filename, (str, os.PathLike)):
        source_path = os.fspath(filename)
    else:
        source_path = filename.name
    # splitext only inspects the final path component, so a dot inside a
    # directory name cannot truncate the output path.
    output_path = os.path.splitext(source_path)[0] + ".docx"
    doc = Document()
    with pdfplumber.open(source_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            # Pages without extractable text are skipped.
            if text:
                doc.add_paragraph(text)
    doc.save(output_path)
    return output_path
# One Gradio interface per conversion function.
file_to_pdf = gr.Interface(
    title="File to PDF Converter",
    description="Convert your files to PDF format",
    fn=convert_file_to_pdf,
    inputs=gr.File(label="Upload README/Markdown file"),
    outputs=gr.File(label="Converted PDF"),
)

file_to_image = gr.Interface(
    title="File to Images Converter",
    description="Convert your images to an image format",
    fn=convert_file_to_img,
    inputs=[
        gr.File(label="Upload Image"),
        gr.Textbox(label="base64, url"),
    ],
    outputs=gr.Gallery(label="Converted Images"),
)

file_to_markdown = gr.Interface(
    title="File to Markdown Converter",
    description="Convert your files to markdown format",
    fn=convert_file_to_markdown,
    inputs=gr.File(label="Upload File"),
    outputs=gr.Textbox(label="Converted Markdown"),
)

pdf_to_word = gr.Interface(
    title="PDF to Word Converter",
    description="Convert your PDF files to Word format",
    fn=convert_pdf_to_word,
    inputs=gr.File(label="Upload PDF file"),
    outputs=gr.File(label="Converted Word Document"),
)

# Group the four interfaces under one tabbed app; the tab names are listed
# in the same order as the interfaces.
demo = gr.TabbedInterface(
    interface_list=[file_to_pdf, file_to_image, file_to_markdown, pdf_to_word],
    tab_names=["File to PDF", "File to Image", "File to Markdown", "PDF to Word"],
)
if __name__ == "__main__":
    # Serve on every network interface at port 7860, with debug mode and
    # the MCP server switched on.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        debug=True,
        mcp_server=True,
    )
|