# File: tools / app.py
# Page metadata captured with this file (hosting site's file view):
#   author avatar: not-lain
#   last commit: "add markdown support" (6f72e4f)
#   size: 3.14 kB
import io
import os
from typing import List

import fitz
import gradio as gr
from docling.document_converter import DocumentConverter
from loadimg import load_img
from pdfitdown.pdfconversion import Converter
from PIL import Image
# Shared pdfitdown converter, created once at import time and reused by the
# PDF-producing conversion functions below.
converter = Converter()
# Shared docling converter, also created once at import time.
# NOTE(review): presumably meant for the markdown export — confirm against
# convert_file_to_markdown.
docling_converter = DocumentConverter()
def convert_file_to_pdf(filename: str) -> str:
    """
    Converts a markdown file to PDF format.

    Args:
        filename: str
            The path to the markdown file to be converted. An object that
            exposes the path through a ``.name`` attribute (the form the
            upload handling previously relied on) is accepted as well.

    Returns:
        str: The file path of the generated PDF file, i.e. the input path
        with its extension replaced by ``.pdf``.
    """
    # A plain string has no ``.name`` attribute, so fall back to the value itself.
    source_path = getattr(filename, "name", filename)
    # splitext only inspects the final path component, so a dot in a parent
    # directory name can never truncate the output path.
    output_path = os.path.splitext(source_path)[0] + ".pdf"
    converter.convert(source_path, output_path)
    return output_path
def convert_file_to_img(image_file: str = None, txt: str = "") -> List["Image.Image"]:
    """
    Collect images from an image reference string and/or an uploaded file.

    Args:
        image_file: Optional path of the file to convert, or an object that
            exposes the path through a ``.name`` attribute. The file is first
            converted to a PDF next to the input (same path, ``.pdf``
            extension) and every page of that PDF is rendered to an image.
        txt: Optional image reference handed to ``load_img`` (the UI labels
            this field "base64, url"). Empty or ``None`` means "not given".

    Returns:
        List[Image.Image]: The image loaded from ``txt`` (if given) followed
        by one RGB image per page of the PDF generated from ``image_file``
        (if given). The list is empty when neither input is provided.
    """
    img_list = []

    # Truthiness check so that both "" and None are treated as "no reference".
    if txt:
        img_list.append(load_img(txt, output_type="pil"))

    if image_file is not None:
        # A plain string has no ``.name`` attribute, so fall back to the value itself.
        source_path = getattr(image_file, "name", image_file)
        # splitext only inspects the final path component, so a dot in a
        # parent directory name can never truncate the output path.
        output_path = os.path.splitext(source_path)[0] + ".pdf"
        converter.convert(source_path, output_path)

        doc = fitz.open(output_path)
        try:
            for page in doc:
                # Render the page to PNG bytes, then reload it as an RGB PIL image.
                page_bytes = page.get_pixmap().tobytes("png")
                img_list.append(load_img(Image.open(io.BytesIO(page_bytes))).convert("RGB"))
        finally:
            # Release the document even if rendering a page fails.
            doc.close()

    return img_list
def convert_file_to_markdown(filename: str) -> str:
    """
    Converts a file to markdown format.

    Args:
        filename: str
            The path to the file to be converted. An object that exposes the
            path through a ``.name`` attribute is accepted as well.

    Returns:
        str: The markdown representation of the file.
    """
    # A plain string has no ``.name`` attribute, so fall back to the value itself.
    source_path = getattr(filename, "name", filename)
    # Use the docling converter here: its result carries the ``.document``
    # object whose ``export_to_markdown()`` is called below. The pdfitdown
    # ``converter`` is called with an explicit output path elsewhere in this
    # file and is not the object that produces such a result.
    result = docling_converter.convert(source_path)
    return result.document.export_to_markdown()
# Individual interfaces, one per conversion function.

# Upload a file and receive the PDF produced by convert_file_to_pdf.
file_to_pdf = gr.Interface(
    title="File to PDF Converter",
    description="Convert your files to PDF format",
    fn=convert_file_to_pdf,
    inputs=gr.File(label="Upload README/Markdown file"),
    outputs=gr.File(label="Converted PDF"),
)
# Upload a file and/or enter an image reference; the images returned by
# convert_file_to_img are shown in a gallery.
file_to_image = gr.Interface(
    title="File to Images Converter",
    description="Convert your images to an image format",
    fn=convert_file_to_img,
    inputs=[
        gr.File(label="Upload Image"),
        gr.Textbox(label="base64, url"),
    ],
    outputs=gr.Gallery(label="Converted Images"),
)
# Upload a file and show the markdown returned by convert_file_to_markdown
# in a text box.
file_to_markdown = gr.Interface(
    title="File to Markdown Converter",
    description="Convert your files to markdown format",
    fn=convert_file_to_markdown,
    inputs=gr.File(label="Upload File"),
    outputs=gr.Textbox(label="Converted Markdown"),
)
# Combine the three interfaces into a single app; the tab names are given in
# the same order as the interfaces.
demo = gr.TabbedInterface(
    [
        file_to_pdf,
        file_to_image,
        file_to_markdown,
    ],
    [
        "File to PDF",
        "File to Image",
        "File to Markdown",
    ],
)
if __name__ == "__main__":
    # Only launch when run as a script: serve on 0.0.0.0:7860 with debug
    # mode and the MCP server option switched on.
    demo.launch(
        mcp_server=True,
        debug=True,
        server_port=7860,
        server_name="0.0.0.0",
    )