File size: 3,968 Bytes
7b19e66
7888f8c
bb2ddfa
 
 
 
72423ad
1400040
579e5f2
 
7b19e66
df1c5ff
1400040
de6b2de
579e5f2
de6b2de
e40d876
 
 
 
df1c5ff
 
e40d876
 
df1c5ff
e40d876
de6b2de
df1c5ff
7b19e66
 
de6b2de
 
e40d876
 
 
 
 
 
 
 
 
 
 
 
bb2ddfa
de6b2de
 
 
 
c52be9c
 
 
 
 
bb2ddfa
 
de6b2de
 
 
6f72e4f
de6b2de
6f72e4f
 
 
 
 
 
1400040
7b19e66
 
579e5f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b19e66
df1c5ff
 
 
7b19e66
df1c5ff
de6b2de
7b19e66
 
6f72e4f
df1c5ff
de6b2de
bb2ddfa
 
de6b2de
7b19e66
 
6f72e4f
 
 
 
 
de6b2de
6f72e4f
 
579e5f2
 
 
 
 
 
 
 
7b19e66
 
579e5f2
 
7b19e66
 
 
de6b2de
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import io
import os
from typing import List

import fitz
import gradio as gr
import pdfplumber
from docx import Document
from loadimg import load_img
from markitdown import MarkItDown
from pdfitdown.pdfconversion import Converter
from PIL import Image

# Shared pdfitdown Converter, used by convert_file_to_pdf and
# convert_file_to_img to render an input file to PDF.
converter = Converter()
# Shared MarkItDown instance, used by convert_file_to_markdown.
md = MarkItDown()


def convert_file_to_pdf(filename: str) -> str:
    """
    Converts an uploaded file to PDF format.

    The PDF is written next to the input file, using the input's name with
    its extension replaced by ``.pdf``.

    Args:
        filename: str
            The file to be converted: either a path string or an uploaded
            file object that exposes its path as ``.name``.

    Returns:
        str: The file path of the generated PDF file.
    """
    # Uploaded-file objects carry the path in ``.name``; a plain string is
    # already the path.
    input_path = getattr(filename, "name", filename)
    # splitext only looks at the last path component, so a dot in a parent
    # directory name cannot truncate the output path.
    output_path = os.path.splitext(input_path)[0] + ".pdf"
    converter.convert(input_path, output_path)
    return output_path


def convert_file_to_img(image_file: str = None, txt: str = "") -> "List[Image.Image]":
    """
    Converts an uploaded file and/or an image reference to a list of images.

    When ``txt`` is given, the image it refers to is loaded with ``load_img``
    and placed first in the result. When ``image_file`` is given, it is first
    converted to a PDF (written next to the input with a ``.pdf`` extension)
    and every page of that PDF is rendered to an RGB image.

    Args:
        image_file: The file to be converted: either a path string or an
                    uploaded file object that exposes its path as ``.name``.
                    ``None`` skips the file conversion.
        txt: An image reference passed to ``load_img`` (the UI labels this
             field "base64, url"). An empty value skips it.

    Returns:
        List[Image.Image]: The loaded ``txt`` image (if any) followed by one
        image per page of the converted file. Empty when neither input is
        provided.
    """
    img_list = []
    if txt:
        img_list.append(load_img(txt, output_type="pil"))
    if image_file is not None:
        # Uploaded-file objects carry the path in ``.name``; a plain string
        # is already the path.
        input_path = getattr(image_file, "name", image_file)
        # splitext only looks at the last path component, so a dot in a
        # parent directory name cannot truncate the output path.
        output_path = os.path.splitext(input_path)[0] + ".pdf"
        converter.convert(input_path, output_path)
        doc = fitz.open(output_path)
        # The document only exists on this branch, so it must be closed here
        # (and closed even if rendering a page fails).
        try:
            for page in doc:
                page_bytes = page.get_pixmap().tobytes("png")
                page_img = load_img(Image.open(io.BytesIO(page_bytes)))
                img_list.append(page_img.convert("RGB"))
        finally:
            doc.close()
    return img_list


def convert_file_to_markdown(filename: str) -> str:
    """
    Converts a file to markdown format using markitdown.

    Args:
        filename: str
            The file to be converted: either a path string or an uploaded
            file object that exposes its path as ``.name``.

    Returns:
        str: The markdown representation of the file.
    """
    # Uploaded-file objects carry the path in ``.name``; a plain string is
    # already the path.
    input_path = getattr(filename, "name", filename)
    return md.convert(input_path).text_content


def convert_pdf_to_word(filename: str) -> str:
    """
    Converts a PDF file to Word format.

    Only the text extracted from each page is carried over: every page that
    yields text becomes one paragraph in the Word document, and pages with no
    extractable text are skipped. The ``.docx`` file is written next to the
    input, using the input's name with its extension replaced.

    Args:
        filename: str
            The PDF file to be converted: either a path string or an uploaded
            file object that exposes its path as ``.name``.

    Returns:
        str: The file path of the generated Word file.
    """
    # Uploaded-file objects carry the path in ``.name``; a plain string is
    # already the path.
    input_path = getattr(filename, "name", filename)
    # splitext only looks at the last path component, so a dot in a parent
    # directory name cannot truncate the output path.
    output_path = os.path.splitext(input_path)[0] + ".docx"
    doc = Document()

    with pdfplumber.open(input_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if text:
                doc.add_paragraph(text)

    doc.save(output_path)
    return output_path


# One gr.Interface per conversion function; each one becomes a tab below.
file_to_pdf = gr.Interface(
    convert_file_to_pdf,
    gr.File(label="Upload README/Markdown file"),
    gr.File(label="Converted PDF"),
    title="File to PDF Converter",
    description="Convert your files to PDF format",
)

file_to_image = gr.Interface(
    convert_file_to_img,
    [gr.File(label="Upload Image"), gr.Textbox(label="base64, url")],
    gr.Gallery(label="Converted Images"),
    title="File to Images Converter",
    description="Convert your images to an image format",
)

file_to_markdown = gr.Interface(
    convert_file_to_markdown,
    gr.File(label="Upload File"),
    gr.Textbox(label="Converted Markdown"),
    title="File to Markdown Converter",
    description="Convert your files to markdown format",
)

pdf_to_word = gr.Interface(
    convert_pdf_to_word,
    gr.File(label="Upload PDF file"),
    gr.File(label="Converted Word Document"),
    title="PDF to Word Converter",
    description="Convert your PDF files to Word format",
)

# Tab label -> interface; insertion order is the tab order.
_tabs = {
    "File to PDF": file_to_pdf,
    "File to Image": file_to_image,
    "File to Markdown": file_to_markdown,
    "PDF to Word": pdf_to_word,
}
demo = gr.TabbedInterface(list(_tabs.values()), list(_tabs))

if __name__ == "__main__":
    # Launch settings for running this module as a script.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "debug": True,
        "mcp_server": True,
    }
    demo.launch(**launch_options)