not-lain committed on
Commit
de6b2de
·
1 Parent(s): cbdde8f

switch from docling to markitdown

Browse files
Files changed (2) hide show
  1. app.py +20 -20
  2. requirements.txt +2 -1
app.py CHANGED
@@ -5,12 +5,12 @@ from typing import List
5
  from PIL import Image
6
  from loadimg import load_img
7
  import io
8
- from docling.document_converter import DocumentConverter
9
 
10
  converter = Converter()
11
- docling_converter = DocumentConverter()
12
 
13
- def convert_file_to_pdf(filename:str) -> str:
 
14
  """
15
  Converts a markdown file to PDF format.
16
 
@@ -21,11 +21,12 @@ def convert_file_to_pdf(filename:str) -> str:
21
  Returns:
22
  str: The file path of the generated PDF file.
23
  """
24
- output_path = filename.name.rsplit('.', 1)[0] + '.pdf'
25
  converter.convert(filename.name, output_path)
26
  return output_path
27
 
28
- def convert_file_to_img(image_file:str=None,txt:str="") -> List[Image.Image] :
 
29
  """
30
  Convert an image file to PDF format.
31
 
@@ -39,10 +40,10 @@ def convert_file_to_img(image_file:str=None,txt:str="") -> List[Image.Image] :
39
  the same as the input filename but with a .pdf extension.
40
  """
41
  img_list = []
42
- if txt != "":
43
- img_list.append(load_img(txt,output_type="pil"))
44
- if image_file is not None:
45
- output_path = image_file.name.rsplit('.', 1)[0] + '.pdf'
46
  converter.convert(image_file.name, output_path)
47
  doc = fitz.open(output_path)
48
  for page in doc:
@@ -50,19 +51,18 @@ def convert_file_to_img(image_file:str=None,txt:str="") -> List[Image.Image] :
50
  img_list.append(load_img(Image.open(io.BytesIO(page_bytes))).convert("RGB"))
51
  doc.close()
52
  return img_list
53
-
54
-
55
- def convert_file_to_markdown(filename:str) -> str:
56
  """
57
- Converts a file to markdown format.
58
  Args:
59
  filename: str
60
  The path to the file to be converted.
61
  Returns:
62
  str: The markdown representation of the file.
63
  """
64
- result = docling_converter.convert(filename)
65
- return result.document.export_to_markdown()
66
 
67
 
68
  # Create individual interfaces
@@ -71,15 +71,15 @@ file_to_pdf = gr.Interface(
71
  inputs=gr.File(label="Upload README/Markdown file"),
72
  outputs=gr.File(label="Converted PDF"),
73
  title="File to PDF Converter",
74
- description="Convert your files to PDF format"
75
  )
76
 
77
  file_to_image = gr.Interface(
78
  fn=convert_file_to_img,
79
- inputs=[gr.File(label="Upload Image"),gr.Textbox(label="base64, url")],
80
  outputs=gr.Gallery(label="Converted Images"),
81
  title="File to Images Converter",
82
- description="Convert your images to an image format"
83
  )
84
 
85
  file_to_markdown = gr.Interface(
@@ -87,7 +87,7 @@ file_to_markdown = gr.Interface(
87
  inputs=gr.File(label="Upload File"),
88
  outputs=gr.Textbox(label="Converted Markdown"),
89
  title="File to Markdown Converter",
90
- description="Convert your files to markdown format"
91
  )
92
 
93
  # Create tabbed interface
@@ -97,4 +97,4 @@ demo = gr.TabbedInterface(
97
  )
98
 
99
  if __name__ == "__main__":
100
- demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, mcp_server=True)
 
5
  from PIL import Image
6
  from loadimg import load_img
7
  import io
8
+ import markitdown
9
 
10
  converter = Converter()
 
11
 
12
+
13
+ def convert_file_to_pdf(filename: str) -> str:
14
  """
15
  Converts a markdown file to PDF format.
16
 
 
21
  Returns:
22
  str: The file path of the generated PDF file.
23
  """
24
+ output_path = filename.name.rsplit(".", 1)[0] + ".pdf"
25
  converter.convert(filename.name, output_path)
26
  return output_path
27
 
28
+
29
+ def convert_file_to_img(image_file: str = None, txt: str = "") -> List[Image.Image]:
30
  """
31
  Convert an image file to PDF format.
32
 
 
40
  the same as the input filename but with a .pdf extension.
41
  """
42
  img_list = []
43
+ if txt != "":
44
+ img_list.append(load_img(txt, output_type="pil"))
45
+ if image_file is not None:
46
+ output_path = image_file.name.rsplit(".", 1)[0] + ".pdf"
47
  converter.convert(image_file.name, output_path)
48
  doc = fitz.open(output_path)
49
  for page in doc:
 
51
  img_list.append(load_img(Image.open(io.BytesIO(page_bytes))).convert("RGB"))
52
  doc.close()
53
  return img_list
54
+
55
+
56
def convert_file_to_markdown(filename: str) -> str:
    """
    Converts a file to markdown format using markitdown.

    Args:
        filename: str
            The path to the file to be converted. An object that exposes
            the path through a ``name`` attribute (as the upload handler
            in this app appears to provide -- confirm against the caller)
            is accepted as well.

    Returns:
        str: The markdown representation of the file.
    """
    # Use ``.name`` when present; otherwise treat the argument itself as
    # the path so a plain string also works.
    source_path = getattr(filename, "name", filename)

    # markitdown has no module-level ``convert`` function: conversion goes
    # through a ``MarkItDown`` instance, and the returned result object
    # holds the markdown text in ``text_content``.
    result = markitdown.MarkItDown().convert(source_path)
    return result.text_content
 
66
 
67
 
68
  # Create individual interfaces
 
71
  inputs=gr.File(label="Upload README/Markdown file"),
72
  outputs=gr.File(label="Converted PDF"),
73
  title="File to PDF Converter",
74
+ description="Convert your files to PDF format",
75
  )
76
 
77
  file_to_image = gr.Interface(
78
  fn=convert_file_to_img,
79
+ inputs=[gr.File(label="Upload Image"), gr.Textbox(label="base64, url")],
80
  outputs=gr.Gallery(label="Converted Images"),
81
  title="File to Images Converter",
82
+ description="Convert your images to an image format",
83
  )
84
 
85
  file_to_markdown = gr.Interface(
 
87
  inputs=gr.File(label="Upload File"),
88
  outputs=gr.Textbox(label="Converted Markdown"),
89
  title="File to Markdown Converter",
90
+ description="Convert your files to markdown format",
91
  )
92
 
93
  # Create tabbed interface
 
97
  )
98
 
99
  if __name__ == "__main__":
100
+ demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, mcp_server=True)
requirements.txt CHANGED
@@ -8,4 +8,5 @@ gradio
8
  python-pptx
9
  pdfitdown
10
  loadimg
11
- docling
 
 
8
  python-pptx
9
  pdfitdown
10
  loadimg
11
+ docling
12
+ markitdown