Spaces:
Running
on
L40S
Running
on
L40S
Update app.py
Browse files
app.py
CHANGED
|
@@ -19,6 +19,7 @@ os.system('python download_models_hf.py')
|
|
| 19 |
os.system("sed -i 's|cpu|cuda|g' /home/user/magic-pdf.json")
|
| 20 |
|
| 21 |
os.system('cp -r paddleocr /home/user/.paddleocr')
|
|
|
|
| 22 |
from gradio_pdf import PDF
|
| 23 |
|
| 24 |
import gradio as gr
|
|
@@ -110,6 +111,7 @@ def replace_image_with_base64(markdown_text, image_dir_path):
|
|
| 110 |
|
| 111 |
|
| 112 |
def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language):
|
|
|
|
| 113 |
# Get the recognized md file and the path of the compressed archive file
|
| 114 |
local_md_dir, file_name = parse_pdf(file_path, './output', end_pages - 1, is_ocr,
|
| 115 |
layout_mode, formula_enable, table_enable, language)
|
|
@@ -202,7 +204,7 @@ if __name__ == "__main__":
|
|
| 202 |
with gr.Row():
|
| 203 |
with gr.Column(variant='panel', scale=5):
|
| 204 |
file = gr.File(label="Please upload a PDF or image", file_types=[".pdf", ".png", ".jpeg", ".jpg"])
|
| 205 |
-
max_pages = gr.Slider(1,
|
| 206 |
with gr.Row():
|
| 207 |
layout_mode = gr.Dropdown(["layoutlmv3", "doclayout_yolo"], label="Layout model", value="layoutlmv3")
|
| 208 |
language = gr.Dropdown(all_lang, label="Language", value="")
|
|
@@ -213,25 +215,25 @@ if __name__ == "__main__":
|
|
| 213 |
with gr.Row():
|
| 214 |
change_bu = gr.Button("Convert")
|
| 215 |
clear_bu = gr.ClearButton(value="Clear")
|
| 216 |
-
pdf_show = PDF(label=
|
| 217 |
with gr.Accordion("Examples:"):
|
| 218 |
example_root = os.path.join(os.path.dirname(__file__), "examples")
|
| 219 |
gr.Examples(
|
| 220 |
examples=[os.path.join(example_root, _) for _ in os.listdir(example_root) if
|
| 221 |
_.endswith("pdf")],
|
| 222 |
-
inputs=
|
| 223 |
)
|
| 224 |
|
| 225 |
with gr.Column(variant='panel', scale=5):
|
| 226 |
output_file = gr.File(label="convert result", interactive=False)
|
| 227 |
with gr.Tabs():
|
| 228 |
with gr.Tab("Markdown rendering"):
|
| 229 |
-
md = gr.Markdown(label="Markdown rendering", height=
|
| 230 |
latex_delimiters=latex_delimiters, line_breaks=True)
|
| 231 |
with gr.Tab("Markdown text"):
|
| 232 |
md_text = gr.TextArea(lines=45, show_copy_button=True)
|
| 233 |
-
file.
|
| 234 |
-
change_bu.click(fn=to_markdown, inputs=[
|
| 235 |
outputs=[md, md_text, output_file, pdf_show], api_name=False)
|
| 236 |
clear_bu.add([file, md, pdf_show, md_text, output_file, is_ocr, table_enable, language])
|
| 237 |
|
|
|
|
| 19 |
os.system("sed -i 's|cpu|cuda|g' /home/user/magic-pdf.json")
|
| 20 |
|
| 21 |
os.system('cp -r paddleocr /home/user/.paddleocr')
|
| 22 |
+
os.system('pip install -U gradio-pdf')
|
| 23 |
from gradio_pdf import PDF
|
| 24 |
|
| 25 |
import gradio as gr
|
|
|
|
| 111 |
|
| 112 |
|
| 113 |
def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language):
|
| 114 |
+
file_path = to_pdf(file_path)
|
| 115 |
# Get the recognized md file and the path of the compressed archive file
|
| 116 |
local_md_dir, file_name = parse_pdf(file_path, './output', end_pages - 1, is_ocr,
|
| 117 |
layout_mode, formula_enable, table_enable, language)
|
|
|
|
| 204 |
with gr.Row():
|
| 205 |
with gr.Column(variant='panel', scale=5):
|
| 206 |
file = gr.File(label="Please upload a PDF or image", file_types=[".pdf", ".png", ".jpeg", ".jpg"])
|
| 207 |
+
max_pages = gr.Slider(1, 20, 10, step=1, label='Max convert pages')
|
| 208 |
with gr.Row():
|
| 209 |
layout_mode = gr.Dropdown(["layoutlmv3", "doclayout_yolo"], label="Layout model", value="layoutlmv3")
|
| 210 |
language = gr.Dropdown(all_lang, label="Language", value="")
|
|
|
|
| 215 |
with gr.Row():
|
| 216 |
change_bu = gr.Button("Convert")
|
| 217 |
clear_bu = gr.ClearButton(value="Clear")
|
| 218 |
+
pdf_show = PDF(label='PDF preview', interactive=False, visible=True, height=800)
|
| 219 |
with gr.Accordion("Examples:"):
|
| 220 |
example_root = os.path.join(os.path.dirname(__file__), "examples")
|
| 221 |
gr.Examples(
|
| 222 |
examples=[os.path.join(example_root, _) for _ in os.listdir(example_root) if
|
| 223 |
_.endswith("pdf")],
|
| 224 |
+
inputs=file
|
| 225 |
)
|
| 226 |
|
| 227 |
with gr.Column(variant='panel', scale=5):
|
| 228 |
output_file = gr.File(label="convert result", interactive=False)
|
| 229 |
with gr.Tabs():
|
| 230 |
with gr.Tab("Markdown rendering"):
|
| 231 |
+
md = gr.Markdown(label="Markdown rendering", height=1100, show_copy_button=True,
|
| 232 |
latex_delimiters=latex_delimiters, line_breaks=True)
|
| 233 |
with gr.Tab("Markdown text"):
|
| 234 |
md_text = gr.TextArea(lines=45, show_copy_button=True)
|
| 235 |
+
file.change(fn=to_pdf, inputs=file, outputs=pdf_show)
|
| 236 |
+
change_bu.click(fn=to_markdown, inputs=[file, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
|
| 237 |
outputs=[md, md_text, output_file, pdf_show], api_name=False)
|
| 238 |
clear_bu.add([file, md, pdf_show, md_text, output_file, is_ocr, table_enable, language])
|
| 239 |
|