import gradio as gr
from pathlib import Path
import pandas as pd
import importlib
from docling.document_converter import DocumentConverter
import llm_document_parser.config as config
from llm_document_parser.instructor_llm import extract_json_data_using_ollama_llm, pull_ollama_model
from llm_document_parser.convert_doc_docling import (
    load_rapid_ocr_model,
    load_easy_ocr_model,
    load_ocr_mac_model,
    load_tesseract_model,
    image_to_text
)
from llm_document_parser.export_data import export_as_csv, export_as_json, combine_json_data_into_df, convert_json_to_df

print("RUNNING gradio_app.py FROM:", __file__)


# Load OCR model based on config
def load_ocr_model_from_config(model_type: str) -> DocumentConverter:
    """
    Load the OCR model based on the configuration.

    Args:
        model_type (str): The type of OCR model to load.

    Returns:
        DocumentConverter: The loaded OCR model.
    """
    if model_type == "rapid":
        # TODO: REFACTOR LOAD OCR MODEL TO JUST EITHER USE SERVER MODELS OR MOBILE MODELS
        return load_rapid_ocr_model(
            "PP-OCRv4/ch_PP-OCRv4_det_server_infer.onnx",
            "PP-OCRv3/ch_PP-OCRv3_rec_infer.onnx",
            "PP-OCRv3/ch_ppocr_mobile_v2.0_cls_train.onnx"
        )
    if model_type == "easy":
        return load_easy_ocr_model()
    if model_type == "ocrmac":
        return load_ocr_mac_model()
    if model_type == "tesseract":
        return load_tesseract_model(config.TESSERACT_TESSDATA_LOCATION)
    raise ValueError(f"Unknown OCR model type in config: {model_type}")


def save_results(export_type: str, output_file_name: str, df: pd.DataFrame, output_folder: str) -> str:
    """
    Save the results in the specified format.

    Args:
        export_type (str): The type of export (e.g., "csv").
        output_file_name (str): The name of the output file.
        df (pd.DataFrame): The extracted data to save.
        output_folder (str): The folder to save the output file.

    Returns:
        str: The output data from the LLM, formatted as the specified export type.
    """
    if export_type == "csv":
        return export_as_csv(df=df, output_folder=output_folder, output_file_name=output_file_name)
    if export_type == "json":
        return export_as_json(df=df, output_folder=output_folder, output_file_name=output_file_name)
    return ""


def process_file(input_path: Path, document_converter: DocumentConverter) -> str:
    """Run OCR on a single document and extract structured JSON data with the LLM."""
    conversion_result = image_to_text(document_converter, input_path)
    ocr_text_data = conversion_result.document.export_to_markdown()

    json_data = extract_json_data_using_ollama_llm(
        prompt=config.LLM_PROMPT,
        text_data=ocr_text_data,
        ollama_model=config.OLLAMA_MODEL,
        response_model=config.RESPONSE_MODEL
    )
    return json_data


# Full processing pipeline: load the OCR model, pull the LLM, process each file, and export the results
def run_full_pipeline(file_inputs):
    document_converter = load_ocr_model_from_config(config.OCR_MODEL)
    pull_ollama_model(config.OLLAMA_MODEL)

    df = pd.DataFrame()
    if isinstance(file_inputs, list):
        json_data_objects = []
        for file in file_inputs:
            json_data = process_file(file, document_converter)
            json_data_objects.append(json_data)
        df = combine_json_data_into_df(json_data_objects)
    else:
        json_data = process_file(Path(file_inputs), document_converter)
        df = convert_json_to_df(json_data)

    return save_results(
        export_type=config.EXPORT_TYPE,
        output_file_name=config.OUTPUT_FILE_NAME,
        df=df,
        output_folder=config.OUTPUT_FOLDER
    )


'''
base_dir = Path(os.path.dirname(__file__))
config_file_path = base_dir / "src" / "llm_document_parser" / "config.py"
config_file_path = config_file_path.resolve()
code_contents = config_file_path.read_text()

def load_config():
    return config_file_path.read_text()

def save_config(updated_config):
    config_file_path.write_text(updated_config)
    importlib.reload(config)
    return "Config updated successfully!"
'''

with gr.Blocks() as demo:
    gr.Markdown(f"""
    # LLM Document Parser
    Check out the GitHub repo for this Blueprint: https://github.com/oronadavid/llm-document-parser

    This app extracts structured data from a document using OCR and a local LLM.\n
    Selected OCR model: `{config.OCR_MODEL}`\n
    Selected LLM model: `{config.OLLAMA_MODEL}`\n
    Export format: `{config.EXPORT_TYPE}`\n
    Response Model: `{config.RESPONSE_MODEL.__name__}`
    """)

    file_input = gr.File(file_types=["image", ".pdf"], file_count="multiple", label="Upload Document(s) (Image/PDF)")
    run_button = gr.Button("Parse Documents")
    output_text = gr.JSON(label="Extracted Data")

    run_button.click(fn=run_full_pipeline, inputs=file_input, outputs=output_text)

    '''
    gr.Markdown("""# Config
    To update the config, make changes, then click "Update Config" below
    """)
    config_editor = gr.Code(code_contents, language="python", label="Config")
    save_config_button = gr.Button("Update Config")
    status = gr.Textbox(label="Status")

    demo.load(fn=load_config, outputs=config_editor)
    save_config_button.click(fn=save_config, inputs=config_editor, outputs=status)
    '''

if __name__ == "__main__":
    demo.launch(share=True)