Spaces:

tienanh2003
/

ocr

Sleeping

App Files Files Community

tienanh2003 committed 20 days ago

Commit

7615a84

verified ·

1 Parent(s): f8416fc

Create app.py

Browse files

Files changed (1) hide show

app.py +566 -0

app.py ADDED Viewed

	@@ -0,0 +1,566 @@

+import os
+import json
+import re
+import hashlib
+import gc
+from io import BytesIO
+from collections import OrderedDict
+from PIL import Image, UnidentifiedImageError
+import torch
+from transformers import AutoProcessor, BitsAndBytesConfig
+from transformers.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration
+from pdf2image import convert_from_bytes
+import gradio as gr
+import fitz
# --- CONFIGURATION ---
# Hugging Face model repository loaded below for both processor and weights.
MODEL_ID = "prithivMLmods/Camel-Doc-OCR-062825"
CACHE_MAX_SIZE = 128
DPI = 300  # Keep it moderate, not too high
IMAGE_MAX_DIM = None  # Do not resize unless needed
JPEG_QUALITY = 80
# Fraction of GPU 0's memory this process may allocate (applied only on CUDA).
GPU_MEMORY_FRACTION = 0.8
# --- 1. Device ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.backends.cudnn.benchmark = True
if device.type == 'cuda':
    torch.cuda.set_per_process_memory_fraction(GPU_MEMORY_FRACTION, device=0)
# --- 2. Load model ---
# AutoProcessor, BitsAndBytesConfig and Qwen2_5_VLForConditionalGeneration are
# already imported at the top of the file; they are not re-imported here.
# 4-bit NF4 weights with double quantisation; matmuls are computed in float16.
bnb = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    quantization_config=bnb,
    device_map="auto",
    trust_remote_code=True
).eval()
# Reuse the EOS token as the padding token for batched/padded inputs.
processor.tokenizer.pad_token_id = processor.tokenizer.eos_token_id
+# --- 8. File handler ---
+import traceback
+from concurrent.futures import ThreadPoolExecutor
def _render_pdf_pages(pdf_bytes, dpi=DPI, max_dim=3072):
    """Rasterise every page of an in-memory PDF into RGB PIL images.

    Args:
        pdf_bytes: Raw bytes of the PDF file.
        dpi: Target rendering resolution in dots per inch.
        max_dim: Images whose longer side exceeds this are downscaled in place
            (aspect ratio preserved) so that it fits.

    Returns:
        A list with one ``PIL.Image.Image`` per page, in document order.
    """
    # PDF user space is 72 units per inch, so the render scale is dpi / 72.
    # Passing the DPI itself as the scale would enlarge each page 300x.
    scale = dpi / 72.0
    matrix = fitz.Matrix(scale, scale)
    images = []
    # The context manager closes the document even if a page fails to render.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        for page in doc:
            pix = page.get_pixmap(matrix=matrix, colorspace=fitz.csRGB)
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            if max(img.size) > max_dim:
                img.thumbnail((max_dim, max_dim), Image.Resampling.LANCZOS)
            images.append(img)
    return images


def handle_file(file, prompt, extra_prompt, max_new_tokens, progress=gr.Progress()):
    """Run OCR on an uploaded image or PDF and return ``(filename, text)``.

    Args:
        file: Uploaded file object exposing ``.name``, or a plain path string.
        prompt: Free-form prompt text.
        extra_prompt: Additional prompt text appended on a new line.
        max_new_tokens: Generation length limit forwarded to ``run_inference``.
        progress: Gradio progress tracker, updated once per finished PDF page.

    Returns:
        A ``(name, text)`` tuple. ``name`` is the base filename, or ``"error"``
        when the input could not be processed at all. ``text`` is the model
        output (PDF pages joined by a page-break marker) or an ``[ERROR] ...``
        message. Exceptions are reported in the returned text, never raised.
    """
    try:
        if file is None:
            # Nothing uploaded: report it directly instead of failing below.
            return "error", "[ERROR] No file provided"
        file_path = file.name if hasattr(file, "name") else file
        filename = os.path.basename(file_path)
        # splitext yields "" for extension-less names, so a file literally
        # named "pdf" is not mistaken for a PDF.
        ext = os.path.splitext(filename)[1].lower().lstrip('.')
        full_prompt = ((prompt or "") + "\n" + (extra_prompt or "")).strip()
        print(f"[INFO] handle_file → {filename} (.{ext})")
        if ext == "pdf":
            try:
                with open(file_path, "rb") as f:
                    pdf_bytes = f.read()
                print(f"[INFO] Read PDF bytes: {len(pdf_bytes)} bytes")
                pages = _render_pdf_pages(pdf_bytes)
                print(f"[INFO] Converted PDF → {len(pages)} pages")
            except Exception as e:
                traceback.print_exc()
                return filename, f"[ERROR] PDF conversion failed: {e}"
            outputs = []
            # NOTE(review): up to four threads call run_inference on the one
            # shared model at the same time — confirm this is safe and fits in
            # memory on the deployed hardware.
            with ThreadPoolExecutor(max_workers=4) as executor:
                futures = [executor.submit(run_inference, img, full_prompt, max_new_tokens) for img in pages]
                # Collect in submission order so the output keeps page order.
                for idx, future in enumerate(futures):
                    try:
                        out = future.result()
                    except Exception as e:
                        traceback.print_exc()
                        out = f"[ERROR] Inference page {idx+1} failed: {e}"
                    outputs.append(out)
                    # Page idx+1 is finished, so report (idx + 1) completed.
                    progress((idx + 1) / len(pages), desc=f"Page {idx+1}/{len(pages)}")
            result = "\n\n--- Page Break ---\n\n".join(outputs)
            print("[INFO] handle_file done")
            return filename, result
        try:
            # convert() returns a fully loaded copy, so the file handle can be
            # released before inference starts.
            with Image.open(file_path) as opened:
                print(f"[INFO] Opened image: {opened.mode}, {opened.size}")
                img = opened.convert("RGB")
        except Exception as e:
            traceback.print_exc()
            return filename, f"[ERROR] Image open failed: {e}"
        return filename, run_inference(img, full_prompt, max_new_tokens)
    except Exception as e:
        traceback.print_exc()
        return "error", f"[ERROR] handle_file unexpected: {e}"
def run_inference(img: Image.Image, prompt: str = "", max_new_tokens: int = 512) -> str:
    """Generate text for one image with the module-level processor and model.

    Args:
        img: Input image; converted to RGB when it is in another mode.
        prompt: Instruction text sent with the image. ``None`` is treated as
            an empty prompt.
        max_new_tokens: Upper bound on generated tokens (coerced to ``int``).

    Returns:
        The decoded continuation with the prompt tokens removed, special
        tokens skipped, and surrounding whitespace stripped.
    """
    if img.mode != "RGB":
        img = img.convert("RGB")
    prompt_text = (prompt or "").strip()
    # Single-turn chat message carrying the image followed by the text prompt.
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": img},
            {"type": "text", "text": prompt_text}
        ]
    }]
    text_prompt = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(
        text=[text_prompt], images=[img], return_tensors="pt", padding=True
    ).to(device)
    # torch.autocast replaces the deprecated torch.cuda.amp.autocast; mixed
    # precision stays enabled only when running on CUDA.
    use_amp = device.type == 'cuda'
    with torch.inference_mode(), torch.autocast(device_type=device.type, enabled=use_amp):
        gen = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            do_sample=False,
            eos_token_id=processor.tokenizer.eos_token_id
        )
    # generate() returns prompt + continuation; drop the prompt part per row.
    trimmed = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(inputs['input_ids'], gen)
    ]
    result = processor.tokenizer.batch_decode(
        trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )[0].strip()
    return result
# --- 9. Prompt templates & JSON export ---
# Rules shared verbatim by every extraction prompt. They live in one place so
# the per-document templates cannot drift apart when a rule is edited.
_COMMON_INSTRUCTIONS = """You must return the result as a valid XML block that strictly follows the structure below.
STRICT INSTRUCTIONS – read carefully and follow EXACTLY:
1. Return **ONLY** the XML block – nothing before or after it.
2. DO NOT add, remove, rename, or reorder any XML tags.
3. DO NOT include explanations, markdown, notes, comments, or extra spacing outside the XML block.
4. For every tag, fill in the exact value read from the image.
   • NEVER copy or repeat the label/placeholder text.
   • NEVER guess or invent values.
5. If a value is missing or unreadable, leave the tag EMPTY (e.g. <tag></tag>).
6. DO NOT include Vietnamese text or translations inside tag values.
7. The output MUST start with the root tag and end with its correct closing tag; all tags must be well-formed.
8. Dates must be in YYYY-MM-DD format.
9. Boolean tags must be exactly true or false (lower-case, no quotes).
   ✔ √ Yes Passed ⇒ true | ✘ X No Fail ⇒ false
10. **Inside each value**
    • Replace every internal line-break with “, ” (comma + space).
    • Trim leading/trailing whitespace.
    • Escape XML special characters: & → &amp;, < → &lt;, > → &gt;.
11. **Phone / contact fields** – digits, “+”, “–”, spaces only; if multiple numbers, separate with “, ”.
12. **Signature fields** – fill ONLY if the signature appears as legible text; if it is handwritten, leave the tag empty.
13. Ignore any information not represented by the tags below.
"""


def _build_template(intro, schema):
    """Assemble a prompt: intro line, the shared rules, then the XML schema."""
    return intro + "\n" + _COMMON_INSTRUCTIONS + schema


# Maps the label shown on each template button to its full extraction prompt.
# The text inside each XML tag is a placeholder describing the expected field.
prompt_templates = {
    "Electrolux": _build_template(
        "Extract all structured information from the delivery order document image.",
        """<s_electrolux_form>
  <document_number>Số lệnh giao nhận hàng</document_number>
  <order_number>Số đơn hàng</order_number>
  <customer_code>Mã số khách hàng</customer_code>
  <customer_order_code>Mã đơn khách hàng</customer_order_code>
  <customer_order_date>Ngày đặt hàng của khách</customer_order_date>
  <delivery_date>Ngày giao hàng</delivery_date>
  <requested_delivery_date>Ngày giao hàng yêu cầu</requested_delivery_date>
  <invoice_number>Số hóa đơn</invoice_number>
  <shipper_company_name>Tên công ty gửi hàng</shipper_company_name>
  <shipper_address>Địa chỉ gửi hàng</shipper_address>
  <shipper_phone>Số điện thoại</shipper_phone>
  <shipper_fax>Số fax</shipper_fax>
  <shipper_tax_code>Mã số thuế</shipper_tax_code>
  <consignee_customer_code>Mã khách hàng</consignee_customer_code>
  <consignee_company_name>Tên công ty nhận hàng</consignee_company_name>
  <shipping_address>Địa chỉ nhận hàng chi tiết</shipping_address>
  <city_province>Tỉnh/Thành phố</city_province>
  <postal_code>Mã bưu chính</postal_code>
  <preparer_name>Họ tên người lập phiếu</preparer_name>
  <preparer_date>Ngày lập phiếu</preparer_date>
  <s_is_signed>Đã ký hay chưa (true hoặc false)</s_is_signed>
</s_electrolux_form>
""",
    ),
    "Jotun": _build_template(
        "Extract all structured information from the delivery order document.",
        """<s_jotun_form>
  <document_number>Số lệnh giao hàng</document_number>
  <delivery_order_code>Số lệnh giao hàng số</delivery_order_code>
  <customer_code>Mã khách hàng</customer_code>
  <customer_name>Tên khách hàng</customer_name>
  <customer_address>Địa chỉ khách hàng</customer_address>
  <customer_phone>Điện thoại khách hàng</customer_phone>
  <invoice_receiver_name>Tên người nhận hóa đơn</invoice_receiver_name>
  <invoice_receiver_address>Địa chỉ người nhận hóa đơn</invoice_receiver_address>
  <order_code>Số đơn đặt hàng</order_code>
  <order_date>Ngày đặt hàng</order_date>
  <order_number>Số đơn hàng</order_number>
  <delivery_date>Ngày giao hàng</delivery_date>
  <s_is_signed>Đã ký hay chưa (true hoặc false)</s_is_signed>
</s_jotun_form>
""",
    ),
    "MAWB": _build_template(
        "Extract all structured information from the Master Air Waybill (MAWB) document.",
        """<s_mawb_form>
  <air_waybill_number>Số MAWB</air_waybill_number>
  <shipper_name>Tên người gửi hàng</shipper_name>
  <shipper_address>Địa chỉ người gửi hàng</shipper_address>
  <shipper_account_number>Mã tài khoản người gửi</shipper_account_number>
  <consignee_name>Tên người nhận hàng</consignee_name>
  <consignee_address>Địa chỉ người nhận hàng</consignee_address>
  <consignee_account_number>Mã tài khoản người nhận</consignee_account_number>
  <dangerous_goods_note>Ghi chú hàng nguy hiểm (true or false)</dangerous_goods_note>
  <shipper_signature>Chữ ký người gửi</shipper_signature>
</s_mawb_form>
""",
    ),
    "Phiếu Cân": _build_template(
        "Extract all structured information from the document 'PHIẾU CÂN / SHIPPER’S LETTER OF INSTRUCTIONS'.",
        """<s_weight_ticket>
  <awb_number>Số AWB</awb_number>
  <shipper_name>Tên người gửi hàng</shipper_name>
  <shipper_address>Địa chỉ người gửi hàng</shipper_address>
  <shipper_contact>Số điện thoại người gửi</shipper_contact>
  <consignee_name>Tên người nhận hàng</consignee_name>
  <consignee_address>Địa chỉ người nhận hàng</consignee_address>
  <cargo_description>Tên hàng hóa</cargo_description>
  <security_check_complete>Đã kiểm tra an ninh (true/false)</security_check_complete>
  <acceptance_staff_name>Tên nhân viên tiếp nhận</acceptance_staff_name>
  <acceptance_staff_signature>Chữ ký nhân viên tiếp nhận</acceptance_staff_signature>
</s_weight_ticket>
""",
    ),
    "PC 3U": _build_template(
        "Extract all structured information from the PC 3U air cargo instruction document.",
        """<s_pc3u_form>
  <awb_number>Số AWB</awb_number>
  <cargo_service_code>Mã dịch vụ</cargo_service_code>
  <shipper_name>Tên người gửi</shipper_name>
  <shipper_address>Địa chỉ người gửi</shipper_address>
  <shipper_contact>Thông tin liên hệ người gửi</shipper_contact>
  <payer_name>Người thanh toán</payer_name>
  <payer_tax_code>Mã số thuế người thanh toán</payer_tax_code>
  <consignee_name>Tên người nhận</consignee_name>
  <consignee_address>Địa chỉ người nhận</consignee_address>
  <consignee_contact>Thông tin liên hệ người nhận</consignee_contact>
  <shipper_signature>Chữ ký người gửi</shipper_signature>
  <acceptance_staff_signature>Chữ ký nhân viên tiếp nhận</acceptance_staff_signature>
</s_pc3u_form>
""",
    ),
    "SLIS-AVS DAD": _build_template(
        "Extract all structured information from the document 'TỜ KHAI GỬI HÀNG - SHIPPER’S LETTER OF INSTRUCTION'.",
        """<s_avs_dad>
  <air_waybill_number>Số AWB</air_waybill_number>
  <form_code>Mã biểu mẫu</form_code>
  <shipper_name>Tên người gửi</shipper_name>
  <shipper_address>Địa chỉ người gửi</shipper_address>
  <shipper_phone>Điện thoại người gửi</shipper_phone>
  <shipper_email>Email người gửi</shipper_email>
  <shipper_tax_code>Mã số thuế người gửi</shipper_tax_code>
  <consignee_name>Tên người nhận</consignee_name>
  <consignee_address>Địa chỉ người nhận</consignee_address>
  <consignee_phone>Điện thoại người nhận</consignee_phone>
  <consignee_email>Email người nhận</consignee_email>
  <departure_airport>Nơi đi</departure_airport>
  <destination_airport>Nơi đến</destination_airport>
  <acceptance_staff_name>Tên nhân viên tiếp nhận</acceptance_staff_name>
  <acceptance_signature>Chữ ký nhân viên tiếp nhận</acceptance_signature>
  <acceptance_time>Thời điểm tiếp nhận</acceptance_time>
  <shipper_signature>Chữ ký người gửi</shipper_signature>
  <shipper_signature_date>Ngày ký người gửi</shipper_signature_date>
</s_avs_dad>
""",
    ),
}
def insert_template(name):
    """Return the prompt template registered under ``name``, or "" if none."""
    try:
        return prompt_templates[name]
    except KeyError:
        # Unknown template label: fall back to an empty prompt.
        return ""
def sanitize_filename(name):
    """Replace every character other than ASCII letters, digits, ``_``, ``-``
    and ``.`` with an underscore and return the result."""
    unsafe_char = re.compile(r'[^a-zA-Z0-9_\-\.]')
    return unsafe_char.sub('_', name)
def clean_text(text):
    """Return ``text`` with leading and trailing whitespace removed.

    The previous implementation also ran two regex substitutions whose
    replacement was ``match.strip()``. Every match began with ``<`` and ended
    with ``>``, so they never changed the text; only the final strip had any
    effect and it is all that is kept.

    NOTE(review): if the intent was to trim whitespace *inside* tag values,
    that was never implemented — confirm before adding it.
    """
    return text.strip()
def export_json(image_name, result_text, out_dir=None):
    """Write an OCR result to a JSON file and return it for download/preview.

    Args:
        image_name: Name of the processed file; stored in the JSON and, after
            sanitising, used as the output file's base name.
        result_text: Raw model output; stored after ``clean_text``.
        out_dir: Directory to write into. Defaults to the system temporary
            directory instead of a hard-coded ``/tmp``.

    Returns:
        ``(path, json_string)`` on success, or ``("", "[Export JSON Failed]:
        ...")`` when anything goes wrong.
    """
    import tempfile  # local import: not among the file's top-level imports

    try:
        clean_name = sanitize_filename(image_name)
        content = {"image": image_name, "text_sequence": clean_text(result_text)}
        # Serialise once and reuse the string for both the file and the preview.
        serialized = json.dumps(content, ensure_ascii=False, indent=2)
        target_dir = out_dir if out_dir is not None else tempfile.gettempdir()
        path = os.path.join(target_dir, f"{clean_name}.json")
        with open(path, "w", encoding="utf-8") as f:
            f.write(serialized)
        return path, serialized
    except Exception as e:
        # UI boundary: report the failure in the preview pane rather than raise.
        return "", f"[Export JSON Failed]: {e}"
# --- 10. Gradio UI ---
# Compact styling for text boxes, buttons, file widgets, headings and code.
css = """
.gradio-textbox textarea {
    font-size: 13px !important;
    line-height: 1.3 !important;
    padding: 6px 8px !important;
}
.gradio-textbox label {
    font-size: 13px !important;
    font-weight: 600 !important;
    margin-bottom: 4px !important;
}
.gradio-button {
    font-size: 12px !important;
    padding: 4px 8px !important;
    height: 28px !important;
    min-height: 28px !important;
    margin: 2px !important;
}
.gradio-button[data-variant="primary"] {
    height: 36px !important;
    font-size: 13px !important;
    padding: 8px 16px !important;
}
.gradio-file {
    font-size: 13px !important;
}
.gradio-file .file-upload {
    padding: 8px !important;
    min-height: 80px !important;
}
.gradio-markdown h3 {
    font-size: 14px !important;
    margin: 8px 0 4px 0 !important;
}
.gradio-markdown h2 {
    font-size: 18px !important;
    margin: 8px 0 !important;
}
.gradio-code {
    font-size: 12px !important;
}
"""
with gr.Blocks(title="Camel-Doc-OCR", css=css) as demo:
    gr.Markdown("## 🧾 Camel-Doc-OCR (Qwen2.5-VL, 4-bit)")
    # --- Main Layout: 2 Columns ---
    with gr.Row():
        # === LEFT COLUMN: Input ===
        with gr.Column(scale=1):
            gr.Markdown("### 📥 INPUT")
            # File Input
            file_input = gr.File(
                label="📤 Tải ảnh hoặc PDF",
                file_types=[".jpg", ".jpeg", ".png", ".pdf"],
                height=100
            )
            # Prompt Input
            prompt_input = gr.Textbox(
                label="Prompt thuần",
                lines=2,
                placeholder="Nhập prompt tùy chỉnh...",
                max_lines=3
            )
            # JSON Config
            config_input = gr.Textbox(
                label="JSON Prompt",
                lines=6,
                placeholder="Cấu hình JSON sẽ xuất hiện ở đây...",
                max_lines=8
            )
            # Max New Tokens Radio
            max_new_tokens_input = gr.Radio(
                choices=[128, 256, 512, 1024, 1536, 2048],
                value=512,
                label="🔢 Chọn max_new_tokens (giới hạn độ dài đầu ra)",
                info="Chọn độ dài tối đa cho đầu ra của mô hình"
            )
            # Prompt Templates
            gr.Markdown("### 📑 Mẫu:")
            with gr.Row():
                for key in prompt_templates:  # All buttons in one row
                    # k=key binds the current label at definition time so each
                    # button inserts its own template, not the last one.
                    gr.Button(f"{key}", size="sm", scale=1).click(
                        fn=lambda *, k=key: insert_template(k),
                        inputs=[],
                        outputs=config_input
                    )
            # Run Button
            run_btn = gr.Button("🚀 Chạy OCR", variant="primary")
        # === RIGHT COLUMN: Output ===
        with gr.Column(scale=1):
            gr.Markdown("### 📤 OUTPUT")
            # Result Output
            result_output = gr.Textbox(
                label="Kết quả trích xuất",
                lines=10,
                placeholder="Kết quả sẽ hiển thị ở đây sau khi chạy OCR...",
                max_lines=12
            )
            # Export Section (button stays hidden until an OCR run has finished)
            with gr.Row():
                export_btn = gr.Button("📦 Xuất JSON", visible=False, variant="secondary", size="sm")
            # JSON Output
            json_text = gr.Code(
                label="JSON Output",
                language="json",
                lines=6,
                visible=False
            )
            # Download File
            json_file = gr.File(
                label="File JSON để tải",
                visible=False,
                file_types=[".json"]
            )
    # --- Hidden Fields ---
    # Carries the processed filename from the OCR run to the export step.
    hidden_name = gr.Textbox(visible=False)
    # --- Event Handlers ---
    # Run Inference, then reveal the export button. Nothing revealed it before,
    # which left the JSON export unreachable from the UI.
    run_btn.click(
        fn=handle_file,
        inputs=[file_input, prompt_input, config_input, max_new_tokens_input],
        outputs=[hidden_name, result_output]
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=[export_btn]
    )
    # Export JSON, then show the download widget and the preview. One chained
    # listener replaces three independent ones so the order is deterministic.
    export_btn.click(
        fn=export_json,
        inputs=[hidden_name, result_output],
        outputs=[json_file, json_text]
    ).then(
        fn=lambda: (gr.update(visible=True), gr.update(visible=True)),
        outputs=[json_file, json_text]
    )
if __name__ == "__main__":
    demo.launch()