Spaces:
Running
Running
| import os | |
| import json | |
| import re | |
| import hashlib | |
| import gc | |
| from io import BytesIO | |
| from collections import OrderedDict | |
| from PIL import Image, UnidentifiedImageError | |
| import torch | |
| from transformers import AutoProcessor, BitsAndBytesConfig | |
| from transformers.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration | |
| from pdf2image import convert_from_bytes | |
| import gradio as gr | |
| import fitz | |
# --- CONFIGURATION ---
# Hugging Face repository id of the OCR model.
MODEL_ID = "prithivMLmods/Camel-Doc-OCR-062825"
# NOTE(review): not referenced in the visible part of the file — confirm it is still needed.
CACHE_MAX_SIZE = 128
# Rasterization resolution for PDF pages: keep it just high enough, not excessive.
DPI = 300
# Do not resize images unless it is actually needed.
IMAGE_MAX_DIM = None
JPEG_QUALITY = 80
# Share of GPU memory this process is allowed to allocate.
GPU_MEMORY_FRACTION = 0.8

# --- 1. Device ---
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
torch.backends.cudnn.benchmark = True
if device.type == "cuda":
    torch.cuda.set_per_process_memory_fraction(GPU_MEMORY_FRACTION, device=0)
| # --- 2. Load model --- | |
| # from transformers import AutoProcessor, BitsAndBytesConfig | |
| # from transformers.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration | |
| # bnb = BitsAndBytesConfig( | |
| # load_in_4bit=True, | |
| # bnb_4bit_use_double_quant=True, | |
| # bnb_4bit_quant_type="nf4", | |
| # bnb_4bit_compute_dtype=torch.float16 | |
| # ) | |
| # processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True) | |
| # model = Qwen2_5_VLForConditionalGeneration.from_pretrained( | |
| # MODEL_ID, | |
| # quantization_config=bnb, | |
| # device_map="auto", | |
| # trust_remote_code=True | |
| # ).eval() | |
| # processor.tokenizer.pad_token_id = processor.tokenizer.eos_token_id | |
| # --- 8. File handler --- | |
| import traceback | |
| from concurrent.futures import ThreadPoolExecutor | |
def _render_pdf_pages(pdf_path, max_dim=3072):
    """Rasterize every page of the PDF at ``pdf_path`` into an RGB PIL image.

    Pages are rendered at ``DPI`` dots per inch and shrunk in place so that
    neither side exceeds ``max_dim`` pixels.
    """
    with open(pdf_path, "rb") as f:
        pdf_bytes = f.read()
    print(f"[INFO] Read PDF bytes: {len(pdf_bytes)} bytes")

    # PyMuPDF matrices are scale factors relative to the 72-dpi PDF base
    # resolution; passing DPI itself would request a 300x magnification.
    scale = DPI / 72
    mat = fitz.Matrix(scale, scale)

    pages = []
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        for page in doc:
            pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            if max(img.size) > max_dim:
                img.thumbnail((max_dim, max_dim), Image.Resampling.LANCZOS)
            pages.append(img)
    finally:
        # Release the document even if a page fails to render.
        doc.close()
    return pages


def handle_file(file, prompt, extra_prompt, max_new_tokens, progress=gr.Progress()):
    """Run OCR inference on an uploaded image or PDF.

    Args:
        file: Uploaded file object exposing ``.name``, or a plain path string.
        prompt: Free-form prompt text.
        extra_prompt: Additional prompt text appended on a new line.
        max_new_tokens: Generation length limit forwarded to ``run_inference``.
        progress: Gradio progress tracker, updated once per finished PDF page.

    Returns:
        A ``(filename, text)`` tuple. Failures never raise: they are reported
        as an ``[ERROR] ...`` message in ``text`` (with ``"error"`` as the
        filename when the failure is not tied to a specific stage).
    """
    try:
        if file is None:
            return "error", "[ERROR] No file provided"

        file_path = file.name if hasattr(file, "name") else file
        filename = os.path.basename(file_path)
        ext = os.path.splitext(filename)[1].lower().lstrip(".")
        # Join only the non-empty parts so a missing prompt cannot raise.
        full_prompt = "\n".join(p for p in (prompt, extra_prompt) if p).strip()
        print(f"[INFO] handle_file → {filename} (.{ext})")

        if ext == "pdf":
            try:
                pages = _render_pdf_pages(file_path)
                print(f"[INFO] Converted PDF → {len(pages)} pages")
            except Exception as e:
                traceback.print_exc()
                return filename, f"[ERROR] PDF conversion failed: {e}"

            if not pages:
                return filename, "[ERROR] PDF has no pages"

            outputs = []
            with ThreadPoolExecutor(max_workers=4) as executor:
                futures = [
                    executor.submit(run_inference, img, full_prompt, max_new_tokens)
                    for img in pages
                ]
                # Collect in submission order so the output follows page order.
                for idx, future in enumerate(futures):
                    try:
                        out = future.result()
                    except Exception as e:
                        traceback.print_exc()
                        out = f"[ERROR] Inference page {idx+1} failed: {e}"
                    outputs.append(out)
                    # Page idx+1 is finished here, so the last page reports 100%.
                    progress((idx + 1) / len(pages), desc=f"Page {idx+1}/{len(pages)}")

            result = "\n\n--- Page Break ---\n\n".join(outputs)
            print("[INFO] handle_file done")
            return filename, result

        try:
            img = Image.open(file_path)
            print(f"[INFO] Opened image: {img.mode}, {img.size}")
        except Exception as e:
            traceback.print_exc()
            return filename, f"[ERROR] Image open failed: {e}"
        try:
            return filename, run_inference(img, full_prompt, max_new_tokens)
        finally:
            # Image.open keeps the file handle open until the image is closed.
            img.close()
    except Exception as e:
        # UI boundary: report the failure in the output box instead of raising.
        traceback.print_exc()
        return "error", f"[ERROR] handle_file unexpected: {e}"
| # def run_inference(img: Image.Image, prompt: str = "", max_new_tokens: int = 512) -> str: | |
| # if img.mode != "RGB": | |
| # img = img.convert("RGB") | |
| # prompt_text = prompt.strip() | |
| # messages = [{ | |
| # "role": "user", | |
| # "content": [ | |
| # {"type": "image", "image": img}, | |
| # {"type": "text", "text": prompt_text} | |
| # ] | |
| # }] | |
| # text_prompt = processor.apply_chat_template( | |
| # messages, tokenize=False, add_generation_prompt=True | |
| # ) | |
| # inputs = processor( | |
| # text=[text_prompt], images=[img], return_tensors="pt", padding=True | |
| # ).to(device) | |
| # with torch.inference_mode(), torch.cuda.amp.autocast(enabled=(device.type == 'cuda')): | |
| # gen = model.generate( | |
| # **inputs, | |
| # max_new_tokens=max_new_tokens, | |
| # do_sample=False, | |
| # eos_token_id=processor.tokenizer.eos_token_id | |
| # ) | |
| # trimmed = [o[len(i):] for i, o in zip(inputs['input_ids'], gen)] | |
| # result = processor.tokenizer.batch_decode( | |
| # trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=True | |
| # )[0].strip() | |
| # return result | |
def run_inference(img: Image.Image, prompt: str = "", max_new_tokens: int = 512) -> str:
    """Debug stand-in for model inference.

    Converts the image to RGB when needed and returns a short diagnostic
    string with the image size and the first 30 characters of ``prompt``.
    ``max_new_tokens`` is accepted but not used by this stub.
    """
    rgb_img = img if img.mode == "RGB" else img.convert("RGB")
    prompt_preview = prompt[:30]
    return f"[DEBUG] Ảnh nhận: size={rgb_img.size}, prompt='{prompt_preview}...'"
| # --- 9. Prompt templates & JSON export --- | |
# The entity names are assembled from parts so that HTML-unescaping tools
# cannot collapse them back into the bare characters, which would turn the
# escaping rule into a no-op instruction.
_XML_ESCAPE_RULE = ", ".join(
    f"{char} → &{entity};"
    for char, entity in (("&", "amp"), ("<", "lt"), (">", "gt"))
)

# Instruction block shared verbatim by every prompt template.
_COMMON_INSTRUCTIONS = (
    """You must return the result as a valid XML block that strictly follows the structure below.
STRICT INSTRUCTIONS – read carefully and follow EXACTLY:
1. Return **ONLY** the XML block – nothing before or after it.
2. DO NOT add, remove, rename, or reorder any XML tags.
3. DO NOT include explanations, markdown, notes, comments, or extra spacing outside the XML block.
4. For every tag, fill in the exact value read from the image.
• NEVER copy or repeat the label/placeholder text.
• NEVER guess or invent values.
5. If a value is missing or unreadable, leave the tag EMPTY (e.g. <tag></tag>).
6. DO NOT include Vietnamese text or translations inside tag values.
7. The output MUST start with the root tag and end with its correct closing tag; all tags must be well-formed.
8. Dates must be in YYYY-MM-DD format.
9. Boolean tags must be exactly true or false (lower-case, no quotes).
✔ √ Yes Passed ⇒ true | ✘ X No Fail ⇒ false
10. **Inside each value**
• Replace every internal line-break with “, ” (comma + space).
• Trim leading/trailing whitespace.
• Escape XML special characters: """
    + _XML_ESCAPE_RULE
    + """.
11. **Phone / contact fields** – digits, “+”, “–”, spaces only; if multiple numbers, separate with “, ”.
12. **Signature fields** – fill ONLY if the signature appears as legible text; if it is handwritten, leave the tag empty.
13. Ignore any information not represented by the tags below."""
)


def _build_template(intro, root_tag, fields):
    """Assemble one prompt: intro line, shared instructions, XML skeleton.

    ``fields`` is a sequence of ``(tag, placeholder)`` pairs emitted in order
    as ``<tag>placeholder</tag>`` lines inside ``<root_tag>``. The result ends
    with a newline.
    """
    lines = [intro, _COMMON_INSTRUCTIONS, f"<{root_tag}>"]
    lines.extend(f"<{tag}>{placeholder}</{tag}>" for tag, placeholder in fields)
    lines.append(f"</{root_tag}>")
    return "\n".join(lines) + "\n"


# Prompt text per document type, keyed by the label shown on the UI buttons.
prompt_templates = {
    "Electrolux": _build_template(
        "Extract all structured information from the delivery order document image.",
        "s_electrolux_form",
        [
            ("document_number", "Số lệnh giao nhận hàng"),
            ("order_number", "Số đơn hàng"),
            ("customer_code", "Mã số khách hàng"),
            ("customer_order_code", "Mã đơn khách hàng"),
            ("customer_order_date", "Ngày đặt hàng của khách"),
            ("delivery_date", "Ngày giao hàng"),
            ("requested_delivery_date", "Ngày giao hàng yêu cầu"),
            ("invoice_number", "Số hóa đơn"),
            ("shipper_company_name", "Tên công ty gửi hàng"),
            ("shipper_address", "Địa chỉ gửi hàng"),
            ("shipper_phone", "Số điện thoại"),
            ("shipper_fax", "Số fax"),
            ("shipper_tax_code", "Mã số thuế"),
            ("consignee_customer_code", "Mã khách hàng"),
            ("consignee_company_name", "Tên công ty nhận hàng"),
            ("shipping_address", "Địa chỉ nhận hàng chi tiết"),
            ("city_province", "Tỉnh/Thành phố"),
            ("postal_code", "Mã bưu chính"),
            ("preparer_name", "Họ tên người lập phiếu"),
            ("preparer_date", "Ngày lập phiếu"),
            ("s_is_signed", "Đã ký hay chưa (true hoặc false)"),
        ],
    ),
    "Jotun": _build_template(
        "Extract all structured information from the delivery order document.",
        "s_jotun_form",
        [
            ("document_number", "Số lệnh giao hàng"),
            ("delivery_order_code", "Số lệnh giao hàng số"),
            ("customer_code", "Mã khách hàng"),
            ("customer_name", "Tên khách hàng"),
            ("customer_address", "Địa chỉ khách hàng"),
            ("customer_phone", "Điện thoại khách hàng"),
            ("invoice_receiver_name", "Tên người nhận hóa đơn"),
            ("invoice_receiver_address", "Địa chỉ người nhận hóa đơn"),
            ("order_code", "Số đơn đặt hàng"),
            ("order_date", "Ngày đặt hàng"),
            ("order_number", "Số đơn hàng"),
            ("delivery_date", "Ngày giao hàng"),
            ("s_is_signed", "Đã ký hay chưa (true hoặc false)"),
        ],
    ),
    "MAWB": _build_template(
        "Extract all structured information from the Master Air Waybill (MAWB) document.",
        "s_mawb_form",
        [
            ("air_waybill_number", "Số MAWB"),
            ("shipper_name", "Tên người gửi hàng"),
            ("shipper_address", "Địa chỉ người gửi hàng"),
            ("shipper_account_number", "Mã tài khoản người gửi"),
            ("consignee_name", "Tên người nhận hàng"),
            ("consignee_address", "Địa chỉ người nhận hàng"),
            ("consignee_account_number", "Mã tài khoản người nhận"),
            ("dangerous_goods_note", "Ghi chú hàng nguy hiểm (true or false)"),
            ("shipper_signature", "Chữ ký người gửi"),
        ],
    ),
    "Phiếu Cân": _build_template(
        "Extract all structured information from the document 'PHIẾU CÂN / SHIPPER’S LETTER OF INSTRUCTIONS'.",
        "s_weight_ticket",
        [
            ("awb_number", "Số AWB"),
            ("shipper_name", "Tên người gửi hàng"),
            ("shipper_address", "Địa chỉ người gửi hàng"),
            ("shipper_contact", "Số điện thoại người gửi"),
            ("consignee_name", "Tên người nhận hàng"),
            ("consignee_address", "Địa chỉ người nhận hàng"),
            ("cargo_description", "Tên hàng hóa"),
            ("security_check_complete", "Đã kiểm tra an ninh (true/false)"),
            ("acceptance_staff_name", "Tên nhân viên tiếp nhận"),
            ("acceptance_staff_signature", "Chữ ký nhân viên tiếp nhận"),
        ],
    ),
    "PC 3U": _build_template(
        "Extract all structured information from the PC 3U air cargo instruction document.",
        "s_pc3u_form",
        [
            ("awb_number", "Số AWB"),
            ("cargo_service_code", "Mã dịch vụ"),
            ("shipper_name", "Tên người gửi"),
            ("shipper_address", "Địa chỉ người gửi"),
            ("shipper_contact", "Thông tin liên hệ người gửi"),
            ("payer_name", "Người thanh toán"),
            ("payer_tax_code", "Mã số thuế người thanh toán"),
            ("consignee_name", "Tên người nhận"),
            ("consignee_address", "Địa chỉ người nhận"),
            ("consignee_contact", "Thông tin liên hệ người nhận"),
            ("shipper_signature", "Chữ ký người gửi"),
            ("acceptance_staff_signature", "Chữ ký nhân viên tiếp nhận"),
        ],
    ),
    "SLIS-AVS DAD": _build_template(
        "Extract all structured information from the document 'TỜ KHAI GỬI HÀNG - SHIPPER’S LETTER OF INSTRUCTION'.",
        "s_avs_dad",
        [
            ("air_waybill_number", "Số AWB"),
            ("form_code", "Mã biểu mẫu"),
            ("shipper_name", "Tên người gửi"),
            ("shipper_address", "Địa chỉ người gửi"),
            ("shipper_phone", "Điện thoại người gửi"),
            ("shipper_email", "Email người gửi"),
            ("shipper_tax_code", "Mã số thuế người gửi"),
            ("consignee_name", "Tên người nhận"),
            ("consignee_address", "Địa chỉ người nhận"),
            ("consignee_phone", "Điện thoại người nhận"),
            ("consignee_email", "Email người nhận"),
            ("departure_airport", "Nơi đi"),
            ("destination_airport", "Nơi đến"),
            ("acceptance_staff_name", "Tên nhân viên tiếp nhận"),
            ("acceptance_signature", "Chữ ký nhân viên tiếp nhận"),
            ("acceptance_time", "Thời điểm tiếp nhận"),
            ("shipper_signature", "Chữ ký người gửi"),
            ("shipper_signature_date", "Ngày ký người gửi"),
        ],
    ),
}
def insert_template(name):
    """Return the prompt template registered under ``name``, or "" if none."""
    try:
        return prompt_templates[name]
    except KeyError:
        return ""
def sanitize_filename(name):
    """Replace every character outside ``[A-Za-z0-9_.-]`` with an underscore."""
    unsafe_chars = re.compile(r'[^a-zA-Z0-9_\-\.]')
    return unsafe_chars.sub('_', name)
# An opening tag, its text value, and the matching closing tag. The
# back-reference keeps the match to a single leaf element, so whitespace
# between sibling or nested elements is left alone.
_PADDED_VALUE_RE = re.compile(r'<([^<>/\s][^<>]*)>\s*([^<>]*?)\s*</\1>')


def clean_text(text):
    """Trim whitespace around tag values and around the whole text.

    ``<tag>  value </tag>`` becomes ``<tag>value</tag>``; whitespace inside a
    value and line breaks between elements are preserved.

    The previous implementation substituted each match with its own
    ``.strip()``; since every match began with ``<`` and ended with ``>``,
    those substitutions changed nothing and only the final ``strip`` had any
    effect.
    """
    text = _PADDED_VALUE_RE.sub(r'<\1>\2</\1>', text)
    return text.strip()
def export_json(image_name, result_text):
    """Write the OCR result to a JSON file in the system temp directory.

    Args:
        image_name: Name of the source file; stored in the JSON and used, after
            sanitizing, as the output file's base name.
        result_text: Raw OCR output; stored after ``clean_text``.

    Returns:
        ``(path, json_string)`` on success, or ``("", "[Export JSON Failed]: ...")``
        when anything goes wrong (the error is reported, never raised).
    """
    # Local import keeps this block self-contained; the module is stdlib.
    import tempfile

    try:
        # Fall back to a fixed name so an empty image name cannot yield ".json".
        clean_name = sanitize_filename(image_name) or "result"
        content = {"image": image_name, "text_sequence": clean_text(result_text)}
        # Serialize once and reuse the text for both the file and the preview.
        payload = json.dumps(content, ensure_ascii=False, indent=2)
        path = os.path.join(tempfile.gettempdir(), f"{clean_name}.json")
        with open(path, "w", encoding="utf-8") as f:
            f.write(payload)
        return path, payload
    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return "", f"[Export JSON Failed]: {e}"
| # --- 10. Gradio UI --- | |
# Custom stylesheet handed to gr.Blocks(css=...) below: compact font sizes,
# paddings and heights for textboxes, buttons, file inputs, markdown headings
# and code blocks.
# NOTE(review): whether the .gradio-* selectors match the DOM of the installed
# Gradio version cannot be told from this file — confirm in the browser.
css = """
.gradio-textbox textarea {
font-size: 13px !important;
line-height: 1.3 !important;
padding: 6px 8px !important;
}
.gradio-textbox label {
font-size: 13px !important;
font-weight: 600 !important;
margin-bottom: 4px !important;
}
.gradio-button {
font-size: 12px !important;
padding: 4px 8px !important;
height: 28px !important;
min-height: 28px !important;
margin: 2px !important;
}
.gradio-button[data-variant="primary"] {
height: 36px !important;
font-size: 13px !important;
padding: 8px 16px !important;
}
.gradio-file {
font-size: 13px !important;
}
.gradio-file .file-upload {
padding: 8px !important;
min-height: 80px !important;
}
.gradio-markdown h3 {
font-size: 14px !important;
margin: 8px 0 4px 0 !important;
}
.gradio-markdown h2 {
font-size: 18px !important;
margin: 8px 0 !important;
}
.gradio-code {
font-size: 12px !important;
}
"""
def _run_ocr(file, prompt, extra_prompt, max_new_tokens, progress=gr.Progress()):
    """Run ``handle_file`` and reveal the export button once a result exists.

    The export button starts hidden; without this nothing ever made it
    visible, so the JSON export could not be triggered from the UI.
    """
    name, text = handle_file(file, prompt, extra_prompt, max_new_tokens, progress)
    return name, text, gr.update(visible=True)


def _export_and_show(image_name, result_text):
    """Export the result to JSON and reveal both export outputs in one step.

    A single handler replaces three independent click handlers so the value
    and visibility updates cannot arrive out of order.
    """
    path, payload = export_json(image_name, result_text)
    # export_json signals failure with an empty path; the File component
    # needs None in that case rather than an empty string.
    return (
        gr.update(value=path or None, visible=True),
        gr.update(value=payload, visible=True),
    )


with gr.Blocks(title="Camel-Doc-OCR", css=css) as demo:
    gr.Markdown("## 🧾 Camel-Doc-OCR (Qwen2.5-VL, 4-bit)")

    # --- Main Layout: 2 Columns ---
    with gr.Row():
        # === LEFT COLUMN: Input ===
        with gr.Column(scale=1):
            gr.Markdown("### 📥 INPUT")

            # File input. All file types are allowed (file_types=None);
            # handle_file treats everything that is not a PDF as an image.
            file_input = gr.File(
                label="📤 Tải ảnh hoặc PDF",
                file_types=None,
                height=100
            )

            # Prompt input
            prompt_input = gr.Textbox(
                label="Prompt thuần",
                lines=2,
                placeholder="Nhập prompt tùy chỉnh...",
                max_lines=3
            )

            # Template prompt (filled in by the template buttons below)
            config_input = gr.Textbox(
                label="JSON Prompt",
                lines=6,
                placeholder="Cấu hình JSON sẽ xuất hiện ở đây...",
                max_lines=8
            )

            # Max new tokens selector
            max_new_tokens_input = gr.Radio(
                choices=[128, 256, 512, 1024, 1536, 2048],
                value=512,
                label="🔢 Chọn max_new_tokens (giới hạn độ dài đầu ra)",
                info="Chọn độ dài tối đa cho đầu ra của mô hình"
            )

            # Prompt templates: one button per template, all in one row
            gr.Markdown("### 📑 Mẫu:")
            with gr.Row():
                for key in prompt_templates:
                    # k=key binds the current key; a plain closure would make
                    # every button insert the last template.
                    gr.Button(key, size="sm", scale=1).click(
                        fn=lambda *, k=key: insert_template(k),
                        inputs=[],
                        outputs=config_input
                    )

            # Run button
            run_btn = gr.Button("🚀 Chạy OCR", variant="primary")

        # === RIGHT COLUMN: Output ===
        with gr.Column(scale=1):
            gr.Markdown("### 📤 OUTPUT")

            # Result output
            result_output = gr.Textbox(
                label="Kết quả trích xuất",
                lines=10,
                placeholder="Kết quả sẽ hiển thị ở đây sau khi chạy OCR...",
                max_lines=12
            )

            # Export section (hidden until a result is available)
            with gr.Row():
                export_btn = gr.Button("📦 Xuất JSON", visible=False, variant="secondary", size="sm")

            # JSON preview
            json_text = gr.Code(
                label="JSON Output",
                language="json",
                lines=6,
                visible=False
            )

            # Downloadable file
            json_file = gr.File(
                label="File JSON để tải",
                visible=False,
                file_types=[".json"]
            )

    # --- Hidden Fields ---
    # Carries the processed file's name from the run step to the export step.
    hidden_name = gr.Textbox(visible=False)

    # --- Event Handlers ---
    # Run inference
    run_btn.click(
        fn=_run_ocr,
        inputs=[file_input, prompt_input, config_input, max_new_tokens_input],
        outputs=[hidden_name, result_output, export_btn]
    )

    # Export JSON
    export_btn.click(
        fn=_export_and_show,
        inputs=[hidden_name, result_output],
        outputs=[json_file, json_text]
    )

if __name__ == "__main__":
    demo.launch()