Spaces:

tienanh2003
/

ocr

Sleeping

App Files Community

tienanh2003 committed 22 days ago

Commit

6d80d48

verified ·

1 Parent(s): 5bb9df9

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -25

app.py CHANGED Viewed

@@ -128,64 +128,59 @@ def handle_file(file, prompt, extra_prompt, max_new_tokens, progress=None):
         start_total = time.perf_counter()
         if ext == "pdf":
             start_convert = time.perf_counter()
             with open(file_path, "rb") as f:
                 pdf_bytes = f.read()
             doc = fitz.open(stream=pdf_bytes, filetype="pdf")
             scale = DPI / 72
             mat = fitz.Matrix(scale, scale)
             pages = []
             for page in doc:
                 pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)
                 img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                 if max(img.size) > 3072:
                     img.thumbnail((3072, 3072), Image.Resampling.LANCZOS)
                 pages.append(img)
             end_convert = time.perf_counter()
-            print(f"[INFO] Converted PDF → {len(pages)} pages in {(end_convert - start_convert):.3f}s")
-            # --- Xử lý inference đa luồng ---
             start_infer = time.perf_counter()
             outputs = []
-            def infer_page(img, idx):
-                # Thay run_inference bằng hàm inference thật hoặc giả
                 out = run_inference(img, full_prompt, max_new_tokens)
-                print(f"[DEBUG] Page {idx+1} inference done")
                 if progress:
-                    progress((idx) / len(pages), desc=f"Page {idx+1}/{len(pages)}")
-                return out
-            with ThreadPoolExecutor(max_workers=4) as executor:
-                futures = {executor.submit(infer_page, img, idx): idx for idx, img in enumerate(pages)}
-                for future in as_completed(futures):
-                    try:
-                        outputs.append(future.result())
-                    except Exception as e:
-                        outputs.append(f"[ERROR] Inference page failed: {e}")
-            end_infer = time.perf_counter()
-            print(f"[INFO] Inference all pages done in {(end_infer - start_infer):.3f}s")
-            total_time = end_infer - start_total
-            # Ghép kết quả các trang (thường là nối chuỗi, hoặc JSON array tuỳ model)
-            result = "\n\n--- Page Break ---\n\n".join(outputs)
-            return filename, f"OKE (total time: {total_time:.3f}s)\n{result}"
         else:
             start_img = time.perf_counter()
             img = Image.open(file_path)
             if img.mode != "RGB":
                 img = img.convert("RGB")
             end_img = time.perf_counter()
-            print(f"[INFO] Opened image in {(end_img - start_img):.3f}s")
             start_infer = time.perf_counter()
             result = run_inference(img, full_prompt, max_new_tokens)
             end_infer = time.perf_counter()
             total_time = end_infer - start_img
             return filename, f"OKE (time: {total_time:.3f}s)\n{result}"

         start_total = time.perf_counter()
         if ext == "pdf":
+            # --- Chuyển PDF sang ảnh ---
             start_convert = time.perf_counter()
             with open(file_path, "rb") as f:
                 pdf_bytes = f.read()
             doc = fitz.open(stream=pdf_bytes, filetype="pdf")
             scale = DPI / 72
             mat = fitz.Matrix(scale, scale)
             pages = []
             for page in doc:
                 pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)
                 img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                 if max(img.size) > 3072:
                     img.thumbnail((3072, 3072), Image.Resampling.LANCZOS)
                 pages.append(img)
             end_convert = time.perf_counter()
+            # --- Inference từng trang ---
             start_infer = time.perf_counter()
             outputs = []
+            for idx, img in enumerate(pages):
                 out = run_inference(img, full_prompt, max_new_tokens)
+                outputs.append(out)
                 if progress:
+                    progress(idx / len(pages), desc=f"Page {idx+1}/{len(pages)}")
+            end_infer = time.perf_counter()
+            # --- Tổng hợp kết quả ---
+            combined_text = "\n\n".join(outputs)  # hoặc json.dumps(outputs) tuỳ mục đích
+            # --- Gọi model tổng hợp cuối ---
+            start_agg = time.perf_counter()
+            final_result = run_inference_on_text(combined_text, max_new_tokens)  # bạn cần implement hàm này
+            end_agg = time.perf_counter()
+            total_time = end_agg - start_total
+            return filename, (
+                f"OKE (total time: {total_time:.3f}s, convert: {end_convert - start_convert:.3f}s, "
+                f"infer per page: {end_infer - start_infer:.3f}s, aggregate: {end_agg - start_agg:.3f}s)\n"
+                f"{final_result}"
+            )
         else:
+            # Xử lý ảnh đơn
             start_img = time.perf_counter()
             img = Image.open(file_path)
             if img.mode != "RGB":
                 img = img.convert("RGB")
             end_img = time.perf_counter()
             start_infer = time.perf_counter()
             result = run_inference(img, full_prompt, max_new_tokens)
             end_infer = time.perf_counter()
             total_time = end_infer - start_img
             return filename, f"OKE (time: {total_time:.3f}s)\n{result}"