tienanh2003 committed on
Commit
5bb9df9
·
verified ·
1 Parent(s): e94af7b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -5
app.py CHANGED
@@ -114,15 +114,21 @@ from concurrent.futures import ThreadPoolExecutor
114
 
115
  import time
116
 
 
 
 
117
  def handle_file(file, prompt, extra_prompt, max_new_tokens, progress=None):
118
  try:
119
  file_path = file.name if hasattr(file, "name") else file
120
  filename = os.path.basename(file_path)
121
  ext = filename.lower().split('.')[-1]
122
 
123
- start = time.perf_counter()
 
 
124
 
125
  if ext == "pdf":
 
126
  with open(file_path, "rb") as f:
127
  pdf_bytes = f.read()
128
 
@@ -138,17 +144,55 @@ def handle_file(file, prompt, extra_prompt, max_new_tokens, progress=None):
138
  img.thumbnail((3072, 3072), Image.Resampling.LANCZOS)
139
  pages.append(img)
140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  else:
 
142
  img = Image.open(file_path)
143
  if img.mode != "RGB":
144
  img = img.convert("RGB")
 
 
145
 
146
- end = time.perf_counter()
147
- elapsed = end - start
148
- return filename, f"OKE (time: {elapsed:.3f}s)"
 
 
149
 
150
  except Exception as e:
151
- return "error", f"[ERROR] handle_file test failed: {e}"
 
 
152
 
153
  # def run_inference(img: Image.Image, prompt: str = "", max_new_tokens: int = 512) -> str:
154
  # if img.mode != "RGB":
 
114
 
115
  import time
116
 
117
+ import time
118
+ from concurrent.futures import ThreadPoolExecutor, as_completed
119
+
120
  def handle_file(file, prompt, extra_prompt, max_new_tokens, progress=None):
121
  try:
122
  file_path = file.name if hasattr(file, "name") else file
123
  filename = os.path.basename(file_path)
124
  ext = filename.lower().split('.')[-1]
125
 
126
+ full_prompt = (prompt + "\n" + extra_prompt).strip() or ""
127
+
128
+ start_total = time.perf_counter()
129
 
130
  if ext == "pdf":
131
+ start_convert = time.perf_counter()
132
  with open(file_path, "rb") as f:
133
  pdf_bytes = f.read()
134
 
 
144
  img.thumbnail((3072, 3072), Image.Resampling.LANCZOS)
145
  pages.append(img)
146
 
147
+ end_convert = time.perf_counter()
148
+ print(f"[INFO] Converted PDF → {len(pages)} pages in {(end_convert - start_convert):.3f}s")
149
+
150
+ # --- Multi-threaded inference ---
151
+ start_infer = time.perf_counter()
152
+ outputs = []
153
+
154
+ def infer_page(img, idx):
155
+ # Replace run_inference with the real or a mock inference function
156
+ out = run_inference(img, full_prompt, max_new_tokens)
157
+ print(f"[DEBUG] Page {idx+1} inference done")
158
+ if progress:
159
+ progress((idx) / len(pages), desc=f"Page {idx+1}/{len(pages)}")
160
+ return out
161
+
162
+ with ThreadPoolExecutor(max_workers=4) as executor:
163
+ futures = {executor.submit(infer_page, img, idx): idx for idx, img in enumerate(pages)}
164
+ for future in as_completed(futures):
165
+ try:
166
+ outputs.append(future.result())
167
+ except Exception as e:
168
+ outputs.append(f"[ERROR] Inference page failed: {e}")
169
+
170
+ end_infer = time.perf_counter()
171
+ print(f"[INFO] Inference all pages done in {(end_infer - start_infer):.3f}s")
172
+
173
+ total_time = end_infer - start_total
174
+ # Join the per-page results (usually string concatenation, or a JSON array depending on the model)
175
+ result = "\n\n--- Page Break ---\n\n".join(outputs)
176
+ return filename, f"OKE (total time: {total_time:.3f}s)\n{result}"
177
+
178
  else:
179
+ start_img = time.perf_counter()
180
  img = Image.open(file_path)
181
  if img.mode != "RGB":
182
  img = img.convert("RGB")
183
+ end_img = time.perf_counter()
184
+ print(f"[INFO] Opened image in {(end_img - start_img):.3f}s")
185
 
186
+ start_infer = time.perf_counter()
187
+ result = run_inference(img, full_prompt, max_new_tokens)
188
+ end_infer = time.perf_counter()
189
+ total_time = end_infer - start_img
190
+ return filename, f"OKE (time: {total_time:.3f}s)\n{result}"
191
 
192
  except Exception as e:
193
+ import traceback
194
+ traceback.print_exc()
195
+ return "error", f"[ERROR] handle_file failed: {e}"
196
 
197
  # def run_inference(img: Image.Image, prompt: str = "", max_new_tokens: int = 512) -> str:
198
  # if img.mode != "RGB":