Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -128,64 +128,59 @@ def handle_file(file, prompt, extra_prompt, max_new_tokens, progress=None):
|
|
128 |
start_total = time.perf_counter()
|
129 |
|
130 |
if ext == "pdf":
|
|
|
131 |
start_convert = time.perf_counter()
|
132 |
with open(file_path, "rb") as f:
|
133 |
pdf_bytes = f.read()
|
134 |
-
|
135 |
doc = fitz.open(stream=pdf_bytes, filetype="pdf")
|
136 |
scale = DPI / 72
|
137 |
mat = fitz.Matrix(scale, scale)
|
138 |
pages = []
|
139 |
-
|
140 |
for page in doc:
|
141 |
pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)
|
142 |
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
143 |
if max(img.size) > 3072:
|
144 |
img.thumbnail((3072, 3072), Image.Resampling.LANCZOS)
|
145 |
pages.append(img)
|
146 |
-
|
147 |
end_convert = time.perf_counter()
|
148 |
-
print(f"[INFO] Converted PDF → {len(pages)} pages in {(end_convert - start_convert):.3f}s")
|
149 |
|
150 |
-
# ---
|
151 |
start_infer = time.perf_counter()
|
152 |
outputs = []
|
153 |
-
|
154 |
-
def infer_page(img, idx):
|
155 |
-
# Replace run_inference with a real or mock inference function
|
156 |
out = run_inference(img, full_prompt, max_new_tokens)
|
157 |
-
|
158 |
if progress:
|
159 |
-
progress(
|
160 |
-
|
161 |
|
162 |
-
|
163 |
-
|
164 |
-
for future in as_completed(futures):
|
165 |
-
try:
|
166 |
-
outputs.append(future.result())
|
167 |
-
except Exception as e:
|
168 |
-
outputs.append(f"[ERROR] Inference page failed: {e}")
|
169 |
|
170 |
-
|
171 |
-
|
|
|
|
|
172 |
|
173 |
-
total_time =
|
174 |
-
|
175 |
-
|
176 |
-
|
|
|
|
|
177 |
|
178 |
else:
|
|
|
179 |
start_img = time.perf_counter()
|
180 |
img = Image.open(file_path)
|
181 |
if img.mode != "RGB":
|
182 |
img = img.convert("RGB")
|
183 |
end_img = time.perf_counter()
|
184 |
-
print(f"[INFO] Opened image in {(end_img - start_img):.3f}s")
|
185 |
|
186 |
start_infer = time.perf_counter()
|
187 |
result = run_inference(img, full_prompt, max_new_tokens)
|
188 |
end_infer = time.perf_counter()
|
|
|
189 |
total_time = end_infer - start_img
|
190 |
return filename, f"OKE (time: {total_time:.3f}s)\n{result}"
|
191 |
|
|
|
128 |
start_total = time.perf_counter()
|
129 |
|
130 |
if ext == "pdf":
|
131 |
+
# --- Convert the PDF to images ---
|
132 |
start_convert = time.perf_counter()
|
133 |
with open(file_path, "rb") as f:
|
134 |
pdf_bytes = f.read()
|
|
|
135 |
doc = fitz.open(stream=pdf_bytes, filetype="pdf")
|
136 |
scale = DPI / 72
|
137 |
mat = fitz.Matrix(scale, scale)
|
138 |
pages = []
|
|
|
139 |
for page in doc:
|
140 |
pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)
|
141 |
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
142 |
if max(img.size) > 3072:
|
143 |
img.thumbnail((3072, 3072), Image.Resampling.LANCZOS)
|
144 |
pages.append(img)
|
|
|
145 |
end_convert = time.perf_counter()
|
|
|
146 |
|
147 |
+
# --- Run inference on each page ---
|
148 |
start_infer = time.perf_counter()
|
149 |
outputs = []
|
150 |
+
for idx, img in enumerate(pages):
|
|
|
|
|
151 |
out = run_inference(img, full_prompt, max_new_tokens)
|
152 |
+
outputs.append(out)
|
153 |
if progress:
|
154 |
+
progress(idx / len(pages), desc=f"Page {idx+1}/{len(pages)}")
|
155 |
+
end_infer = time.perf_counter()
|
156 |
|
157 |
+
# --- Combine the per-page results ---
|
158 |
+
combined_text = "\n\n".join(outputs) # or json.dumps(outputs), depending on the use case
|
|
|
|
|
|
|
|
|
|
|
159 |
|
160 |
+
# --- Call the model for the final aggregation ---
|
161 |
+
start_agg = time.perf_counter()
|
162 |
+
final_result = run_inference_on_text(combined_text, max_new_tokens) # you need to implement this function
|
163 |
+
end_agg = time.perf_counter()
|
164 |
|
165 |
+
total_time = end_agg - start_total
|
166 |
+
return filename, (
|
167 |
+
f"OKE (total time: {total_time:.3f}s, convert: {end_convert - start_convert:.3f}s, "
|
168 |
+
f"infer per page: {end_infer - start_infer:.3f}s, aggregate: {end_agg - start_agg:.3f}s)\n"
|
169 |
+
f"{final_result}"
|
170 |
+
)
|
171 |
|
172 |
else:
|
173 |
+
# Handle a single image
|
174 |
start_img = time.perf_counter()
|
175 |
img = Image.open(file_path)
|
176 |
if img.mode != "RGB":
|
177 |
img = img.convert("RGB")
|
178 |
end_img = time.perf_counter()
|
|
|
179 |
|
180 |
start_infer = time.perf_counter()
|
181 |
result = run_inference(img, full_prompt, max_new_tokens)
|
182 |
end_infer = time.perf_counter()
|
183 |
+
|
184 |
total_time = end_infer - start_img
|
185 |
return filename, f"OKE (time: {total_time:.3f}s)\n{result}"
|
186 |
|