Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -114,15 +114,21 @@ from concurrent.futures import ThreadPoolExecutor
|
|
114 |
|
115 |
import time
|
116 |
|
|
|
|
|
|
|
117 |
def handle_file(file, prompt, extra_prompt, max_new_tokens, progress=None):
|
118 |
try:
|
119 |
file_path = file.name if hasattr(file, "name") else file
|
120 |
filename = os.path.basename(file_path)
|
121 |
ext = filename.lower().split('.')[-1]
|
122 |
|
123 |
-
|
|
|
|
|
124 |
|
125 |
if ext == "pdf":
|
|
|
126 |
with open(file_path, "rb") as f:
|
127 |
pdf_bytes = f.read()
|
128 |
|
@@ -138,17 +144,55 @@ def handle_file(file, prompt, extra_prompt, max_new_tokens, progress=None):
|
|
138 |
img.thumbnail((3072, 3072), Image.Resampling.LANCZOS)
|
139 |
pages.append(img)
|
140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
else:
|
|
|
142 |
img = Image.open(file_path)
|
143 |
if img.mode != "RGB":
|
144 |
img = img.convert("RGB")
|
|
|
|
|
145 |
|
146 |
-
|
147 |
-
|
148 |
-
|
|
|
|
|
149 |
|
150 |
except Exception as e:
|
151 |
-
|
|
|
|
|
152 |
|
153 |
# def run_inference(img: Image.Image, prompt: str = "", max_new_tokens: int = 512) -> str:
|
154 |
# if img.mode != "RGB":
|
|
|
114 |
|
115 |
import time
|
116 |
|
117 |
+
import time
|
118 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
119 |
+
|
120 |
def handle_file(file, prompt, extra_prompt, max_new_tokens, progress=None):
|
121 |
try:
|
122 |
file_path = file.name if hasattr(file, "name") else file
|
123 |
filename = os.path.basename(file_path)
|
124 |
ext = filename.lower().split('.')[-1]
|
125 |
|
126 |
+
full_prompt = (prompt + "\n" + extra_prompt).strip() or ""
|
127 |
+
|
128 |
+
start_total = time.perf_counter()
|
129 |
|
130 |
if ext == "pdf":
|
131 |
+
start_convert = time.perf_counter()
|
132 |
with open(file_path, "rb") as f:
|
133 |
pdf_bytes = f.read()
|
134 |
|
|
|
144 |
img.thumbnail((3072, 3072), Image.Resampling.LANCZOS)
|
145 |
pages.append(img)
|
146 |
|
147 |
+
end_convert = time.perf_counter()
|
148 |
+
print(f"[INFO] Converted PDF → {len(pages)} pages in {(end_convert - start_convert):.3f}s")
|
149 |
+
|
150 |
+
# --- Xử lý inference đa luồng ---
|
151 |
+
start_infer = time.perf_counter()
|
152 |
+
outputs = []
|
153 |
+
|
154 |
+
def infer_page(img, idx):
    """Run inference on a single page image and report progress for it.

    Reads ``full_prompt``, ``max_new_tokens``, ``progress`` and ``pages``
    from the enclosing ``handle_file`` scope.

    Args:
        img: Page image passed straight to ``run_inference``.
        idx: Zero-based position of the page within ``pages``.

    Returns:
        The value returned by ``run_inference`` for this page.
    """
    # run_inference can be swapped for the real or a stub inference function.
    out = run_inference(img, full_prompt, max_new_tokens)
    print(f"[DEBUG] Page {idx+1} inference done")
    # ``progress`` defaults to None in handle_file, so test for that sentinel
    # explicitly instead of relying on the callback object's truthiness.
    # NOTE(review): presumably a Gradio progress tracker, whose truth value
    # may go through ``__len__`` -- confirm against the caller.
    if progress is not None:
        progress((idx) / len(pages), desc=f"Page {idx+1}/{len(pages)}")
    return out
|
161 |
+
|
162 |
+
with ThreadPoolExecutor(max_workers=4) as executor:
|
163 |
+
futures = {executor.submit(infer_page, img, idx): idx for idx, img in enumerate(pages)}
|
164 |
+
for future in as_completed(futures):
|
165 |
+
try:
|
166 |
+
outputs.append(future.result())
|
167 |
+
except Exception as e:
|
168 |
+
outputs.append(f"[ERROR] Inference page failed: {e}")
|
169 |
+
|
170 |
+
end_infer = time.perf_counter()
|
171 |
+
print(f"[INFO] Inference all pages done in {(end_infer - start_infer):.3f}s")
|
172 |
+
|
173 |
+
total_time = end_infer - start_total
|
174 |
+
# Ghép kết quả các trang (thường là nối chuỗi, hoặc JSON array tuỳ model)
|
175 |
+
result = "\n\n--- Page Break ---\n\n".join(outputs)
|
176 |
+
return filename, f"OKE (total time: {total_time:.3f}s)\n{result}"
|
177 |
+
|
178 |
else:
|
179 |
+
start_img = time.perf_counter()
|
180 |
img = Image.open(file_path)
|
181 |
if img.mode != "RGB":
|
182 |
img = img.convert("RGB")
|
183 |
+
end_img = time.perf_counter()
|
184 |
+
print(f"[INFO] Opened image in {(end_img - start_img):.3f}s")
|
185 |
|
186 |
+
start_infer = time.perf_counter()
|
187 |
+
result = run_inference(img, full_prompt, max_new_tokens)
|
188 |
+
end_infer = time.perf_counter()
|
189 |
+
total_time = end_infer - start_img
|
190 |
+
return filename, f"OKE (time: {total_time:.3f}s)\n{result}"
|
191 |
|
192 |
except Exception as e:
|
193 |
+
import traceback
|
194 |
+
traceback.print_exc()
|
195 |
+
return "error", f"[ERROR] handle_file failed: {e}"
|
196 |
|
197 |
# def run_inference(img: Image.Image, prompt: str = "", max_new_tokens: int = 512) -> str:
|
198 |
# if img.mode != "RGB":
|