Update app.py
Browse files
app.py
CHANGED
@@ -33,7 +33,9 @@ try:
|
|
33 |
model="username/bitnet-finetuned-invoice", # Replace with your fine-tuned BitNet model
|
34 |
device="cpu",
|
35 |
enforce_eager=True, # Disable CUDA graph compilation
|
36 |
-
|
|
|
|
|
37 |
)
|
38 |
except Exception as e:
|
39 |
logger.error(f"Failed to load BitNet model: {str(e)}")
|
@@ -87,7 +89,7 @@ async def process_pdf_page(img, page_idx):
|
|
87 |
logger.info(f"Completed OCR for PDF page {page_idx}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
|
88 |
return page_text + "\n"
|
89 |
except Exception as e:
|
90 |
-
logger.error(f"OCR failed for PDF page {
|
91 |
return ""
|
92 |
|
93 |
async def process_with_bitnet(filename: str, raw_text: str):
|
@@ -264,7 +266,7 @@ async def extract_and_structure(files: List[UploadFile] = File(...)):
|
|
264 |
if not raw_text.strip():
|
265 |
try:
|
266 |
convert_start_time = time.time()
|
267 |
-
images = convert_from_bytes(file_bytes,
|
268 |
logger.info(f"PDF to images conversion for {file.filename}, {len(images)} pages, took {time.time() - convert_start_time:.2f} seconds, {log_memory_usage()}")
|
269 |
|
270 |
ocr_start_time = time.time()
|
|
|
33 |
model="username/bitnet-finetuned-invoice", # Replace with your fine-tuned BitNet model
|
34 |
device="cpu",
|
35 |
enforce_eager=True, # Disable CUDA graph compilation
|
36 |
+
tensor_parallel_size=1, # Single CPU process
|
37 |
+
disable_custom_all_reduce=True, # Avoid GPU optimizations
|
38 |
+
max_model_len=2048, # Fit within 16GB RAM
|
39 |
)
|
40 |
except Exception as e:
|
41 |
logger.error(f"Failed to load BitNet model: {str(e)}")
|
|
|
89 |
logger.info(f"Completed OCR for PDF page {page_idx}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
|
90 |
return page_text + "\n"
|
91 |
except Exception as e:
|
92 |
+
logger.error(f"OCR failed for PDF page {page_idx}: {str(e)}, {log_memory_usage()}")
|
93 |
return ""
|
94 |
|
95 |
async def process_with_bitnet(filename: str, raw_text: str):
|
|
|
266 |
if not raw_text.strip():
|
267 |
try:
|
268 |
convert_start_time = time.time()
|
269 |
+
images = convert_from_bytes(file_bytes, dpi=100)
|
270 |
logger.info(f"PDF to images conversion for {file.filename}, {len(images)} pages, took {time.time() - convert_start_time:.2f} seconds, {log_memory_usage()}")
|
271 |
|
272 |
ocr_start_time = time.time()
|