Spaces:

ChintanSatva
/

bitnet_expense_categorization

Sleeping

ChintanSatva committed on Jun 21

Commit

f99044c

verified ·

1 Parent(s): 019354b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -33,7 +33,9 @@ try:
         model="username/bitnet-finetuned-invoice",  # Replace with your fine-tuned BitNet model
         device="cpu",
         enforce_eager=True,  # Disable CUDA graph compilation
-        max_model_len=2048,  # Adjust based on memory (16GB RAM)
     )
 except Exception as e:
     logger.error(f"Failed to load BitNet model: {str(e)}")
@@ -87,7 +89,7 @@ async def process_pdf_page(img, page_idx):
         logger.info(f"Completed OCR for PDF page {page_idx}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
         return page_text + "\n"
     except Exception as e:
-        logger.error(f"OCR failed for PDF page {idx}: {str(e)}, {log_memory_usage()}")
         return ""
 async def process_with_bitnet(filename: str, raw_text: str):
@@ -264,7 +266,7 @@ async def extract_and_structure(files: List[UploadFile] = File(...)):
                 if not raw_text.strip():
                     try:
                         convert_start_time = time.time()
-                        images = convert_from_bytes(file_bytes, poppler_path="/usr/local/bin", dpi=100)
                         logger.info(f"PDF to images conversion for {file.filename}, {len(images)} pages, took {time.time() - convert_start_time:.2f} seconds, {log_memory_usage()}")
                         ocr_start_time = time.time()

         model="username/bitnet-finetuned-invoice",  # Replace with your fine-tuned BitNet model
         device="cpu",
         enforce_eager=True,  # Disable CUDA graph compilation
+        tensor_parallel_size=1,  # Single CPU process
+        disable_custom_all_reduce=True,  # Avoid GPU optimizations
+        max_model_len=2048,  # Fit within 16GB RAM
     )
 except Exception as e:
     logger.error(f"Failed to load BitNet model: {str(e)}")
         logger.info(f"Completed OCR for PDF page {page_idx}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
         return page_text + "\n"
     except Exception as e:
+        logger.error(f"OCR failed for PDF page {page_idx}: {str(e)}, {log_memory_usage()}")
         return ""
 async def process_with_bitnet(filename: str, raw_text: str):
                 if not raw_text.strip():
                     try:
                         convert_start_time = time.time()
+                        images = convert_from_bytes(file_bytes, dpi=100)
                         logger.info(f"PDF to images conversion for {file.filename}, {len(images)} pages, took {time.time() - convert_start_time:.2f} seconds, {log_memory_usage()}")
                         ocr_start_time = time.time()