ChintanSatva committed on
Commit
f99044c
·
verified ·
1 Parent(s): 019354b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -33,7 +33,9 @@ try:
33
  model="username/bitnet-finetuned-invoice", # Replace with your fine-tuned BitNet model
34
  device="cpu",
35
  enforce_eager=True, # Disable CUDA graph compilation
36
- max_model_len=2048, # Adjust based on memory (16GB RAM)
 
 
37
  )
38
  except Exception as e:
39
  logger.error(f"Failed to load BitNet model: {str(e)}")
@@ -87,7 +89,7 @@ async def process_pdf_page(img, page_idx):
87
  logger.info(f"Completed OCR for PDF page {page_idx}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
88
  return page_text + "\n"
89
  except Exception as e:
90
- logger.error(f"OCR failed for PDF page {idx}: {str(e)}, {log_memory_usage()}")
91
  return ""
92
 
93
  async def process_with_bitnet(filename: str, raw_text: str):
@@ -264,7 +266,7 @@ async def extract_and_structure(files: List[UploadFile] = File(...)):
264
  if not raw_text.strip():
265
  try:
266
  convert_start_time = time.time()
267
- images = convert_from_bytes(file_bytes, poppler_path="/usr/local/bin", dpi=100)
268
  logger.info(f"PDF to images conversion for {file.filename}, {len(images)} pages, took {time.time() - convert_start_time:.2f} seconds, {log_memory_usage()}")
269
 
270
  ocr_start_time = time.time()
 
33
  model="username/bitnet-finetuned-invoice", # Replace with your fine-tuned BitNet model
34
  device="cpu",
35
  enforce_eager=True, # Disable CUDA graph compilation
36
+ tensor_parallel_size=1, # Single CPU process
37
+ disable_custom_all_reduce=True, # Avoid GPU optimizations
38
+ max_model_len=2048, # Fit within 16GB RAM
39
  )
40
  except Exception as e:
41
  logger.error(f"Failed to load BitNet model: {str(e)}")
 
89
  logger.info(f"Completed OCR for PDF page {page_idx}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
90
  return page_text + "\n"
91
  except Exception as e:
92
+ logger.error(f"OCR failed for PDF page {page_idx}: {str(e)}, {log_memory_usage()}")
93
  return ""
94
 
95
  async def process_with_bitnet(filename: str, raw_text: str):
 
266
  if not raw_text.strip():
267
  try:
268
  convert_start_time = time.time()
269
+ images = convert_from_bytes(file_bytes, dpi=100)
270
  logger.info(f"PDF to images conversion for {file.filename}, {len(images)} pages, took {time.time() - convert_start_time:.2f} seconds, {log_memory_usage()}")
271
 
272
  ocr_start_time = time.time()