Update split PDF chunk file names to include the original file name

#1
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -90,7 +90,9 @@ def estimate_writer_size(writer):
90
  writer.write(f)
91
  return f.tell()
92
 
93
- def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
 
 
94
  logging.info(f"intelligent_pdf_split: Starting split for {input_path} in {session_dir}")
95
  reader = PdfReader(input_path)
96
  n_pages = len(reader.pages)
@@ -127,11 +129,13 @@ def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
127
 
128
  split_files = []
129
  input_size = os.path.getsize(input_path) / (1024 * 1024)
 
130
  for idx, (start, end) in enumerate(splits):
131
  writer = PdfWriter()
132
  for p in range(start, end):
133
  writer.add_page(reader.pages[p])
134
- out_path = os.path.join(session_dir, f'split_part_{idx+1}.pdf')
 
135
  with open(out_path, 'wb') as f:
136
  writer.write(f)
137
  size = os.path.getsize(out_path) / (1024 * 1024)
@@ -448,7 +452,7 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, de
448
  try:
449
  logging.info(f"Splitting PDF for session {session_id}. File: {pdf_path}")
450
  with lock:
451
- split_files = intelligent_pdf_split(pdf_path, session_dir)
452
  for fi in split_files:
453
  logging.info(f"Split file saved: {fi['path']} ({fi['size']:.2f} MB)")
454
  zip_path = make_zip_of_splits(split_files, session_dir)
 
90
  writer.write(f)
91
  return f.tell()
92
 
93
+ def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4, orig_filename=None):
94
+ import io
95
+ from pathlib import Path
96
  logging.info(f"intelligent_pdf_split: Starting split for {input_path} in {session_dir}")
97
  reader = PdfReader(input_path)
98
  n_pages = len(reader.pages)
 
129
 
130
  split_files = []
131
  input_size = os.path.getsize(input_path) / (1024 * 1024)
132
+ stem = Path(orig_filename).stem if orig_filename else "split_part"
133
  for idx, (start, end) in enumerate(splits):
134
  writer = PdfWriter()
135
  for p in range(start, end):
136
  writer.add_page(reader.pages[p])
137
+ part_name = f"{stem}_part_{idx+1}.pdf"
138
+ out_path = os.path.join(session_dir, part_name)
139
  with open(out_path, 'wb') as f:
140
  writer.write(f)
141
  size = os.path.getsize(out_path) / (1024 * 1024)
 
452
  try:
453
  logging.info(f"Splitting PDF for session {session_id}. File: {pdf_path}")
454
  with lock:
455
+ split_files = intelligent_pdf_split(pdf_path, session_dir, orig_filename=orig_filename)
456
  for fi in split_files:
457
  logging.info(f"Split file saved: {fi['path']} ({fi['size']:.2f} MB)")
458
  zip_path = make_zip_of_splits(split_files, session_dir)