zegner committed on
Commit
3385c9f
·
verified ·
1 Parent(s): ff01f81

Update to include the original file name in chunk names

Browse files

I have not tested this yet, but it should let us use the original file name when naming the split chunks.

Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -90,7 +90,9 @@ def estimate_writer_size(writer):
90
  writer.write(f)
91
  return f.tell()
92
 
93
- def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
 
 
94
  logging.info(f"intelligent_pdf_split: Starting split for {input_path} in {session_dir}")
95
  reader = PdfReader(input_path)
96
  n_pages = len(reader.pages)
@@ -127,11 +129,13 @@ def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
127
 
128
  split_files = []
129
  input_size = os.path.getsize(input_path) / (1024 * 1024)
 
130
  for idx, (start, end) in enumerate(splits):
131
  writer = PdfWriter()
132
  for p in range(start, end):
133
  writer.add_page(reader.pages[p])
134
- out_path = os.path.join(session_dir, f'split_part_{idx+1}.pdf')
 
135
  with open(out_path, 'wb') as f:
136
  writer.write(f)
137
  size = os.path.getsize(out_path) / (1024 * 1024)
@@ -448,7 +452,7 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, de
448
  try:
449
  logging.info(f"Splitting PDF for session {session_id}. File: {pdf_path}")
450
  with lock:
451
- split_files = intelligent_pdf_split(pdf_path, session_dir)
452
  for fi in split_files:
453
  logging.info(f"Split file saved: {fi['path']} ({fi['size']:.2f} MB)")
454
  zip_path = make_zip_of_splits(split_files, session_dir)
 
90
  writer.write(f)
91
  return f.tell()
92
 
93
+ def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4, orig_filename=None):
94
+ import io
95
+ from pathlib import Path
96
  logging.info(f"intelligent_pdf_split: Starting split for {input_path} in {session_dir}")
97
  reader = PdfReader(input_path)
98
  n_pages = len(reader.pages)
 
129
 
130
  split_files = []
131
  input_size = os.path.getsize(input_path) / (1024 * 1024)
132
+ stem = Path(orig_filename).stem if orig_filename else "split_part"
133
  for idx, (start, end) in enumerate(splits):
134
  writer = PdfWriter()
135
  for p in range(start, end):
136
  writer.add_page(reader.pages[p])
137
+ part_name = f"{stem}_part_{idx+1}.pdf"
138
+ out_path = os.path.join(session_dir, part_name)
139
  with open(out_path, 'wb') as f:
140
  writer.write(f)
141
  size = os.path.getsize(out_path) / (1024 * 1024)
 
452
  try:
453
  logging.info(f"Splitting PDF for session {session_id}. File: {pdf_path}")
454
  with lock:
455
+ split_files = intelligent_pdf_split(pdf_path, session_dir, orig_filename=orig_filename)
456
  for fi in split_files:
457
  logging.info(f"Split file saved: {fi['path']} ({fi['size']:.2f} MB)")
458
  zip_path = make_zip_of_splits(split_files, session_dir)