updates to include original file name in chunk names
Browse files
Have not tested this yet, but it should make it so we can utilize the original file name.
app.py
CHANGED
|
@@ -90,7 +90,9 @@ def estimate_writer_size(writer):
|
|
| 90 |
writer.write(f)
|
| 91 |
return f.tell()
|
| 92 |
|
| 93 |
-
def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
|
|
|
|
|
|
|
| 94 |
logging.info(f"intelligent_pdf_split: Starting split for {input_path} in {session_dir}")
|
| 95 |
reader = PdfReader(input_path)
|
| 96 |
n_pages = len(reader.pages)
|
|
@@ -127,11 +129,13 @@ def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
|
|
| 127 |
|
| 128 |
split_files = []
|
| 129 |
input_size = os.path.getsize(input_path) / (1024 * 1024)
|
|
|
|
| 130 |
for idx, (start, end) in enumerate(splits):
|
| 131 |
writer = PdfWriter()
|
| 132 |
for p in range(start, end):
|
| 133 |
writer.add_page(reader.pages[p])
|
| 134 |
-
|
|
|
|
| 135 |
with open(out_path, 'wb') as f:
|
| 136 |
writer.write(f)
|
| 137 |
size = os.path.getsize(out_path) / (1024 * 1024)
|
|
@@ -448,7 +452,7 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, de
|
|
| 448 |
try:
|
| 449 |
logging.info(f"Splitting PDF for session {session_id}. File: {pdf_path}")
|
| 450 |
with lock:
|
| 451 |
-
split_files = intelligent_pdf_split(pdf_path, session_dir)
|
| 452 |
for fi in split_files:
|
| 453 |
logging.info(f"Split file saved: {fi['path']} ({fi['size']:.2f} MB)")
|
| 454 |
zip_path = make_zip_of_splits(split_files, session_dir)
|
|
|
|
| 90 |
writer.write(f)
|
| 91 |
return f.tell()
|
| 92 |
|
| 93 |
+
def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4, orig_filename=None):
|
| 94 |
+
import io
|
| 95 |
+
from pathlib import Path
|
| 96 |
logging.info(f"intelligent_pdf_split: Starting split for {input_path} in {session_dir}")
|
| 97 |
reader = PdfReader(input_path)
|
| 98 |
n_pages = len(reader.pages)
|
|
|
|
| 129 |
|
| 130 |
split_files = []
|
| 131 |
input_size = os.path.getsize(input_path) / (1024 * 1024)
|
| 132 |
+
stem = Path(orig_filename).stem if orig_filename else "split_part"
|
| 133 |
for idx, (start, end) in enumerate(splits):
|
| 134 |
writer = PdfWriter()
|
| 135 |
for p in range(start, end):
|
| 136 |
writer.add_page(reader.pages[p])
|
| 137 |
+
part_name = f"{stem}_part_{idx+1}.pdf"
|
| 138 |
+
out_path = os.path.join(session_dir, part_name)
|
| 139 |
with open(out_path, 'wb') as f:
|
| 140 |
writer.write(f)
|
| 141 |
size = os.path.getsize(out_path) / (1024 * 1024)
|
|
|
|
| 452 |
try:
|
| 453 |
logging.info(f"Splitting PDF for session {session_id}. File: {pdf_path}")
|
| 454 |
with lock:
|
| 455 |
+
split_files = intelligent_pdf_split(pdf_path, session_dir, orig_filename=orig_filename)
|
| 456 |
for fi in split_files:
|
| 457 |
logging.info(f"Split file saved: {fi['path']} ({fi['size']:.2f} MB)")
|
| 458 |
zip_path = make_zip_of_splits(split_files, session_dir)
|