Updates to include the original file name in chunk names
#1
by
zegner
- opened
app.py
CHANGED
@@ -90,7 +90,9 @@ def estimate_writer_size(writer):
|
|
90 |
writer.write(f)
|
91 |
return f.tell()
|
92 |
|
93 |
-
def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
|
|
|
|
|
94 |
logging.info(f"intelligent_pdf_split: Starting split for {input_path} in {session_dir}")
|
95 |
reader = PdfReader(input_path)
|
96 |
n_pages = len(reader.pages)
|
@@ -127,11 +129,13 @@ def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
|
|
127 |
|
128 |
split_files = []
|
129 |
input_size = os.path.getsize(input_path) / (1024 * 1024)
|
|
|
130 |
for idx, (start, end) in enumerate(splits):
|
131 |
writer = PdfWriter()
|
132 |
for p in range(start, end):
|
133 |
writer.add_page(reader.pages[p])
|
134 |
-
|
|
|
135 |
with open(out_path, 'wb') as f:
|
136 |
writer.write(f)
|
137 |
size = os.path.getsize(out_path) / (1024 * 1024)
|
@@ -448,7 +452,7 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, de
|
|
448 |
try:
|
449 |
logging.info(f"Splitting PDF for session {session_id}. File: {pdf_path}")
|
450 |
with lock:
|
451 |
-
split_files = intelligent_pdf_split(pdf_path, session_dir)
|
452 |
for fi in split_files:
|
453 |
logging.info(f"Split file saved: {fi['path']} ({fi['size']:.2f} MB)")
|
454 |
zip_path = make_zip_of_splits(split_files, session_dir)
|
|
|
90 |
writer.write(f)
|
91 |
return f.tell()
|
92 |
|
93 |
+
def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4, orig_filename=None):
|
94 |
+
import io
|
95 |
+
from pathlib import Path
|
96 |
logging.info(f"intelligent_pdf_split: Starting split for {input_path} in {session_dir}")
|
97 |
reader = PdfReader(input_path)
|
98 |
n_pages = len(reader.pages)
|
|
|
129 |
|
130 |
split_files = []
|
131 |
input_size = os.path.getsize(input_path) / (1024 * 1024)
|
132 |
+
stem = Path(orig_filename).stem if orig_filename else "split_part"
|
133 |
for idx, (start, end) in enumerate(splits):
|
134 |
writer = PdfWriter()
|
135 |
for p in range(start, end):
|
136 |
writer.add_page(reader.pages[p])
|
137 |
+
part_name = f"{stem}_part_{idx+1}.pdf"
|
138 |
+
out_path = os.path.join(session_dir, part_name)
|
139 |
with open(out_path, 'wb') as f:
|
140 |
writer.write(f)
|
141 |
size = os.path.getsize(out_path) / (1024 * 1024)
|
|
|
452 |
try:
|
453 |
logging.info(f"Splitting PDF for session {session_id}. File: {pdf_path}")
|
454 |
with lock:
|
455 |
+
split_files = intelligent_pdf_split(pdf_path, session_dir, orig_filename=orig_filename)
|
456 |
for fi in split_files:
|
457 |
logging.info(f"Split file saved: {fi['path']} ({fi['size']:.2f} MB)")
|
458 |
zip_path = make_zip_of_splits(split_files, session_dir)
|