Spaces:

MicroHealth
/

maiko-file-splitter

Paused

App Files Files Community

bluenevus committed on May 1

Commit

be234d8

1 Parent(s): bb44c88

Update app.py via AI Editor

Browse files

Files changed (1) hide show

app.py +25 -4

app.py CHANGED Viewed

@@ -91,6 +91,7 @@ def estimate_writer_size(writer):
     return f.tell()
 def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
     reader = PdfReader(input_path)
     n_pages = len(reader.pages)
     splits = []
@@ -107,6 +108,8 @@ def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
         chapter = is_chapter_header(header)
         split_here = False
         if size >= max_mb:
             split_here = True
         elif size >= min_split_mb:
@@ -115,6 +118,7 @@ def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
         if split_here:
             splits.append((last_split_at, i+1))
             last_split_at = i+1
             current_writer = PdfWriter()
         last_header = header
@@ -132,13 +136,18 @@ def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
             writer.write(f)
         size = os.path.getsize(out_path) / (1024 * 1024)
         split_files.append({'filename': os.path.basename(out_path), 'size': size, 'path': out_path})
     return split_files
 def make_zip_of_splits(split_files, session_dir):
     zip_path = os.path.join(session_dir, "split_files.zip")
     with zipfile.ZipFile(zip_path, 'w') as zipf:
         for file in split_files:
             zipf.write(file['path'], arcname=file['filename'])
     return zip_path
 external_stylesheets = [dbc.themes.BOOTSTRAP]
@@ -210,7 +219,6 @@ def persist_session_cookie():
         resp = flask.make_response("")
         resp.set_cookie('session-id', session_id, max_age=60*60*24*3)
         flask.g.session_id = session_id
-        # Attach the response only if needed
         flask.request.session_id_set = session_id
     else:
         flask.g.session_id = session_id
@@ -221,7 +229,6 @@ def persist_session_cookie():
     prevent_initial_call=False
 )
 def ensure_session_id(session_id):
-    # On first load, set session-id-store from cookie or generate new
     try:
         sid = session_id
         if not sid:
@@ -252,9 +259,11 @@ def ensure_session_id(session_id):
 )
 def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, session_data, session_id):
     trigger = ctx.triggered_id
     if not session_id:
         session_id = str(uuid.uuid4())
     flask.g.session_id = session_id
     session_dir = get_session_dir(session_id)
     lock = get_session_lock(session_id)
@@ -264,6 +273,7 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, se
     # Handle Clear Session
     if trigger == 'clear-session':
         clean_session(session_id)
         resp_data = {}
         logging.info(f"Session cleared for {session_id}")
@@ -287,14 +297,18 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, se
         if os.path.exists(session_dir):
             for file in os.listdir(session_dir):
                 os.remove(os.path.join(session_dir, file))
         return "", True, get_split_results_placeholder(), {}
     # Handle Upload
     if trigger == 'upload-pdf':
         if not contents:
             return "", True, get_split_results_placeholder(), {}
         if not allowed_file(filename):
             return html.Div("Only .pdf files are allowed.", style={'color': 'red'}), True, get_split_results_placeholder(), {}
         try:
@@ -317,6 +331,7 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, se
                     width=3, style={'display': 'flex', 'justifyContent': 'end'}
                 )
             ], className='mb-3', align='center', style={'marginTop': "15px", 'marginBottom': '25px'})
             return file_info, False, get_split_results_placeholder(), session_data
         except Exception as e:
             logging.error(f"Error processing PDF: {e}")
@@ -331,11 +346,13 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, se
                 width=3, style={'display': 'flex', 'justifyContent': 'end'}
             )
         ], className='mb-3', align='center', style={'marginTop': "15px", 'marginBottom': '25px'})
         return file_info, False, get_split_results_placeholder(), session_data
     # Handle Split
     if trigger == 'split-btn':
         orig_filename = session_data.get('orig_filename')
         if not orig_filename:
             logging.error(f"Split button clicked but no file to split for session {session_id}")
             return html.Div("No file to split.", style={'color': 'red'}), True, get_split_results_placeholder(), session_data
@@ -344,10 +361,12 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, se
             logging.error(f"Split button clicked but uploaded file not found for session {session_id}")
             return html.Div("Uploaded file not found. Please upload again.", style={'color': 'red'}), True, get_split_results_placeholder(), {}
         try:
-            logging.info(f"Splitting PDF for session {session_id} on user request. File: {pdf_path}")
             with lock:
                 split_files = intelligent_pdf_split(pdf_path, session_dir)
                 zip_path = make_zip_of_splits(split_files, session_dir)
             session_data['split_files'] = split_files
             session_data['zip_ready'] = True
             file_info = dbc.Row([
@@ -374,7 +393,7 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, se
             logging.info(f"PDF split into {len(split_files)} chunks for session {session_id}, zip ready.")
             return file_info, False, results, session_data
         except Exception as e:
-            logging.error(f"Error splitting PDF: {e}")
             return html.Div(f"Error: {e}", style={'color': 'red'}), False, get_split_results_placeholder(), session_data
     # Restore after split
@@ -402,8 +421,10 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, se
             split_files_list,
             html.Div(download_zip_btn, style={'marginTop': '30px'})
         ], id="split-results-inner")
         return file_info, False, results, session_data
     return "", True, get_split_results_placeholder(), session_data
 @app.server.route('/download_zip/<session_id>/<filename>')

     return f.tell()
 def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
+    logging.info(f"intelligent_pdf_split: Starting split for {input_path} in {session_dir}")
     reader = PdfReader(input_path)
     n_pages = len(reader.pages)
     splits = []
         chapter = is_chapter_header(header)
         split_here = False
+        logging.debug(f"Page {i}: size={size:.2f}MB, header='{header}', blank={blank}, chapter={chapter}")
         if size >= max_mb:
             split_here = True
         elif size >= min_split_mb:
         if split_here:
             splits.append((last_split_at, i+1))
+            logging.info(f"Splitting at pages {last_split_at}-{i+1} (size ~{size:.2f}MB)")
             last_split_at = i+1
             current_writer = PdfWriter()
         last_header = header
             writer.write(f)
         size = os.path.getsize(out_path) / (1024 * 1024)
         split_files.append({'filename': os.path.basename(out_path), 'size': size, 'path': out_path})
+        logging.info(f"Saved split file {out_path} ({size:.2f} MB) for pages {start}-{end-1}")
+    logging.info(f"intelligent_pdf_split: Finished. Total {len(split_files)} files created.")
     return split_files
 def make_zip_of_splits(split_files, session_dir):
     zip_path = os.path.join(session_dir, "split_files.zip")
+    logging.info(f"Creating ZIP at {zip_path} with {len(split_files)} files.")
     with zipfile.ZipFile(zip_path, 'w') as zipf:
         for file in split_files:
             zipf.write(file['path'], arcname=file['filename'])
+            logging.info(f"Added {file['filename']} to ZIP.")
+    logging.info(f"ZIP created at {zip_path}")
     return zip_path
 external_stylesheets = [dbc.themes.BOOTSTRAP]
         resp = flask.make_response("")
         resp.set_cookie('session-id', session_id, max_age=60*60*24*3)
         flask.g.session_id = session_id
         flask.request.session_id_set = session_id
     else:
         flask.g.session_id = session_id
     prevent_initial_call=False
 )
 def ensure_session_id(session_id):
     try:
         sid = session_id
         if not sid:
 )
 def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, session_data, session_id):
     trigger = ctx.triggered_id
+    logging.info(f"handle_upload: Triggered by {trigger}, session_id={session_id}")
     if not session_id:
         session_id = str(uuid.uuid4())
+        logging.info(f"handle_upload: Generated new session_id {session_id}")
     flask.g.session_id = session_id
     session_dir = get_session_dir(session_id)
     lock = get_session_lock(session_id)
     # Handle Clear Session
     if trigger == 'clear-session':
+        logging.info(f"handle_upload: Clear session button pressed for {session_id}")
         clean_session(session_id)
         resp_data = {}
         logging.info(f"Session cleared for {session_id}")
         if os.path.exists(session_dir):
             for file in os.listdir(session_dir):
                 os.remove(os.path.join(session_dir, file))
+        logging.info(f"Session files deleted for {session_id}")
         return "", True, get_split_results_placeholder(), {}
     # Handle Upload
     if trigger == 'upload-pdf':
+        logging.info(f"handle_upload: Upload triggered for filename={filename}, session_id={session_id}")
         if not contents:
+            logging.warning("No contents received in upload.")
             return "", True, get_split_results_placeholder(), {}
         if not allowed_file(filename):
+            logging.warning(f"Disallowed file attempted upload: {filename}")
             return html.Div("Only .pdf files are allowed.", style={'color': 'red'}), True, get_split_results_placeholder(), {}
         try:
                     width=3, style={'display': 'flex', 'justifyContent': 'end'}
                 )
             ], className='mb-3', align='center', style={'marginTop': "15px", 'marginBottom': '25px'})
+            logging.info(f"handle_upload: File info UI updated, split button enabled.")
             return file_info, False, get_split_results_placeholder(), session_data
         except Exception as e:
             logging.error(f"Error processing PDF: {e}")
                 width=3, style={'display': 'flex', 'justifyContent': 'end'}
             )
         ], className='mb-3', align='center', style={'marginTop': "15px", 'marginBottom': '25px'})
+        logging.info(f"handle_upload: Restoring view after upload, split button enabled.")
         return file_info, False, get_split_results_placeholder(), session_data
     # Handle Split
     if trigger == 'split-btn':
         orig_filename = session_data.get('orig_filename')
+        logging.info(f"handle_upload: Split button clicked for {session_id}, orig_filename={orig_filename}")
         if not orig_filename:
             logging.error(f"Split button clicked but no file to split for session {session_id}")
             return html.Div("No file to split.", style={'color': 'red'}), True, get_split_results_placeholder(), session_data
             logging.error(f"Split button clicked but uploaded file not found for session {session_id}")
             return html.Div("Uploaded file not found. Please upload again.", style={'color': 'red'}), True, get_split_results_placeholder(), {}
         try:
+            logging.info(f"Splitting PDF for session {session_id}. File: {pdf_path}")
             with lock:
+                logging.info(f"handle_upload: Acquired lock for session {session_id}, starting split.")
                 split_files = intelligent_pdf_split(pdf_path, session_dir)
                 zip_path = make_zip_of_splits(split_files, session_dir)
+                logging.info(f"handle_upload: Split/ZIP finished for {session_id}, zip_path={zip_path}")
             session_data['split_files'] = split_files
             session_data['zip_ready'] = True
             file_info = dbc.Row([
             logging.info(f"PDF split into {len(split_files)} chunks for session {session_id}, zip ready.")
             return file_info, False, results, session_data
         except Exception as e:
+            logging.error(f"Error splitting PDF for session {session_id}: {e}")
             return html.Div(f"Error: {e}", style={'color': 'red'}), False, get_split_results_placeholder(), session_data
     # Restore after split
             split_files_list,
             html.Div(download_zip_btn, style={'marginTop': '30px'})
         ], id="split-results-inner")
+        logging.info(f"handle_upload: Restoring split results for session {session_id}, {len(split_files)} files.")
         return file_info, False, results, session_data
+    logging.info(f"handle_upload: No action taken, returning current session_data for session {session_id}")
     return "", True, get_split_results_placeholder(), session_data
 @app.server.route('/download_zip/<session_id>/<filename>')