bluenevus committed on
Commit
be234d8
·
1 Parent(s): bb44c88

Update app.py via AI Editor

Browse files
Files changed (1) hide show
  1. app.py +25 -4
app.py CHANGED
@@ -91,6 +91,7 @@ def estimate_writer_size(writer):
91
  return f.tell()
92
 
93
  def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
 
94
  reader = PdfReader(input_path)
95
  n_pages = len(reader.pages)
96
  splits = []
@@ -107,6 +108,8 @@ def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
107
  chapter = is_chapter_header(header)
108
  split_here = False
109
 
 
 
110
  if size >= max_mb:
111
  split_here = True
112
  elif size >= min_split_mb:
@@ -115,6 +118,7 @@ def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
115
 
116
  if split_here:
117
  splits.append((last_split_at, i+1))
 
118
  last_split_at = i+1
119
  current_writer = PdfWriter()
120
  last_header = header
@@ -132,13 +136,18 @@ def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
132
  writer.write(f)
133
  size = os.path.getsize(out_path) / (1024 * 1024)
134
  split_files.append({'filename': os.path.basename(out_path), 'size': size, 'path': out_path})
 
 
135
  return split_files
136
 
137
  def make_zip_of_splits(split_files, session_dir):
138
  zip_path = os.path.join(session_dir, "split_files.zip")
 
139
  with zipfile.ZipFile(zip_path, 'w') as zipf:
140
  for file in split_files:
141
  zipf.write(file['path'], arcname=file['filename'])
 
 
142
  return zip_path
143
 
144
  external_stylesheets = [dbc.themes.BOOTSTRAP]
@@ -210,7 +219,6 @@ def persist_session_cookie():
210
  resp = flask.make_response("")
211
  resp.set_cookie('session-id', session_id, max_age=60*60*24*3)
212
  flask.g.session_id = session_id
213
- # Attach the response only if needed
214
  flask.request.session_id_set = session_id
215
  else:
216
  flask.g.session_id = session_id
@@ -221,7 +229,6 @@ def persist_session_cookie():
221
  prevent_initial_call=False
222
  )
223
  def ensure_session_id(session_id):
224
- # On first load, set session-id-store from cookie or generate new
225
  try:
226
  sid = session_id
227
  if not sid:
@@ -252,9 +259,11 @@ def ensure_session_id(session_id):
252
  )
253
  def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, session_data, session_id):
254
  trigger = ctx.triggered_id
 
255
 
256
  if not session_id:
257
  session_id = str(uuid.uuid4())
 
258
  flask.g.session_id = session_id
259
  session_dir = get_session_dir(session_id)
260
  lock = get_session_lock(session_id)
@@ -264,6 +273,7 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, se
264
 
265
  # Handle Clear Session
266
  if trigger == 'clear-session':
 
267
  clean_session(session_id)
268
  resp_data = {}
269
  logging.info(f"Session cleared for {session_id}")
@@ -287,14 +297,18 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, se
287
  if os.path.exists(session_dir):
288
  for file in os.listdir(session_dir):
289
  os.remove(os.path.join(session_dir, file))
 
290
  return "", True, get_split_results_placeholder(), {}
291
 
292
  # Handle Upload
293
  if trigger == 'upload-pdf':
 
294
  if not contents:
 
295
  return "", True, get_split_results_placeholder(), {}
296
 
297
  if not allowed_file(filename):
 
298
  return html.Div("Only .pdf files are allowed.", style={'color': 'red'}), True, get_split_results_placeholder(), {}
299
 
300
  try:
@@ -317,6 +331,7 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, se
317
  width=3, style={'display': 'flex', 'justifyContent': 'end'}
318
  )
319
  ], className='mb-3', align='center', style={'marginTop': "15px", 'marginBottom': '25px'})
 
320
  return file_info, False, get_split_results_placeholder(), session_data
321
  except Exception as e:
322
  logging.error(f"Error processing PDF: {e}")
@@ -331,11 +346,13 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, se
331
  width=3, style={'display': 'flex', 'justifyContent': 'end'}
332
  )
333
  ], className='mb-3', align='center', style={'marginTop': "15px", 'marginBottom': '25px'})
 
334
  return file_info, False, get_split_results_placeholder(), session_data
335
 
336
  # Handle Split
337
  if trigger == 'split-btn':
338
  orig_filename = session_data.get('orig_filename')
 
339
  if not orig_filename:
340
  logging.error(f"Split button clicked but no file to split for session {session_id}")
341
  return html.Div("No file to split.", style={'color': 'red'}), True, get_split_results_placeholder(), session_data
@@ -344,10 +361,12 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, se
344
  logging.error(f"Split button clicked but uploaded file not found for session {session_id}")
345
  return html.Div("Uploaded file not found. Please upload again.", style={'color': 'red'}), True, get_split_results_placeholder(), {}
346
  try:
347
- logging.info(f"Splitting PDF for session {session_id} on user request. File: {pdf_path}")
348
  with lock:
 
349
  split_files = intelligent_pdf_split(pdf_path, session_dir)
350
  zip_path = make_zip_of_splits(split_files, session_dir)
 
351
  session_data['split_files'] = split_files
352
  session_data['zip_ready'] = True
353
  file_info = dbc.Row([
@@ -374,7 +393,7 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, se
374
  logging.info(f"PDF split into {len(split_files)} chunks for session {session_id}, zip ready.")
375
  return file_info, False, results, session_data
376
  except Exception as e:
377
- logging.error(f"Error splitting PDF: {e}")
378
  return html.Div(f"Error: {e}", style={'color': 'red'}), False, get_split_results_placeholder(), session_data
379
 
380
  # Restore after split
@@ -402,8 +421,10 @@ def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, se
402
  split_files_list,
403
  html.Div(download_zip_btn, style={'marginTop': '30px'})
404
  ], id="split-results-inner")
 
405
  return file_info, False, results, session_data
406
 
 
407
  return "", True, get_split_results_placeholder(), session_data
408
 
409
  @app.server.route('/download_zip/<session_id>/<filename>')
 
91
  return f.tell()
92
 
93
  def intelligent_pdf_split(input_path, session_dir, max_mb=5, min_split_mb=4):
94
+ logging.info(f"intelligent_pdf_split: Starting split for {input_path} in {session_dir}")
95
  reader = PdfReader(input_path)
96
  n_pages = len(reader.pages)
97
  splits = []
 
108
  chapter = is_chapter_header(header)
109
  split_here = False
110
 
111
+ logging.debug(f"Page {i}: size={size:.2f}MB, header='{header}', blank={blank}, chapter={chapter}")
112
+
113
  if size >= max_mb:
114
  split_here = True
115
  elif size >= min_split_mb:
 
118
 
119
  if split_here:
120
  splits.append((last_split_at, i+1))
121
+ logging.info(f"Splitting at pages {last_split_at}-{i+1} (size ~{size:.2f}MB)")
122
  last_split_at = i+1
123
  current_writer = PdfWriter()
124
  last_header = header
 
136
  writer.write(f)
137
  size = os.path.getsize(out_path) / (1024 * 1024)
138
  split_files.append({'filename': os.path.basename(out_path), 'size': size, 'path': out_path})
139
+ logging.info(f"Saved split file {out_path} ({size:.2f} MB) for pages {start}-{end-1}")
140
+ logging.info(f"intelligent_pdf_split: Finished. Total {len(split_files)} files created.")
141
  return split_files
142
 
143
  def make_zip_of_splits(split_files, session_dir):
144
  zip_path = os.path.join(session_dir, "split_files.zip")
145
+ logging.info(f"Creating ZIP at {zip_path} with {len(split_files)} files.")
146
  with zipfile.ZipFile(zip_path, 'w') as zipf:
147
  for file in split_files:
148
  zipf.write(file['path'], arcname=file['filename'])
149
+ logging.info(f"Added {file['filename']} to ZIP.")
150
+ logging.info(f"ZIP created at {zip_path}")
151
  return zip_path
152
 
153
  external_stylesheets = [dbc.themes.BOOTSTRAP]
 
219
  resp = flask.make_response("")
220
  resp.set_cookie('session-id', session_id, max_age=60*60*24*3)
221
  flask.g.session_id = session_id
 
222
  flask.request.session_id_set = session_id
223
  else:
224
  flask.g.session_id = session_id
 
229
  prevent_initial_call=False
230
  )
231
  def ensure_session_id(session_id):
 
232
  try:
233
  sid = session_id
234
  if not sid:
 
259
  )
260
  def handle_upload(contents, filename, clear_n, delete_upload_n_list, split_n, session_data, session_id):
261
  trigger = ctx.triggered_id
262
+ logging.info(f"handle_upload: Triggered by {trigger}, session_id={session_id}")
263
 
264
  if not session_id:
265
  session_id = str(uuid.uuid4())
266
+ logging.info(f"handle_upload: Generated new session_id {session_id}")
267
  flask.g.session_id = session_id
268
  session_dir = get_session_dir(session_id)
269
  lock = get_session_lock(session_id)
 
273
 
274
  # Handle Clear Session
275
  if trigger == 'clear-session':
276
+ logging.info(f"handle_upload: Clear session button pressed for {session_id}")
277
  clean_session(session_id)
278
  resp_data = {}
279
  logging.info(f"Session cleared for {session_id}")
 
297
  if os.path.exists(session_dir):
298
  for file in os.listdir(session_dir):
299
  os.remove(os.path.join(session_dir, file))
300
+ logging.info(f"Session files deleted for {session_id}")
301
  return "", True, get_split_results_placeholder(), {}
302
 
303
  # Handle Upload
304
  if trigger == 'upload-pdf':
305
+ logging.info(f"handle_upload: Upload triggered for filename={filename}, session_id={session_id}")
306
  if not contents:
307
+ logging.warning("No contents received in upload.")
308
  return "", True, get_split_results_placeholder(), {}
309
 
310
  if not allowed_file(filename):
311
+ logging.warning(f"Disallowed file attempted upload: {filename}")
312
  return html.Div("Only .pdf files are allowed.", style={'color': 'red'}), True, get_split_results_placeholder(), {}
313
 
314
  try:
 
331
  width=3, style={'display': 'flex', 'justifyContent': 'end'}
332
  )
333
  ], className='mb-3', align='center', style={'marginTop': "15px", 'marginBottom': '25px'})
334
+ logging.info(f"handle_upload: File info UI updated, split button enabled.")
335
  return file_info, False, get_split_results_placeholder(), session_data
336
  except Exception as e:
337
  logging.error(f"Error processing PDF: {e}")
 
346
  width=3, style={'display': 'flex', 'justifyContent': 'end'}
347
  )
348
  ], className='mb-3', align='center', style={'marginTop': "15px", 'marginBottom': '25px'})
349
+ logging.info(f"handle_upload: Restoring view after upload, split button enabled.")
350
  return file_info, False, get_split_results_placeholder(), session_data
351
 
352
  # Handle Split
353
  if trigger == 'split-btn':
354
  orig_filename = session_data.get('orig_filename')
355
+ logging.info(f"handle_upload: Split button clicked for {session_id}, orig_filename={orig_filename}")
356
  if not orig_filename:
357
  logging.error(f"Split button clicked but no file to split for session {session_id}")
358
  return html.Div("No file to split.", style={'color': 'red'}), True, get_split_results_placeholder(), session_data
 
361
  logging.error(f"Split button clicked but uploaded file not found for session {session_id}")
362
  return html.Div("Uploaded file not found. Please upload again.", style={'color': 'red'}), True, get_split_results_placeholder(), {}
363
  try:
364
+ logging.info(f"Splitting PDF for session {session_id}. File: {pdf_path}")
365
  with lock:
366
+ logging.info(f"handle_upload: Acquired lock for session {session_id}, starting split.")
367
  split_files = intelligent_pdf_split(pdf_path, session_dir)
368
  zip_path = make_zip_of_splits(split_files, session_dir)
369
+ logging.info(f"handle_upload: Split/ZIP finished for {session_id}, zip_path={zip_path}")
370
  session_data['split_files'] = split_files
371
  session_data['zip_ready'] = True
372
  file_info = dbc.Row([
 
393
  logging.info(f"PDF split into {len(split_files)} chunks for session {session_id}, zip ready.")
394
  return file_info, False, results, session_data
395
  except Exception as e:
396
+ logging.error(f"Error splitting PDF for session {session_id}: {e}")
397
  return html.Div(f"Error: {e}", style={'color': 'red'}), False, get_split_results_placeholder(), session_data
398
 
399
  # Restore after split
 
421
  split_files_list,
422
  html.Div(download_zip_btn, style={'marginTop': '30px'})
423
  ], id="split-results-inner")
424
+ logging.info(f"handle_upload: Restoring split results for session {session_id}, {len(split_files)} files.")
425
  return file_info, False, results, session_data
426
 
427
+ logging.info(f"handle_upload: No action taken, returning current session_data for session {session_id}")
428
  return "", True, get_split_results_placeholder(), session_data
429
 
430
  @app.server.route('/download_zip/<session_id>/<filename>')