import base64
import io
import json
import logging
import os
import zipfile
from threading import Thread

import dash
import dash_bootstrap_components as dbc
from dash import dcc, html, Input, Output, State, callback, MATCH, ALL
from dash.exceptions import PreventUpdate
from PyPDF2 import PdfReader, PdfWriter

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Global variables shared between the callbacks and the worker thread.
generated_file = None  # bytes of the most recently built ZIP, or None
progress = 0  # 0-100 while working, -1 after a failure


def _build_range_row(index):
    """Build one page-range row: a text input plus its "Remove" button.

    Both components carry pattern-matching ids that share ``index`` so the
    remove callback can tell which row a click belongs to.
    """
    return dbc.Row([
        dbc.Col(
            dbc.Input(
                id={'type': 'range-input', 'index': index},
                type='text',
                placeholder='Enter page range (e.g., 1-3)',
            ),
            width=10,
        ),
        dbc.Col(
            dbc.Button(
                "Remove",
                id={'type': 'remove-range', 'index': index},
                color="danger",
                size="sm",
            ),
            width=2,
        ),
    ], className="mb-2")


def _find_row_index(node):
    """Return the pattern-matching ``index`` found inside a serialized row.

    Rows arrive in callback ``State`` as nested dicts/lists, so the tree is
    walked until a dict-valued ``id`` holding an ``index`` key is found.
    Returns ``None`` when no such id exists.
    """
    if isinstance(node, dict):
        component_id = node.get('id')
        if isinstance(component_id, dict) and 'index' in component_id:
            return component_id['index']
        children = node.values()
    elif isinstance(node, list):
        children = node
    else:
        return None
    for child in children:
        found = _find_row_index(child)
        # Compare against None explicitly: index 0 is a valid result.
        if found is not None:
            return found
    return None


# Layout
app.layout = dbc.Container([
    html.H1("PDF Splitter", className="my-4"),
    dcc.Upload(
        id='upload-pdf',
        children=html.Div(['Drag and Drop or ', html.A('Select PDF')]),
        style={
            'width': '100%',
            'height': '60px',
            'lineHeight': '60px',
            'borderWidth': '1px',
            'borderStyle': 'dashed',
            'borderRadius': '5px',
            'textAlign': 'center',
            'margin': '10px'
        },
        multiple=False
    ),
    html.Div(id='pdf-name'),
    dbc.Card([
        dbc.CardBody([
            html.Div(id='ranges-container', children=[
                _build_range_row(0),
            ]),
            dbc.Button("Add Range", id='add-range', color="secondary", className="mt-2"),
        ])
    ], className="my-3"),
    dbc.Button("Split PDF", id='split-button', color="primary", className="mt-3", disabled=True),
    dbc.Progress(id='progress-bar', className="my-3"),
    dbc.Button("Download ZIP", id='download-button', color="success", className="mt-3"),
    dcc.Download(id="download-zip"),
    html.Div(id='log-output', style={'whiteSpace': 'pre-line'}),
], fluid=True)


@callback(
    Output('pdf-name', 'children'),
    Output('split-button', 'disabled'),
    Input('upload-pdf', 'contents'),
    Input('upload-pdf', 'filename')
)
def update_output(contents, filename):
    """Show the uploaded file name and enable the "Split PDF" button.

    Returns a ``(children, disabled)`` pair: the label and ``False`` once a
    file has been uploaded, otherwise an empty label and ``True``.
    """
    if contents is None:
        return "", True
    logger.info(f"PDF uploaded: {filename}")
    return html.Div(f"Uploaded: {filename}"), False


@callback(
    Output('ranges-container', 'children'),
    Input('add-range', 'n_clicks'),
    State('ranges-container', 'children'),
    prevent_initial_call=True
)
def add_range(n_clicks, existing_ranges):
    """Append a new, empty page-range row to the container.

    The new row's index is one more than the largest index still on the page
    rather than the row count: after a removal the count can equal an index
    that is still in use, which would create two components with the same id.
    """
    rows = list(existing_ranges or [])
    if n_clicks:
        used = [idx for idx in map(_find_row_index, rows) if isinstance(idx, int)]
        rows.append(_build_range_row(max(used, default=-1) + 1))
    return rows


@callback(
    Output('ranges-container', 'children', allow_duplicate=True),
    Input({'type': 'remove-range', 'index': ALL}, 'n_clicks'),
    State('ranges-container', 'children'),
    prevent_initial_call=True
)
def remove_range(n_clicks, existing_ranges):
    """Remove the row whose "Remove" button was clicked.

    Rows are matched by the index stored in their component ids, not by their
    position in the list; positions shift after every removal while the ids
    stay fixed.

    Raises:
        PreventUpdate: when nothing triggered the callback or the trigger
            carries no click count.
    """
    ctx = dash.callback_context
    if not ctx.triggered:
        raise PreventUpdate

    trigger = ctx.triggered[0]
    # NOTE(review): Dash may invoke this callback when a new "Remove" button
    # is inserted (its n_clicks is still None) - only react to real clicks.
    if not trigger.get('value'):
        raise PreventUpdate

    button_id = trigger['prop_id'].rsplit('.', 1)[0]
    index_to_remove = json.loads(button_id)['index']
    return [
        row for row in (existing_ranges or [])
        if _find_row_index(row) != index_to_remove
    ]


@callback(
    Output('progress-bar', 'value'),
    Output('log-output', 'children'),
    Input('split-button', 'n_clicks'),
    State('upload-pdf', 'contents'),
    State('upload-pdf', 'filename'),
    State({'type': 'range-input', 'index': ALL}, 'value'),
    prevent_initial_call=True
)
def split_pdf(n_clicks, contents, filename, ranges):
    """Start the background split of the uploaded PDF.

    Blank range inputs are discarded *before* the emptiness check so that a
    form containing only empty inputs does not start a worker with nothing
    to do.

    Returns:
        ``(0, message)`` - the progress bar reset and a status message.

    Raises:
        PreventUpdate: when no PDF is uploaded or no range was entered.
    """
    global progress

    ranges = [r.strip() for r in (ranges or []) if r and r.strip()]  # Filter out empty ranges
    if not contents or not ranges:
        logger.warning("Split PDF clicked but no content or ranges provided")
        raise PreventUpdate

    logger.info("Split PDF button clicked")
    logger.info(f"Processing {len(ranges)} ranges")

    progress = 0  # Reset progress
    thread = Thread(target=process_pdf, args=(contents, filename, ranges))
    thread.start()

    return 0, "PDF splitting process started. Check console for detailed logs."
def _parse_page_range(page_range, total_pages):
    """Turn a "start-end" (or single "page") string into 1-based bounds.

    The end page is clamped to ``total_pages``.

    Raises:
        ValueError: if the text is not numeric, the start page is below 1 or
            beyond the document, or the end page precedes the start page.
    """
    first, dash_found, last = page_range.strip().partition('-')
    start = int(first)
    end = int(last) if dash_found else start
    if start < 1 or end < start:
        raise ValueError(f"Invalid page range: {page_range!r}")
    if start > total_pages:
        raise ValueError(
            f"Page range {page_range!r} starts beyond the last page ({total_pages})"
        )
    return start, min(end, total_pages)


def process_pdf(contents, filename, ranges):
    """Split the uploaded PDF into one file per range and zip the results.

    Runs in a worker thread. Results are published through the module-level
    ``generated_file`` (ZIP bytes) and ``progress`` (0-100, or -1 on failure).

    Args:
        contents: upload payload; the base64 data follows the first comma.
        filename: name of the uploaded file (currently unused).
        ranges: page-range strings such as "1-3" or "5".
    """
    global progress, generated_file
    # Drop any ZIP left over from an earlier run so a failed run cannot be
    # followed by a download of stale data.
    generated_file = None
    try:
        # Decode PDF content
        _, _, content_string = contents.partition(',')
        decoded = base64.b64decode(content_string)

        # Read the PDF
        pdf = PdfReader(io.BytesIO(decoded))
        total_pages = len(pdf.pages)

        # Create a ZIP file in memory
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w') as zf:
            for part_no, page_range in enumerate(ranges, start=1):
                # Validating here keeps a start page of 0 or less from
                # turning into a negative index, which would silently pick
                # pages from the end of the document.
                start, end = _parse_page_range(page_range, total_pages)

                writer = PdfWriter()
                for page_num in range(start - 1, end):
                    writer.add_page(pdf.pages[page_num])

                # Save the split PDF to the ZIP file
                output = io.BytesIO()
                writer.write(output)
                zf.writestr(f'split_{part_no}.pdf', output.getvalue())

                # Stay below 100 until the archive is finalized; 100 is the
                # "ready to download" signal read by update_progress.
                progress = min(99, part_no / len(ranges) * 100)

        generated_file = zip_buffer.getvalue()
        progress = 100
    except Exception as e:
        # Top-level boundary of the worker thread: log with traceback and
        # report the failure through the progress sentinel.
        logger.exception("Error processing PDF: %s", e)
        progress = -1


@callback(
    Output('progress-bar', 'value', allow_duplicate=True),
    Output('log-output', 'children', allow_duplicate=True),
    Input('progress-bar', 'value'),
    prevent_initial_call=True
)
def update_progress(value):
    """Report the worker's state to the progress bar and the status line.

    Returns a ``(bar_value, message)`` pair based on the module-level
    ``progress``: 100 means finished, -1 means failed, anything else is the
    current percentage.

    NOTE(review): this callback listens to the same property it writes;
    confirm it really re-fires while the worker is running - a periodic
    trigger such as ``dcc.Interval`` may be required.
    """
    current = progress
    if current == 100:
        logger.info("PDF splitting completed")
        return 100, "PDF splitting completed. Click 'Download ZIP' to get your files."
    if current == -1:
        logger.error("PDF splitting failed")
        return 0, "Error occurred during PDF splitting. Check console for details."
    return current, f"Processing... {current:.0f}% complete"


@callback(
    Output("download-zip", "data"),
    Input("download-button", "n_clicks"),
    prevent_initial_call=True
)
def download_zip(n_clicks):
    """Send the generated ZIP to the browser.

    Raises:
        PreventUpdate: when no ZIP has been generated yet.
    """
    if generated_file is None:
        logger.warning("Download attempted but no file generated")
        raise PreventUpdate
    logger.info("Initiating ZIP file download")
    return dcc.send_bytes(generated_file, "split_pdfs.zip")


if __name__ == '__main__':
    logger.info("Starting the Dash application...")
    # The server listens on all interfaces, and debug mode exposes an
    # interactive debugger, so it is opt-in via DASH_DEBUG instead of always on.
    debug_enabled = os.environ.get('DASH_DEBUG', '').strip().lower() in {'1', 'true', 'yes'}
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    logger.info("Dash application has finished running.")