"""Dash web application that splits an uploaded PDF into page ranges.

The user uploads one PDF, enters one or more page ranges, and starts the
split.  The work runs in a background thread; an interval callback polls
module-level state to update the progress bar, and the resulting PDFs are
offered for download as a single ZIP archive.
"""

import base64
import io
import json
import logging
import time
import zipfile
from threading import Thread

import dash
import dash_bootstrap_components as dbc
from dash import dcc, html, Input, Output, State, callback, MATCH, ALL
from dash.exceptions import PreventUpdate
from PyPDF2 import PdfReader, PdfWriter

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Global variables.
# NOTE(review): this state is module-level, so it is shared by every browser
# session served by this process -- confirm the app is meant for one user.
generated_file = None  # bytes of the most recently built ZIP, or None
progress = 0           # 0-100 while/after processing, -1 after a failure
is_processing = False  # True while the worker thread is running

# Layout
app.layout = dbc.Container([
    html.H1("PDF Splitter", className="my-4"),
    dcc.Upload(
        id='upload-pdf',
        children=html.Div(['Drag and Drop or ', html.A('Select PDF')]),
        style={
            'width': '100%',
            'height': '60px',
            'lineHeight': '60px',
            'borderWidth': '1px',
            'borderStyle': 'dashed',
            'borderRadius': '5px',
            'textAlign': 'center',
            'margin': '10px',
        },
        multiple=False,
    ),
    dbc.Spinner(html.Div(id='pdf-name'), color="primary", type="grow"),
    dbc.Card([
        dbc.CardBody([
            html.Div(id='ranges-container', children=[]),
            dbc.Button("Add Range", id='add-range', color="secondary", className="mt-2"),
        ])
    ], className="my-3"),
    dbc.Button("Split PDF", id='split-button', color="primary", className="mt-3", disabled=True),
    dbc.Progress(id='progress-bar', className="my-3"),
    # The container is only shown while a split is running, so it holds just
    # the spinner.  The spinner has no children so that it is displayed
    # whenever the container is visible.
    html.Div([
        dbc.Spinner(id="processing-spinner", color="primary", type="border"),
    ], id='processing-container', style={'display': 'none'}),
    # Kept outside the hideable container: the completion and error messages
    # are written at the same moment the container is hidden.
    html.Div(id='processing-status'),
    dbc.Button("Download ZIP", id='download-button', color="success", className="mt-3", disabled=True),
    dcc.Download(id="download-zip"),
    html.Div(id='log-output', style={'whiteSpace': 'pre-line'}),
    dcc.Interval(id='interval-component', interval=1000, n_intervals=0),  # 1 second interval
], fluid=True)


@callback(
    Output('pdf-name', 'children'),
    Output('split-button', 'disabled'),
    Output('ranges-container', 'children'),
    Input('upload-pdf', 'contents'),
    State('upload-pdf', 'filename'),
)
def update_output(contents, filename):
    """Show the uploaded file name and reset the range list.

    Args:
        contents: Value of the upload component's ``contents`` property, or
            ``None`` when nothing has been uploaded.
        filename: Name of the uploaded file.

    Returns:
        A tuple ``(name display, split button disabled flag, range rows)``.
        With no upload the display is empty, the button is disabled and there
        are no rows; otherwise a single empty range row (index 0) is created.
    """
    if contents is None:
        return "", True, []
    logger.info("PDF uploaded: %s", filename)
    return html.Div(f"Uploaded: {filename}"), False, [create_range_input(0)]


def create_range_input(index):
    """Build one range row: a text input plus a "Remove" button.

    Args:
        index: Value stored under ``'index'`` in the ids of the row, its input
            and its remove button, so the three can be related to each other.

    Returns:
        A ``dbc.Row`` component.
    """
    return dbc.Row([
        dbc.Col(
            dbc.Input(
                id={'type': 'range-input', 'index': index},
                type='text',
                placeholder='Enter page range (e.g., 1-3)',
            ),
            width=10,
        ),
        dbc.Col(
            dbc.Button(
                "Remove",
                id={'type': 'remove-range', 'index': index},
                color="danger",
                size="sm",
            ),
            width=2,
        ),
    ], id={'type': 'range-row', 'index': index}, className="mb-2")


def _row_index(row):
    """Return the id index of a range row received as serialized layout (a dict)."""
    return row['props']['id']['index']


@callback(
    Output('ranges-container', 'children', allow_duplicate=True),
    Input('add-range', 'n_clicks'),
    Input({'type': 'remove-range', 'index': ALL}, 'n_clicks'),
    State('ranges-container', 'children'),
    prevent_initial_call=True,
)
def manage_ranges(add_clicks, remove_clicks, existing_ranges):
    """Add a new range row or remove the row whose "Remove" button was clicked.

    Args:
        add_clicks: Click count of the "Add Range" button (unused; the
            triggering property is read from the callback context).
        remove_clicks: Click counts of all "Remove" buttons (unused, as above).
        existing_ranges: Current children of the range container.

    Returns:
        The updated list of range rows.

    Raises:
        PreventUpdate: If no triggering property carries an actual click,
            e.g. when the callback fires only because new "Remove" buttons
            (whose ``n_clicks`` is still ``None``) were added to the layout.
    """
    rows = list(existing_ranges or [])

    # Only react to a real click; a freshly inserted button reports None.
    clicked = next((t for t in dash.callback_context.triggered if t['value']), None)
    if clicked is None:
        raise PreventUpdate

    triggered_id = clicked['prop_id'].rsplit('.', 1)[0]
    if triggered_id == 'add-range':
        # Derive the index from the ids in use rather than from the row
        # count, so an id is never reused after a row has been removed.
        next_index = max((_row_index(row) for row in rows), default=-1) + 1
        rows.append(create_range_input(next_index))
    elif 'remove-range' in triggered_id:
        remove_index = json.loads(triggered_id)['index']
        # Match on the row's own id, not its position in the list.
        rows = [row for row in rows if _row_index(row) != remove_index]
    return rows


@callback(
    Output('processing-status', 'children'),
    Output('split-button', 'disabled', allow_duplicate=True),
    Output('download-button', 'disabled'),
    Output('processing-container', 'style'),
    Input('split-button', 'n_clicks'),
    State('upload-pdf', 'contents'),
    State('upload-pdf', 'filename'),
    State({'type': 'range-input', 'index': ALL}, 'value'),
    prevent_initial_call=True,
)
def split_pdf(n_clicks, contents, filename, ranges):
    """Start splitting the uploaded PDF in a background thread.

    Args:
        n_clicks: Click count of the "Split PDF" button (unused).
        contents: Upload ``contents`` value for the PDF.
        filename: Name of the uploaded file.
        ranges: Values of all range inputs; blank entries are ignored.

    Returns:
        A tuple ``(status text, split button disabled, download button
        disabled, processing container style)`` describing the "processing"
        state.

    Raises:
        PreventUpdate: If there is no uploaded content or no non-blank range.
    """
    global progress, is_processing, generated_file

    # Filter out empty ranges before deciding whether there is work to do.
    ranges = [r.strip() for r in (ranges or []) if r and r.strip()]
    if not contents or not ranges:
        logger.warning("Split PDF clicked but no content or ranges provided")
        raise PreventUpdate

    logger.info("Split PDF button clicked")
    logger.info("Processing %d ranges", len(ranges))

    progress = 0            # Reset progress
    generated_file = None   # Drop the archive of any previous run
    is_processing = True

    worker = Thread(target=process_pdf, args=(contents, filename, ranges), daemon=True)
    worker.start()

    return "Processing started...", True, True, {'display': 'block'}


def _parse_page_range(page_range, total_pages):
    """Parse ``"start-end"`` or a single page number into 1-based bounds.

    Args:
        page_range: Text such as ``"1-3"`` or ``"5"``.
        total_pages: Number of pages in the document.

    Returns:
        ``(start, end)``, both inclusive and 1-based, with ``end`` clamped to
        ``total_pages``.

    Raises:
        ValueError: If the text is not one or two integers, if ``start`` is
            below 1 or beyond the last page, or if ``end`` is below ``start``.
    """
    first, separator, last = page_range.partition('-')
    start = int(first)
    end = int(last) if separator else start
    if start < 1 or end < start:
        raise ValueError(f"Invalid page range: {page_range!r}")
    if start > total_pages:
        raise ValueError(
            f"Page range {page_range!r} starts beyond the last page ({total_pages})"
        )
    return start, min(end, total_pages)


def process_pdf(contents, filename, ranges):
    """Split the PDF into one file per range and store them as a ZIP.

    Runs in the worker thread started by ``split_pdf``.  Results are reported
    through the module globals: ``progress`` is advanced after each range and
    set to 100 on success or -1 on any failure, ``generated_file`` receives
    the ZIP bytes on success, and ``is_processing`` is cleared at the end.

    Args:
        contents: Upload ``contents`` value, a string of the form
            ``"<header>,<base64 data>"``.
        filename: Name of the uploaded file (unused).
        ranges: Non-empty list of page-range strings.
    """
    global progress, generated_file, is_processing
    try:
        # Decode PDF content; split once so only the header is separated.
        _content_type, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)

        # Read the PDF
        pdf = PdfReader(io.BytesIO(decoded))
        total_pages = len(pdf.pages)

        # Create a ZIP file in memory
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w') as zf:
            for i, page_range in enumerate(ranges):
                start, end = _parse_page_range(page_range, total_pages)

                writer = PdfWriter()
                for page_num in range(start - 1, end):
                    writer.add_page(pdf.pages[page_num])

                # Save the split PDF to the ZIP file
                output = io.BytesIO()
                writer.write(output)
                zf.writestr(f'split_{i+1}.pdf', output.getvalue())

                progress = (i + 1) / len(ranges) * 100
                time.sleep(0.1)  # Simulate some processing time

        # Read the buffer only after the archive has been closed.
        generated_file = zip_buffer.getvalue()
        progress = 100
    except Exception:
        # Boundary of the worker thread: record the failure for the UI and
        # keep the traceback in the log.
        logger.exception("Error processing PDF")
        progress = -1
    finally:
        is_processing = False


@callback(
    Output('progress-bar', 'value'),
    Output('processing-status', 'children', allow_duplicate=True),
    Output('download-button', 'disabled', allow_duplicate=True),
    Output('processing-container', 'style', allow_duplicate=True),
    Output('split-button', 'disabled', allow_duplicate=True),
    Input('interval-component', 'n_intervals'),
    prevent_initial_call=True,
)
def update_progress(n):
    """Mirror the worker thread's state into the UI on every interval tick.

    Args:
        n: Interval tick count (unused).

    Returns:
        A tuple ``(progress value, status text, download button disabled,
        processing container style, split button disabled)`` for the running,
        finished or failed state.

    Raises:
        PreventUpdate: If no split is running and none has finished or failed.
    """
    if is_processing:
        return progress, f"Processing... {progress:.0f}% complete", True, {'display': 'block'}, True
    if progress == 100 and generated_file is not None:
        return (
            100,
            "PDF splitting completed. Click 'Download ZIP' to get your files.",
            False,
            {'display': 'none'},
            False,
        )
    if progress == -1:
        # Re-enable the split button so the user can actually try again.
        return (
            0,
            "Error occurred during PDF splitting. Please try again.",
            True,
            {'display': 'none'},
            False,
        )
    raise PreventUpdate


@callback(
    Output("download-zip", "data"),
    Input("download-button", "n_clicks"),
    prevent_initial_call=True,
)
def download_zip(n_clicks):
    """Send the generated ZIP archive to the browser.

    Args:
        n_clicks: Click count of the "Download ZIP" button (unused).

    Returns:
        Download data for ``split_pdfs.zip``.

    Raises:
        PreventUpdate: If no archive has been generated.
    """
    if generated_file is None:
        logger.warning("Download attempted but no file generated")
        raise PreventUpdate
    logger.info("Initiating ZIP file download")
    return dcc.send_bytes(generated_file, "split_pdfs.zip")


if __name__ == '__main__':
    logger.info("Starting the Dash application...")
    # Debug mode is off because the server listens on all interfaces and
    # debug mode would expose the interactive debugger to the network.
    app.run(debug=False, host='0.0.0.0', port=7860)
    logger.info("Dash application has finished running.")