File size: 7,072 Bytes
557bfa3
 
 
 
57c7f35
557bfa3
02dbd73
557bfa3
 
 
ba2629d
557bfa3
f0a8607
557bfa3
 
57c7f35
 
 
 
557bfa3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba2629d
 
 
 
 
 
557bfa3
 
 
 
ba2629d
02dbd73
557bfa3
f0a8607
557bfa3
 
5b28261
b69229a
5b28261
b69229a
 
5b28261
b69229a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
557bfa3
 
 
57c7f35
557bfa3
 
 
ba2629d
557bfa3
 
 
5b28261
557bfa3
57c7f35
557bfa3
 
57c7f35
ba2629d
57c7f35
 
5b28261
 
557bfa3
 
 
02dbd73
557bfa3
5b28261
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
557bfa3
 
57c7f35
557bfa3
 
 
 
 
 
57c7f35
02dbd73
557bfa3
57c7f35
02dbd73
557bfa3
02dbd73
557bfa3
 
 
 
 
 
 
 
 
57c7f35
557bfa3
57c7f35
557bfa3
 
 
57c7f35
557bfa3
57c7f35
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
import base64
import io
import os
import zipfile
import logging
from threading import Thread
import json

import dash
import dash_bootstrap_components as dbc
from dash import dcc, html, Input, Output, State, callback, MATCH, ALL
from dash.exceptions import PreventUpdate

from PyPDF2 import PdfReader, PdfWriter

# Logging: timestamped, level-tagged records at INFO and above.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# The Dash application, styled with the Bootstrap theme.
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)

# Module-level state shared by the callbacks and the worker thread.
generated_file = None  # bytes of the most recently built ZIP, or None
progress = 0  # percentage of ranges processed; process_pdf stores -1 on failure

# Layout
# The page is assembled from a few named pieces so that the top-level
# container reads as a plain top-to-bottom list of widgets.
_upload_style = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px',
}

# Single-file drag-and-drop / file-picker upload area.
_upload_area = dcc.Upload(
    id='upload-pdf',
    children=html.Div(['Drag and Drop or ', html.A('Select PDF')]),
    style=_upload_style,
    multiple=False,
)

# The first page-range row (index 0); the add_range callback appends more.
_initial_range_row = dbc.Row(
    [
        dbc.Col(
            dbc.Input(
                id={'type': 'range-input', 'index': 0},
                type='text',
                placeholder='Enter page range (e.g., 1-3)',
            ),
            width=10,
        ),
        dbc.Col(
            dbc.Button(
                "Remove",
                id={'type': 'remove-range', 'index': 0},
                color="danger",
                size="sm",
            ),
            width=2,
        ),
    ],
    className="mb-2",
)

# Card holding the list of range rows plus the "Add Range" button.
_ranges_card = dbc.Card(
    [
        dbc.CardBody([
            html.Div(id='ranges-container', children=[_initial_range_row]),
            dbc.Button("Add Range", id='add-range', color="secondary", className="mt-2"),
        ]),
    ],
    className="my-3",
)

app.layout = dbc.Container(
    [
        html.H1("PDF Splitter", className="my-4"),
        _upload_area,
        html.Div(id='pdf-name'),
        _ranges_card,
        # Disabled until a PDF has been uploaded (see update_output).
        dbc.Button("Split PDF", id='split-button', color="primary", className="mt-3", disabled=True),
        dbc.Progress(id='progress-bar', className="my-3"),
        dbc.Button("Download ZIP", id='download-button', color="success", className="mt-3"),
        dcc.Download(id="download-zip"),
        html.Div(id='log-output', style={'whiteSpace': 'pre-line'}),
    ],
    fluid=True,
)

@callback(
    Output('pdf-name', 'children'),
    Output('split-button', 'disabled'),
    Input('upload-pdf', 'contents'),
    Input('upload-pdf', 'filename')
)
def update_output(contents, filename):
    """Show the uploaded file's name and toggle the "Split PDF" button.

    Args:
        contents: The ``contents`` property of the upload component, or
            ``None`` when nothing has been uploaded.
        filename: The ``filename`` property of the upload component.

    Returns:
        A ``(children, disabled)`` pair: the content for ``pdf-name`` and
        whether ``split-button`` should be disabled.
    """
    # Nothing uploaded: clear the label and keep the button disabled.
    if contents is None:
        return "", True

    logger.info(f"PDF uploaded: {filename}")
    uploaded_label = html.Div(f"Uploaded: {filename}")
    return uploaded_label, False

@callback(
    Output('ranges-container', 'children'),
    Input('add-range', 'n_clicks'),
    State('ranges-container', 'children'),
    prevent_initial_call=True
)
def add_range(n_clicks, existing_ranges):
    """Append a new page-range row to the ranges container.

    Args:
        n_clicks: Click count of the "Add Range" button.
        existing_ranges: Current children of ``ranges-container`` (may be
            ``None`` or empty if every row has been removed).

    Returns:
        The list of rows, with one new row appended when the button was
        actually clicked.
    """
    # Work on a copy so the incoming state list is never mutated in place,
    # and tolerate a missing children list.
    rows = list(existing_ranges or [])
    if not n_clicks:
        return rows

    # Use the click count as the row index rather than the current number of
    # rows: after a removal, len(rows) can equal the index of a row that is
    # still on the page, which would create two components with the same id.
    # The click count only grows, and the initial row in the layout uses
    # index 0, so every row gets a distinct index.
    new_index = n_clicks
    new_row = dbc.Row([
        dbc.Col(dbc.Input(id={'type': 'range-input', 'index': new_index}, type='text', placeholder='Enter page range (e.g., 1-3)'), width=10),
        dbc.Col(dbc.Button("Remove", id={'type': 'remove-range', 'index': new_index}, color="danger", size="sm"), width=2),
    ], className="mb-2")
    rows.append(new_row)
    return rows

def _contains_remove_button(node, index):
    """Return True if the serialized component tree *node* holds the Remove button with *index*."""
    if isinstance(node, list):
        return any(_contains_remove_button(child, index) for child in node)
    if not isinstance(node, dict):
        return False
    props = node.get('props') or {}
    component_id = props.get('id')
    if (
        isinstance(component_id, dict)
        and component_id.get('type') == 'remove-range'
        and component_id.get('index') == index
    ):
        return True
    return _contains_remove_button(props.get('children'), index)


@callback(
    Output('ranges-container', 'children', allow_duplicate=True),
    Input({'type': 'remove-range', 'index': ALL}, 'n_clicks'),
    State('ranges-container', 'children'),
    prevent_initial_call=True
)
def remove_range(n_clicks, existing_ranges):
    """Remove the range row whose "Remove" button was clicked.

    Args:
        n_clicks: Click counts of every ``remove-range`` button (unused; the
            clicked button is taken from the callback context).
        existing_ranges: Current children of ``ranges-container``.

    Returns:
        The rows that do not contain the clicked button.

    Raises:
        PreventUpdate: If nothing triggered the callback, or the triggering
            button has not actually been clicked.
    """
    ctx = dash.callback_context
    if not ctx.triggered:
        raise PreventUpdate

    trigger = ctx.triggered[0]
    # NOTE(review): this callback can apparently also be invoked when a new
    # Remove button is inserted into the layout, in which case its n_clicks is
    # still None. Only a real click (truthy n_clicks) may delete a row.
    if not trigger.get('value'):
        raise PreventUpdate

    # prop_id has the form '<json id>.n_clicks'; split on the last dot so a
    # dot inside the JSON id cannot truncate it.
    button_id = trigger['prop_id'].rsplit('.', 1)[0]
    index_to_remove = json.loads(button_id)['index']

    # Match rows by the id of the button they contain, not by list position:
    # positions shift after every removal while component ids do not.
    return [
        row for row in (existing_ranges or [])
        if not _contains_remove_button(row, index_to_remove)
    ]

@callback(
    Output('progress-bar', 'value'),
    Output('log-output', 'children'),
    Input('split-button', 'n_clicks'),
    State('upload-pdf', 'contents'),
    State('upload-pdf', 'filename'),
    State({'type': 'range-input', 'index': ALL}, 'value'),
    prevent_initial_call=True
)
def split_pdf(n_clicks, contents, filename, ranges):
    """Start splitting the uploaded PDF in a background thread.

    Args:
        n_clicks: Click count of the "Split PDF" button.
        contents: Uploaded file contents from the upload component.
        filename: Name of the uploaded file.
        ranges: Values of all page-range inputs; untouched inputs are
            ``None`` or empty strings.

    Returns:
        A ``(progress_value, log_message)`` pair announcing that processing
        has started.

    Raises:
        PreventUpdate: If no PDF was uploaded or no non-blank range was given.
    """
    global progress

    # Discard blank inputs *before* checking whether there is anything to do.
    # A list such as [None] or [''] is truthy, so checking the raw list would
    # start a worker with no ranges and report an empty ZIP as a success.
    requested_ranges = [r.strip() for r in (ranges or []) if r and r.strip()]
    if not contents or not requested_ranges:
        logger.warning("Split PDF clicked but no content or ranges provided")
        raise PreventUpdate

    logger.info("Split PDF button clicked")
    logger.info(f"Processing {len(requested_ranges)} ranges")

    progress = 0  # Reset progress

    # The split runs in a separate thread so this callback returns immediately.
    thread = Thread(target=process_pdf, args=(contents, filename, requested_ranges))
    thread.start()

    return 0, "PDF splitting process started. Check console for detailed logs."

def _parse_page_range(page_range):
    """Parse ``"N"`` or ``"START-END"`` into a 1-based inclusive ``(start, end)`` pair.

    Raises:
        ValueError: If the text is not one or two integers, the start is
            below 1, or the end precedes the start.
    """
    first, sep, last = page_range.strip().partition('-')
    try:
        start = int(first)
        # A bare page number selects just that page.
        end = int(last) if sep else start
    except ValueError:
        raise ValueError(
            f"Invalid page range {page_range!r}: expected 'N' or 'START-END'"
        ) from None
    if start < 1 or end < start:
        raise ValueError(
            f"Invalid page range {page_range!r}: pages start at 1 and END must not precede START"
        )
    return start, end


def process_pdf(contents, filename, ranges):
    """Split the uploaded PDF into one PDF per range and bundle them in a ZIP.

    Intended to run in a worker thread. Results are published through the
    module globals: ``generated_file`` receives the ZIP bytes and ``progress``
    goes to 100 on success or -1 on failure.

    Args:
        contents: Upload contents as ``"<header>,<base64 data>"``.
        filename: Name of the uploaded file (currently unused).
        ranges: Page-range strings, each ``"N"`` or ``"START-END"`` (1-based,
            inclusive). An end past the last page is clamped to the document;
            a range that starts past the last page is skipped with a warning.
    """
    global progress, generated_file
    # Invalidate any earlier result so a failed run cannot leave a stale ZIP
    # available for download.
    generated_file = None
    try:
        # Decode PDF content: everything after the first comma is base64 data.
        _, _, content_string = contents.partition(',')
        decoded = base64.b64decode(content_string)

        # Read the PDF
        pdf = PdfReader(io.BytesIO(decoded))
        total_pages = len(pdf.pages)

        # Create a ZIP file in memory
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w') as zf:
            for i, page_range in enumerate(ranges):
                # Validation rejects a start below 1, which would otherwise
                # turn into a negative index and silently pick pages from the
                # end of the document.
                start, end = _parse_page_range(page_range)

                if start > total_pages:
                    # Nothing to extract; skip instead of writing a zero-page PDF.
                    logger.warning(
                        "Range %r starts beyond the last page (%d); skipped",
                        page_range, total_pages,
                    )
                else:
                    writer = PdfWriter()
                    for page_num in range(start - 1, min(end, total_pages)):
                        writer.add_page(pdf.pages[page_num])

                    # Save the split PDF to the ZIP file
                    output = io.BytesIO()
                    writer.write(output)
                    zf.writestr(f'split_{i+1}.pdf', output.getvalue())

                progress = (i + 1) / len(ranges) * 100

        generated_file = zip_buffer.getvalue()
        progress = 100
    except Exception as e:
        # Boundary of the worker thread: log with traceback and report the
        # failure through the progress sentinel instead of letting the thread
        # die silently.
        logger.exception("Error processing PDF: %s", e)
        progress = -1

@callback(
    Output('progress-bar', 'value', allow_duplicate=True),
    Output('log-output', 'children', allow_duplicate=True),
    Input('progress-bar', 'value'),
    prevent_initial_call=True
)
def update_progress(value):
    """Translate the module-level ``progress`` value into UI updates.

    Args:
        value: Current value of the progress bar (unused; the callback only
            uses it as a trigger).

    Returns:
        A ``(progress_value, log_message)`` pair: completion, failure, or an
        in-progress percentage, depending on ``progress``.
    """
    # NOTE(review): the only input of this callback is the property it also
    # writes, so it presumably runs once per change made by another callback
    # rather than polling the worker thread - confirm; a dcc.Interval input
    # may be what is intended.
    if progress == 100:
        logger.info("PDF splitting completed")
        return 100, "PDF splitting completed. Click 'Download ZIP' to get your files."

    # -1 is the failure marker stored by process_pdf.
    if progress == -1:
        logger.error("PDF splitting failed")
        return 0, "Error occurred during PDF splitting. Check console for details."

    bar_value = progress
    status_text = f"Processing... {progress:.0f}% complete"
    return bar_value, status_text

@callback(
    Output("download-zip", "data"),
    Input("download-button", "n_clicks"),
    prevent_initial_call=True
)
def download_zip(n_clicks):
    """Send the generated ZIP to the browser when "Download ZIP" is clicked.

    Args:
        n_clicks: Click count of the download button (unused beyond
            triggering the callback).

    Returns:
        The download payload for ``split_pdfs.zip``.

    Raises:
        PreventUpdate: If no ZIP has been generated.
    """
    # Guard: nothing to send until a ZIP has been stored in generated_file.
    if generated_file is None:
        logger.warning("Download attempted but no file generated")
        raise PreventUpdate

    logger.info("Initiating ZIP file download")
    return dcc.send_bytes(generated_file, "split_pdfs.zip")

if __name__ == '__main__':
    logger.info("Starting the Dash application...")
    # The server listens on every interface (0.0.0.0), so debug mode must not
    # be on by default: it is a development aid and should not be reachable
    # from the network. Enable it explicitly with DASH_DEBUG=1 when working
    # locally.
    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    logger.info("Dash application has finished running.")