File size: 7,633 Bytes
557bfa3
 
 
57c7f35
557bfa3
02dbd73
b2ab0f1
557bfa3
 
 
ba2629d
557bfa3
f0a8607
557bfa3
 
57c7f35
 
 
 
557bfa3
 
 
 
 
b2ab0f1
557bfa3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1eb1557
557bfa3
 
1eb1557
557bfa3
 
 
 
ba2629d
2e88003
83c24f4
b2ab0f1
83c24f4
b2ab0f1
557bfa3
f0a8607
b2ab0f1
557bfa3
 
5b28261
b69229a
5b28261
1eb1557
b69229a
 
5b28261
b69229a
 
 
9be275e
1eb1557
 
b69229a
9be275e
 
 
 
 
 
b69229a
bdfc164
b69229a
aed9663
b69229a
 
 
aed9663
 
 
 
 
b69229a
9be275e
b69229a
aed9663
 
 
b69229a
aed9663
557bfa3
 
b2ab0f1
 
 
83c24f4
557bfa3
 
 
ba2629d
557bfa3
 
 
b2ab0f1
557bfa3
57c7f35
557bfa3
 
57c7f35
ba2629d
57c7f35
 
5b28261
b2ab0f1
5b28261
557bfa3
 
 
83c24f4
557bfa3
5b28261
b2ab0f1
5b28261
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2ab0f1
5b28261
 
 
 
 
 
 
b2ab0f1
 
5b28261
557bfa3
b2ab0f1
 
 
83c24f4
b2ab0f1
557bfa3
 
b2ab0f1
 
 
83c24f4
b2ab0f1
83c24f4
557bfa3
83c24f4
557bfa3
b2ab0f1
557bfa3
 
 
 
 
 
 
 
 
57c7f35
557bfa3
57c7f35
557bfa3
 
 
57c7f35
557bfa3
57c7f35
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
import base64
import io
import zipfile
import logging
from threading import Thread
import json
import time

import dash
import dash_bootstrap_components as dbc
from dash import dcc, html, Input, Output, State, callback, MATCH, ALL
from dash.exceptions import PreventUpdate

from PyPDF2 import PdfReader, PdfWriter

# Set up logging: configure the root logger once at import time (INFO level,
# "timestamp - level - message" format) and create this module's own logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# The Dash application instance, styled with the stock Bootstrap theme.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Global variables
# Module-level state shared between the Dash callbacks and the worker thread
# that split_pdf() starts.
# NOTE(review): as module globals these are shared by every browser session
# served by this process -- confirm that single-user use is intended.
# generated_file: bytes of the finished ZIP archive (set by process_pdf()),
# or None while no archive has been produced.
generated_file = None
# progress: percentage of requested ranges processed so far (0-100);
# process_pdf() sets it to -1 when the job fails.
progress = 0
# is_processing: set to True by split_pdf() when a job starts and back to
# False by process_pdf() when the job ends, successfully or not.
is_processing = False
# Layout
# Each visual section is built as its own private component first and then
# stacked top-to-bottom inside a fluid Bootstrap container.

# Drop zone / file picker for a single PDF.
_upload_area = dcc.Upload(
    id='upload-pdf',
    children=html.Div(['Drag and Drop or ', html.A('Select PDF')]),
    style=dict(
        width='100%',
        height='60px',
        lineHeight='60px',
        borderWidth='1px',
        borderStyle='dashed',
        borderRadius='5px',
        textAlign='center',
        margin='10px',
    ),
    multiple=False,
)

# Shows the uploaded file name; wrapped in a spinner while it is updating.
_upload_feedback = dbc.Spinner(html.Div(id='pdf-name'), color="primary", type="grow")

# Card holding the dynamic list of page-range rows plus the "Add Range" button.
_ranges_card = dbc.Card(
    [
        dbc.CardBody(
            [
                html.Div(id='ranges-container', children=[]),
                dbc.Button("Add Range", id='add-range', color="secondary", className="mt-2"),
            ]
        )
    ],
    className="my-3",
)

# Action buttons start disabled; callbacks enable them when appropriate.
_split_button = dbc.Button(
    "Split PDF", id='split-button', color="primary", className="mt-3", disabled=True
)
_download_button = dbc.Button(
    "Download ZIP", id='download-button', color="success", className="mt-3", disabled=True
)

# Spinner + status text, hidden until a split job is started.
_processing_panel = html.Div(
    [
        dbc.Spinner(html.Div(), id="processing-spinner", color="primary", type="border"),
        html.Div(id='processing-status'),
    ],
    id='processing-container',
    style={'display': 'none'},
)

# Fires once per second to drive the progress-polling callback.
_progress_poller = dcc.Interval(
    id='interval-component', interval=1000, n_intervals=0
)  # 1 second interval

app.layout = dbc.Container(
    [
        html.H1("PDF Splitter", className="my-4"),
        _upload_area,
        _upload_feedback,
        _ranges_card,
        _split_button,
        dbc.Progress(id='progress-bar', className="my-3"),
        _processing_panel,
        _download_button,
        dcc.Download(id="download-zip"),
        html.Div(id='log-output', style={'whiteSpace': 'pre-line'}),
        _progress_poller,
    ],
    fluid=True,
)

@callback(
    Output('pdf-name', 'children'),
    Output('split-button', 'disabled'),
    Output('ranges-container', 'children'),
    Input('upload-pdf', 'contents'),
    Input('upload-pdf', 'filename')
)
def update_output(contents, filename):
    """React to a file being chosen in the upload component.

    Args:
        contents: The uploaded file's contents as delivered by ``dcc.Upload``,
            or ``None`` when nothing has been uploaded.
        filename: The name of the uploaded file.

    Returns:
        A 3-tuple ``(name display, split button disabled?, range rows)``.
        Without an upload the display is empty, the button stays disabled and
        there are no range rows; with an upload the file name is shown, the
        button is enabled and a single range row (index 0) is created.
    """
    # Nothing uploaded: reset the UI to its empty state.
    if contents is None:
        return "", True, []

    logger.info(f"PDF uploaded: {filename}")
    first_row = create_range_input(0)
    upload_label = html.Div(f"Uploaded: {filename}")
    return upload_label, False, [first_row]

def create_range_input(index):
    """Build one page-range row: a text box plus its "Remove" button.

    Both widgets get a pattern-matching (dict) id carrying ``index`` so that
    callbacks can address every row via ``ALL``.

    Args:
        index: Value stored under the ``'index'`` key of both component ids.

    Returns:
        A ``dbc.Row`` with a 10-wide input column and a 2-wide button column.
    """
    range_field = dbc.Input(
        id={'type': 'range-input', 'index': index},
        type='text',
        placeholder='Enter page range (e.g., 1-3)',
    )
    remove_button = dbc.Button(
        "Remove",
        id={'type': 'remove-range', 'index': index},
        color="danger",
        size="sm",
    )
    columns = [
        dbc.Col(range_field, width=10),
        dbc.Col(remove_button, width=2),
    ]
    return dbc.Row(columns, className="mb-2")

def _find_range_index(component):
    """Return the id ``index`` of the "Remove" button nested in a range row.

    ``component`` is a row as Dash hands it to a callback through ``State``:
    a JSON-like tree of dicts and lists. The tree is searched depth-first for
    an ``id`` dict whose ``type`` is ``'remove-range'``.

    Returns:
        The ``index`` stored in that id, or ``None`` if no such id exists.
    """
    if isinstance(component, dict):
        component_id = component.get('id')
        if isinstance(component_id, dict) and component_id.get('type') == 'remove-range':
            return component_id.get('index')
        children = component.values()
    elif isinstance(component, list):
        children = component
    else:
        return None

    for child in children:
        found = _find_range_index(child)
        if found is not None:
            return found
    return None


@callback(
    Output('ranges-container', 'children', allow_duplicate=True),
    Input('add-range', 'n_clicks'),
    Input({'type': 'remove-range', 'index': ALL}, 'n_clicks'),
    State('ranges-container', 'children'),
    prevent_initial_call=True
)
def manage_ranges(add_clicks, remove_clicks, existing_ranges):
    """Add a new page-range row or remove the row whose button was clicked.

    Rows are identified by the ``index`` embedded in their component ids, not
    by their position in the list: positions shift after a removal, ids do
    not. A new row receives an index one higher than the largest index still
    present, so it can never duplicate the id of a surviving row.

    Args:
        add_clicks: Click count of the "Add Range" button (unused directly).
        remove_clicks: Click counts of all "Remove" buttons (unused directly).
        existing_ranges: Current children of the ranges container.

    Returns:
        The updated list of range rows.

    Raises:
        PreventUpdate: If nothing was actually clicked.
    """
    ctx = dash.callback_context
    if not ctx.triggered:
        raise PreventUpdate

    # A trigger whose n_clicks is None/0 is not a real click. NOTE(review):
    # Dash may fire this callback when a freshly inserted "Remove" button
    # first appears in the layout -- without this guard that would delete a row.
    if not ctx.triggered[0]['value']:
        raise PreventUpdate

    # triggered_id is a plain string for 'add-range' and a dict for the
    # pattern-matching "Remove" buttons.
    triggered_id = ctx.triggered_id
    rows = list(existing_ranges or [])  # copy: do not mutate the State value

    if triggered_id == 'add-range':
        used = [idx for idx in map(_find_range_index, rows) if isinstance(idx, int)]
        rows.append(create_range_input(max(used, default=-1) + 1))
    elif isinstance(triggered_id, dict) and triggered_id.get('type') == 'remove-range':
        target = triggered_id.get('index')
        rows = [row for row in rows if _find_range_index(row) != target]

    return rows

@callback(
    Output('processing-status', 'children'),
    Output('split-button', 'disabled', allow_duplicate=True),
    Output('download-button', 'disabled'),
    Output('processing-container', 'style'),
    Input('split-button', 'n_clicks'),
    State('upload-pdf', 'contents'),
    State('upload-pdf', 'filename'),
    State({'type': 'range-input', 'index': ALL}, 'value'),
    prevent_initial_call=True
)
def split_pdf(n_clicks, contents, filename, ranges):
    """Validate the form and start the PDF split in a background thread.

    Args:
        n_clicks: Click count of the "Split PDF" button (only the trigger).
        contents: Uploaded file contents from ``dcc.Upload``.
        filename: Name of the uploaded file.
        ranges: Values of all range text boxes; untouched boxes are ``None``.

    Returns:
        ``(status text, split button disabled, download button disabled,
        processing container style)`` -- both buttons are disabled and the
        processing panel is shown while the job runs.

    Raises:
        PreventUpdate: If no file was uploaded or every range box is blank.
    """
    global progress, is_processing

    # Drop blank boxes *before* validating. Checking first would let a form
    # holding only empty boxes through and produce an empty ZIP archive.
    stripped = (str(r).strip() for r in (ranges or []) if r is not None)
    requested = [r for r in stripped if r]

    if not contents or not requested:
        logger.warning("Split PDF clicked but no content or ranges provided")
        raise PreventUpdate

    logger.info("Split PDF button clicked")
    logger.info("Processing %d ranges", len(requested))

    # Reset the shared state before the worker starts so the progress poller
    # never reports the outcome of a previous run.
    progress = 0
    is_processing = True

    worker = Thread(target=process_pdf, args=(contents, filename, requested))
    worker.start()

    return "Processing started...", True, True, {'display': 'block'}

def _parse_page_range(page_range, total_pages):
    """Turn user text such as ``"2-5"`` or ``"7"`` into zero-based page indices.

    Args:
        page_range: One-based, inclusive range ``"start-end"`` or a single
            page number; surrounding whitespace is ignored.
        total_pages: Number of pages in the document.

    Returns:
        A ``range`` of zero-based page indices. An end beyond the document is
        clamped to the last page.

    Raises:
        ValueError: If the text is not numeric, the start is below 1, the end
            precedes the start, or the start lies beyond the last page.
    """
    first_text, has_dash, last_text = page_range.strip().partition('-')
    first = int(first_text)
    last = int(last_text) if has_dash else first

    # A start below 1 would become a negative index and silently select
    # pages counted from the end of the document.
    if first < 1 or last < first:
        raise ValueError(f"Invalid page range: {page_range!r}")
    if first > total_pages:
        raise ValueError(
            f"Page range {page_range!r} starts beyond the last page ({total_pages})"
        )
    return range(first - 1, min(last, total_pages))


def process_pdf(contents, filename, ranges):
    """Split the uploaded PDF into one file per range and zip the results.

    Runs in a worker thread. Results are reported through the module globals
    ``progress`` (0-100, or -1 on failure), ``generated_file`` (ZIP bytes, or
    ``None``) and ``is_processing`` (always reset to ``False`` at the end).

    Args:
        contents: Upload contents as a ``"<header>,<base64 data>"`` string.
        filename: Name of the uploaded file (currently unused).
        ranges: Page-range strings, e.g. ``["1-3", "5"]``.
    """
    global progress, generated_file, is_processing

    # Discard any archive from an earlier run so that a failed run can never
    # be downloaded as if it were the current result.
    generated_file = None
    try:
        # Decode PDF content: everything after the first comma is base64.
        _, _, content_string = contents.partition(',')
        if not content_string:
            raise ValueError("Uploaded contents are not a base64 data URI")
        decoded = base64.b64decode(content_string)

        if not ranges:
            raise ValueError("No page ranges supplied")

        # Read the PDF
        pdf = PdfReader(io.BytesIO(decoded))
        total_pages = len(pdf.pages)

        # Create a ZIP file in memory
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w') as zf:
            for i, page_range in enumerate(ranges):
                writer = PdfWriter()
                for page_num in _parse_page_range(page_range, total_pages):
                    writer.add_page(pdf.pages[page_num])

                # Save the split PDF to the ZIP file
                output = io.BytesIO()
                writer.write(output)
                zf.writestr(f'split_{i+1}.pdf', output.getvalue())

                progress = (i + 1) / len(ranges) * 100
                time.sleep(0.1)  # Simulate some processing time

        generated_file = zip_buffer.getvalue()
        progress = 100
    except Exception as e:
        # Top-level boundary of the worker thread: record the failure for the
        # progress poller and keep the traceback in the log.
        logger.exception("Error processing PDF: %s", e)
        progress = -1
    finally:
        is_processing = False

@callback(
    Output('progress-bar', 'value'),
    Output('processing-status', 'children', allow_duplicate=True),
    Output('download-button', 'disabled', allow_duplicate=True),
    Output('processing-container', 'style', allow_duplicate=True),
    Input('interval-component', 'n_intervals'),
    prevent_initial_call=True
)
def update_progress(n):
    """Mirror the worker's shared state into the UI on every interval tick.

    Args:
        n: Tick count of the interval component (only the trigger).

    Returns:
        ``(progress bar value, status text, download button disabled,
        processing container style)`` for the running, finished and failed
        states.

    Raises:
        PreventUpdate: When no job is running and none has finished or failed.
    """
    visible = {'display': 'block'}
    hidden = {'display': 'none'}

    # Job still running: show the live percentage.
    if is_processing:
        return progress, f"Processing... {progress:.0f}% complete", True, visible

    # Job finished and an archive is available: enable the download button.
    if progress == 100 and generated_file is not None:
        return 100, "PDF splitting completed. Click 'Download ZIP' to get your files.", False, hidden

    # The worker signals failure with a progress of -1.
    if progress == -1:
        return 0, "Error occurred during PDF splitting. Please try again.", True, hidden

    raise PreventUpdate

@callback(
    Output("download-zip", "data"),
    Input("download-button", "n_clicks"),
    prevent_initial_call=True
)
def download_zip(n_clicks):
    """Send the generated ZIP archive to the browser.

    Args:
        n_clicks: Click count of the "Download ZIP" button (only the trigger).

    Returns:
        Download data for ``split_pdfs.zip`` built from ``generated_file``.

    Raises:
        PreventUpdate: If no archive has been generated.
    """
    if generated_file is None:
        logger.warning("Download attempted but no file generated")
        raise PreventUpdate

    logger.info("Initiating ZIP file download")
    return dcc.send_bytes(generated_file, "split_pdfs.zip")

if __name__ == '__main__':
    import os

    # The server binds to every interface (0.0.0.0), so the interactive
    # debugger must not be on by default: debug mode lets anyone who can reach
    # the port execute code on the host. Opt in explicitly for local
    # development with DASH_DEBUG=1 (also accepts true/yes/on).
    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}

    logger.info("Starting the Dash application...")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    logger.info("Dash application has finished running.")