# NOTE: this file was captured from a web source viewer. The capture originally
# began with non-program residue: a "File size: 5,943 Bytes" banner, blame commit
# hashes (557bfa3 / ba2629d) and a 1-171 line-number gutter. None of it is code.
import base64
import io
import os
import zipfile
from threading import Thread

import dash
import dash_bootstrap_components as dbc
from dash import dcc, html, Input, Output, State, callback, MATCH, ALL
from dash.exceptions import PreventUpdate
from PyPDF2 import PdfReader, PdfWriter

# Dash application instance, themed with the Bootstrap stylesheet.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Module-level state shared between the worker and the callbacks in this file.
# It lives at process scope, so it is not kept separate per browser session.
generated_file = None  # bytes of the most recently built ZIP, or None
progress = 0  # 0-100 while working, -1 after a failed run

# --- Layout pieces, named individually so the page structure reads top-down ---

# Dashed drop zone for the PDF upload.
_upload_box_style = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px'
}
_upload_area = dcc.Upload(
    id='upload-pdf',
    children=html.Div(['Drag and Drop or ', html.A('Select PDF')]),
    style=_upload_box_style,
    multiple=False
)

# The single page-range row shown when the page first loads (index 0).
_first_range_row = dbc.Row([
    dbc.Col(
        dbc.Input(
            id={'type': 'range-input', 'index': 0},
            type='text',
            placeholder='Enter page range (e.g., 1-3)'
        ),
        width=10
    ),
    dbc.Col(
        dbc.Button(
            "Remove",
            id={'type': 'remove-range', 'index': 0},
            color="danger",
            size="sm"
        ),
        width=2
    ),
], className="mb-2")

# Card holding the editable list of ranges plus the "Add Range" button.
_ranges_card = dbc.Card([
    dbc.CardBody([
        html.Div(id='ranges-container', children=[_first_range_row]),
        dbc.Button("Add Range", id='add-range', color="secondary", className="mt-2"),
    ])
], className="my-3")

# Layout
app.layout = dbc.Container([
    html.H1("PDF Splitter", className="my-4"),
    _upload_area,
    html.Div(id='pdf-name'),
    _ranges_card,
    # Split and Download start disabled; callbacks in this file enable them.
    dbc.Button("Split PDF", id='split-button', color="primary", className="mt-3", disabled=True),
    dbc.Progress(id='progress-bar', className="my-3"),
    dbc.Button("Download ZIP", id='download-button', color="success", className="mt-3", disabled=True),
    dcc.Download(id="download-zip"),
], fluid=True)

@callback(
    Output('pdf-name', 'children'),
    Output('split-button', 'disabled'),
    Input('upload-pdf', 'contents'),
    State('upload-pdf', 'filename')
)
def update_output(contents, filename):
    """Show the uploaded file's name and toggle the Split button.

    Args:
        contents: Value of the upload component's ``contents`` property, or
            ``None`` when nothing has been uploaded.
        filename: Name reported by the upload component for that file.

    Returns:
        A ``(label, split_disabled)`` pair: the text for ``pdf-name`` and
        whether ``split-button`` should be disabled.
    """
    # Guard clause: without an upload there is nothing to split.
    if contents is None:
        return "No file selected", True
    return f"Selected file: {filename}", False

def _range_row_index(row):
    """Return the pattern-matching ``index`` of a serialised range row.

    ``row`` is the plain-dict form of a ``dbc.Row`` as delivered through
    ``State('ranges-container', 'children')``: its second column wraps the
    Remove button, whose ``id`` is already a dict (not a JSON string).
    """
    remove_button = row['props']['children'][1]['props']['children']
    return remove_button['props']['id']['index']


@callback(
    Output('ranges-container', 'children'),
    Input('add-range', 'n_clicks'),
    Input({'type': 'remove-range', 'index': ALL}, 'n_clicks'),
    State('ranges-container', 'children'),
    prevent_initial_call=True
)
def manage_ranges(add_clicks, remove_clicks, existing_ranges):
    """Add or remove a page-range row in response to a button click.

    Args:
        add_clicks: Click count of the "Add Range" button.
        remove_clicks: Click counts of every "Remove" button (ALL pattern).
        existing_ranges: Current children of ``ranges-container`` in their
            serialised (dict) form.

    Returns:
        The updated list of row components for ``ranges-container``.

    Raises:
        PreventUpdate: if the triggering property carries no click, so the
            rows must be left untouched.
    """
    ctx = dash.callback_context

    # A trigger whose n_clicks is still None/0 is not a real click. This
    # happens, for example, when a freshly inserted Remove button joins the
    # ALL pattern; acting on it would delete the row that was just added.
    if not ctx.triggered or not ctx.triggered[0]['value']:
        raise PreventUpdate

    # ``triggered_id`` is a str for plain ids and a dict for pattern-matching
    # ids, so no manual JSON parsing of ``prop_id`` is needed.
    triggered_id = ctx.triggered_id
    rows = list(existing_ranges) if existing_ranges else []

    if triggered_id == 'add-range':
        # Use max+1 rather than len(rows): after a removal the length can
        # equal an index that is still on the page, giving duplicate ids.
        new_index = max((_range_row_index(row) for row in rows), default=-1) + 1
        rows.append(dbc.Row([
            dbc.Col(dbc.Input(id={'type': 'range-input', 'index': new_index}, type='text', placeholder='Enter page range (e.g., 1-3)'), width=10),
            dbc.Col(dbc.Button("Remove", id={'type': 'remove-range', 'index': new_index}, color="danger", size="sm"), width=2),
        ], className="mb-2"))
    elif isinstance(triggered_id, dict) and triggered_id.get('type') == 'remove-range':
        remove_index = triggered_id['index']
        rows = [row for row in rows if _range_row_index(row) != remove_index]

    return rows

def _parse_page_range(range_str, page_count):
    """Translate a 1-based page spec into 0-based ``(start, stop)`` indices.

    Accepts ``"a-b"`` (inclusive) or a single page ``"a"``; surrounding
    whitespace is ignored. ``b`` is clamped to ``page_count`` so a range may
    run past the end of the document.

    Raises:
        ValueError: if the spec is not numeric, starts below page 1, ends
            before it starts, or starts past the last page.
    """
    first, dash_found, last = range_str.strip().partition('-')
    start = int(first)
    end = int(last) if dash_found else start
    # start < 1 must be rejected explicitly: start - 1 would otherwise become
    # a negative index and silently select pages from the end of the document.
    if start < 1 or end < start or start > page_count:
        raise ValueError(
            f"Invalid page range {range_str!r} for a {page_count}-page document"
        )
    return start - 1, min(end, page_count)


def process_pdf(contents, filename, ranges):
    """Split an uploaded PDF into one file per range and zip the results.

    Intended to run on a worker thread. Results are published through the
    module-level ``generated_file`` (ZIP bytes, or ``None``) and ``progress``
    (0-100 while working, ``-1`` on failure) rather than returned.

    Args:
        contents: Upload payload as a data URL, ``"<header>,<base64 data>"``.
        filename: Original file name (currently unused).
        ranges: Page specs such as ``"1-3"`` or ``"5"`` (1-based, inclusive).
    """
    global generated_file, progress
    progress = 0
    # Drop any archive from an earlier run so a failure here can never leave
    # a stale file available for download.
    generated_file = None

    try:
        if not ranges:
            raise ValueError("No page ranges supplied")

        # Split once: only the first comma separates header from payload.
        _, content_string = contents.split(',', 1)
        pdf = PdfReader(io.BytesIO(base64.b64decode(content_string)))
        page_count = len(pdf.pages)

        writers = []
        for range_str in ranges:
            start, stop = _parse_page_range(range_str, page_count)
            writer = PdfWriter()
            for page_index in range(start, stop):
                writer.add_page(pdf.pages[page_index])
            writers.append(writer)

        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zf:
            for i, writer in enumerate(writers, start=1):
                pdf_buffer = io.BytesIO()
                writer.write(pdf_buffer)
                zf.writestr(f"split_{i}.pdf", pdf_buffer.getvalue())
                # Report after the file is written, and stay below 100 until
                # the archive is published, so "100" always means "ready".
                progress = min(99, i / len(writers) * 100)

        # Read the buffer only after the ZipFile context has closed, so the
        # central directory has been written.
        generated_file = zip_buffer.getvalue()
        progress = 100
    except Exception as e:
        # Thread boundary: nothing upstream can catch this, so report the
        # failure through the shared state instead of letting it escape.
        print(f"Error processing PDF: {str(e)}")
        generated_file = None
        progress = -1

@callback(
    Output('progress-bar', 'value'),
    Output('download-button', 'disabled'),
    Input('split-button', 'n_clicks'),
    State('upload-pdf', 'contents'),
    State('upload-pdf', 'filename'),
    State({'type': 'range-input', 'index': ALL}, 'value'),
    prevent_initial_call=True
)
def split_pdf(n_clicks, contents, filename, ranges):
    """Start a background split of the uploaded PDF.

    Args:
        n_clicks: Click count of the Split button (the trigger).
        contents: Upload payload from ``upload-pdf``.
        filename: Name of the uploaded file.
        ranges: Values of every range input; entries may be ``None`` or blank.

    Returns:
        ``(0, True)``: the progress bar is reset and Download is disabled
        while the worker runs.

    Raises:
        PreventUpdate: if no file is uploaded or no non-blank range exists.
    """
    global generated_file, progress

    # Filter blanks *before* the emptiness check; otherwise a form with only
    # empty inputs would still launch a worker with nothing to do.
    ranges = [r.strip() for r in (ranges or []) if r and r.strip()]
    if not contents or not ranges:
        raise PreventUpdate

    # Reset the shared state here, not only inside the worker: the progress
    # callback can run before the thread is scheduled and would otherwise
    # see the previous run's 100 and re-enable Download for a stale archive.
    generated_file = None
    progress = 0

    thread = Thread(target=process_pdf, args=(contents, filename, ranges))
    thread.start()

    return 0, True

@callback(
    Output('progress-bar', 'value', allow_duplicate=True),
    Output('download-button', 'disabled', allow_duplicate=True),
    Input('progress-bar', 'value'),
    prevent_initial_call=True
)
def update_progress(value):
    """Mirror the worker's shared ``progress`` onto the progress bar.

    Args:
        value: Current value of the progress bar (serves only as the trigger).

    Returns:
        A ``(bar_value, download_disabled)`` pair: ``(100, False)`` once the
        worker reports completion, ``(0, True)`` after a failure (``-1``),
        otherwise the current progress with Download kept disabled.

    NOTE(review): the only trigger is the bar's own value and no timer-based
    input is visible in this file; confirm the bar really keeps advancing
    while the worker is running.
    """
    global progress
    current = progress

    # -1 is the worker's failure marker: show an empty bar.
    if current == -1:
        return 0, True

    finished = current == 100
    bar_value = 100 if finished else current
    return bar_value, not finished

@callback(
    Output("download-zip", "data"),
    Input("download-button", "n_clicks"),
    prevent_initial_call=True
)
def download_zip(n_clicks):
    """Send the generated ZIP archive to the browser.

    Args:
        n_clicks: Click count of the Download button (the trigger).

    Returns:
        The download payload built by ``dcc.send_bytes`` for
        ``split_pdfs.zip``.

    Raises:
        PreventUpdate: if no archive has been generated.
    """
    global generated_file
    # Nothing to send until the worker has published an archive.
    if generated_file is None:
        raise PreventUpdate
    return dcc.send_bytes(generated_file, "split_pdfs.zip")

if __name__ == '__main__':
    # The server listens on every interface (0.0.0.0). Debug mode enables an
    # interactive debugger that can execute code, so it must not be on by
    # default for a network-reachable process. Opt in with DASH_DEBUG=1.
    debug_enabled = os.environ.get('DASH_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}

    print("Starting the Dash application...")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    # app.run blocks until the server stops, so this prints on shutdown.
    print("Dash application has finished running.")