# File size: 4,920 Bytes
# 557bfa3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import base64
import io
import os
import zipfile
from threading import Thread

import dash
import dash_bootstrap_components as dbc
from dash import dcc, html, Input, Output, State, callback
from dash.exceptions import PreventUpdate
from PyPDF2 import PdfReader, PdfWriter

# The Dash application instance, styled with the Bootstrap theme shipped by
# dash-bootstrap-components.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Global variables
# These module-level values are how the worker function (process_pdf) hands
# its results to the callbacks; they are plain globals with no locking.

# Bytes of the most recently built ZIP archive; set by process_pdf and read by
# download_zip. None until an archive has been produced.
generated_file = None
# Progress indicator written by process_pdf and read by update_progress:
# 0-100 is a percentage, -1 marks a failed run.
progress = 0

# Layout
# Page structure, top to bottom: title, PDF upload area, selected-file label,
# a card for entering page ranges, the split button, a progress bar, and the
# download button with its hidden dcc.Download target.
app.layout = dbc.Container([
    html.H1("PDF Splitter", className="my-4"),
    dcc.Upload(
        id='upload-pdf',
        children=html.Div(['Drag and Drop or ', html.A('Select PDF')]),
        style={
            'width': '100%',
            'height': '60px',
            'lineHeight': '60px',
            'borderWidth': '1px',
            'borderStyle': 'dashed',
            'borderRadius': '5px',
            'textAlign': 'center',
            'margin': '10px'
        },
        multiple=False
    ),
    html.Div(id='pdf-name'),
    dbc.Card([
        dbc.CardBody([
            dbc.Input(id='page-range', type='text', placeholder='Enter page range (e.g., 1-3)'),
            dbc.Button("Add Range", id='add-range', color="secondary", className="mt-2"),
            html.Div(id='ranges-list'),
        ])
    ], className="my-3"),
    # Both action buttons start disabled; callbacks enable them once a file is
    # uploaded / an archive is ready.
    dbc.Button("Split PDF", id='split-button', color="primary", className="mt-3", disabled=True),
    # dash.dcc has no Progress component; the progress bar is provided by
    # dash-bootstrap-components and exposes the `value` prop the callbacks use.
    dbc.Progress(id='progress-bar', className="my-3"),
    dbc.Button("Download ZIP", id='download-button', color="success", className="mt-3", disabled=True),
    dcc.Download(id="download-zip"),
], fluid=True)

@callback(
    Output('pdf-name', 'children'),
    Output('split-button', 'disabled'),
    Input('upload-pdf', 'contents'),
    State('upload-pdf', 'filename')
)
def update_output(contents, filename):
    """Show the uploaded file's name and toggle the split button.

    Args:
        contents: Value of the upload component's ``contents`` prop, or
            ``None`` when nothing has been uploaded.
        filename: Name of the uploaded file as reported by the upload
            component.

    Returns:
        A ``(label, disabled)`` pair: the text for the ``pdf-name`` element
        and whether the split button should be disabled.
    """
    # Guard clause: without an upload there is nothing to split.
    if contents is None:
        return "No file selected", True
    return f"Selected file: {filename}", False

@callback(
    Output('ranges-list', 'children'),
    Input('add-range', 'n_clicks'),
    State('page-range', 'value'),
    State('ranges-list', 'children')
)
def add_range(n_clicks, new_range, existing_ranges):
    """Append the typed page range to the list shown under the input.

    Args:
        n_clicks: Click count of the "Add Range" button; ``None`` before the
            first click.
        new_range: Text currently in the page-range input.
        existing_ranges: Children already rendered in ``ranges-list``
            (may be ``None`` or empty).

    Returns:
        The children for ``ranges-list``: the existing entries, plus a new
        ``"Range: <text>"`` entry when the button was clicked with non-empty
        input. The text is stored as typed; it is not validated here.
    """
    # Normalise "no children yet" (None / empty) to an empty list once.
    current_entries = existing_ranges or []

    button_never_clicked = n_clicks is None
    if button_never_clicked or not new_range:
        return current_entries

    entry = html.Div(f"Range: {new_range}")
    return current_entries + [entry]

def _parse_page_range(range_str, page_count):
    """Translate a 1-based page range string into 0-based page indices.

    Accepts ``"start-end"`` (inclusive) or a single page number ``"N"``.
    The end of the range is clamped to ``page_count``.

    Args:
        range_str: User-entered range text, e.g. ``"1-3"`` or ``"5"``.
        page_count: Number of pages in the source PDF.

    Returns:
        A ``range`` of 0-based page indices (possibly empty when the range
        starts beyond the last page).

    Raises:
        ValueError: If the text is not numeric, ``start`` is below 1, or
            ``end`` precedes ``start``.
    """
    first, dash_found, last = range_str.strip().partition('-')
    start = int(first)
    end = int(last) if dash_found else start
    # A start below 1 would turn into a negative index and silently pick
    # pages from the end of the document, so reject it explicitly.
    if start < 1 or end < start:
        raise ValueError(f"Invalid page range: {range_str!r}")
    return range(start - 1, min(end, page_count))


def process_pdf(contents, filename, ranges):
    """Split an uploaded PDF into one file per range and zip the results.

    Intended to run in a background thread. Results are published through
    the module-level globals: ``generated_file`` receives the ZIP bytes and
    ``progress`` goes from 0 to 100, or is set to -1 if anything fails.

    Args:
        contents: Upload payload in ``"<header>,<base64 data>"`` form.
        filename: Original file name (currently unused).
        ranges: Iterable of page-range strings such as ``"1-3"`` or ``"5"``
            (1-based, inclusive).

    Returns:
        None. Errors are caught, printed, and signalled via ``progress = -1``.
    """
    global generated_file, progress
    progress = 0
    # Drop the archive from any previous run so a failed or in-flight job can
    # never hand out stale data through the download callback.
    generated_file = None

    try:
        _header, separator, content_string = contents.partition(',')
        if not separator:
            raise ValueError("Upload contents are not in '<header>,<base64>' form")
        decoded = base64.b64decode(content_string)
        pdf = PdfReader(io.BytesIO(decoded))
        page_count = len(pdf.pages)

        writers = []
        for range_str in ranges:
            writer = PdfWriter()
            for page_index in _parse_page_range(range_str, page_count):
                writer.add_page(pdf.pages[page_index])
            writers.append(writer)

        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zf:
            for number, writer in enumerate(writers, start=1):
                pdf_buffer = io.BytesIO()
                writer.write(pdf_buffer)
                zf.writestr(f"split_{number}.pdf", pdf_buffer.getvalue())
                # Report progress only after the file is written, and stay
                # below 100 until the archive is actually published: readers
                # treat 100 as "download is ready".
                progress = min(99, number / len(writers) * 100)

        generated_file = zip_buffer.getvalue()
        progress = 100
    except Exception as e:
        # Top-level boundary of the worker thread: report and flag failure
        # instead of letting the thread die silently.
        print(f"Error processing PDF: {str(e)}")
        progress = -1

@callback(
    Output('progress-bar', 'value'),
    Output('download-button', 'disabled'),
    Input('split-button', 'n_clicks'),
    State('upload-pdf', 'contents'),
    State('upload-pdf', 'filename'),
    State('ranges-list', 'children'),
    prevent_initial_call=True
)
def split_pdf(n_clicks, contents, filename, ranges):
    """Start splitting the uploaded PDF in a background thread.

    Args:
        n_clicks: Click count of the "Split PDF" button (the trigger).
        contents: Upload payload from the upload component.
        filename: Name of the uploaded file.
        ranges: Children of ``ranges-list``; each is a serialized component
            whose ``props.children`` is a ``"Range: <text>"`` string.

    Returns:
        ``(0, True)``: the progress bar is reset and the download button is
        disabled while the worker runs.

    Raises:
        PreventUpdate: If no file has been uploaded or no range was added.
    """
    if not (contents and ranges):
        raise PreventUpdate

    # Recover the raw range text from each rendered "Range: <text>" entry.
    range_strings = []
    for item in ranges:
        label = item['props']['children']
        range_strings.append(label.split(': ')[1])

    worker = Thread(target=process_pdf, args=(contents, filename, range_strings))
    worker.start()

    return 0, True

@callback(
    Output('progress-bar', 'value', allow_duplicate=True),
    Output('download-button', 'disabled', allow_duplicate=True),
    Input('progress-bar', 'value'),
    prevent_initial_call=True
)
def update_progress(value):
    """Mirror the worker's ``progress`` global into the UI.

    Triggered by a change of the progress bar's own ``value`` prop.

    Args:
        value: Current value of the progress bar (unused; the module-level
            ``progress`` is the source of truth).

    Returns:
        ``(bar_value, download_disabled)``: ``(100, False)`` when the job is
        finished, ``(0, True)`` when it failed (``progress == -1``), otherwise
        the current progress with the download button kept disabled.
    """
    # NOTE(review): the only trigger is this callback's own output prop and no
    # periodic trigger (e.g. an interval component) is visible in this file --
    # confirm the bar actually refreshes while the worker is running.
    finished = progress == 100
    failed = progress == -1

    if finished:
        return 100, False
    if failed:
        return 0, True
    return progress, True

@callback(
    Output("download-zip", "data"),
    Input("download-button", "n_clicks"),
    prevent_initial_call=True
)
def download_zip(n_clicks):
    """Send the generated ZIP archive to the browser.

    Args:
        n_clicks: Click count of the "Download ZIP" button (the trigger).

    Returns:
        The download payload for ``download-zip``, built from the bytes held
        in the module-level ``generated_file`` and named ``split_pdfs.zip``.

    Raises:
        PreventUpdate: If no archive has been generated yet.
    """
    # Nothing to serve until the worker has published an archive.
    if generated_file is None:
        raise PreventUpdate
    return dcc.send_bytes(generated_file, "split_pdfs.zip")

if __name__ == '__main__':
    print("Starting the Dash application...")
    # The server binds to every interface (0.0.0.0), so debug mode -- which
    # turns on the interactive debugger and dev tools -- must not be on by
    # default. Opt in locally with DASH_DEBUG=1 (also accepts true/yes/on).
    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")