Spaces:
Paused
Paused
File size: 5,943 Bytes
557bfa3 ba2629d 557bfa3 ba2629d 557bfa3 ba2629d 557bfa3 ba2629d 557bfa3 ba2629d 557bfa3 ba2629d 557bfa3 ba2629d 557bfa3 ba2629d 557bfa3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
import base64
import io
import os
import zipfile
from threading import Thread
import dash
import dash_bootstrap_components as dbc
from dash import dcc, html, Input, Output, State, callback, MATCH, ALL
from dash.exceptions import PreventUpdate
from PyPDF2 import PdfReader, PdfWriter
# Create the Dash application and load the Bootstrap stylesheet shipped with
# dash-bootstrap-components.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
# Global variables
# NOTE(review): these are module-level, so every request handled by this
# process reads and writes the same two values -- confirm that a single
# concurrent user is the intended deployment model.
# ZIP archive bytes produced by process_pdf(); stays None until a split succeeds.
generated_file = None
# Split progress as a percentage (0-100); process_pdf() sets it to -1 on failure.
progress = 0
# Layout
# Inline CSS for the dashed drag-and-drop upload target.
_upload_style = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px'
}

# File picker; a single PDF at a time.
_upload_box = dcc.Upload(
    id='upload-pdf',
    children=html.Div(['Drag and Drop or ', html.A('Select PDF')]),
    style=_upload_style,
    multiple=False,
)

# The first page-range row (index 0). Its ids are pattern-matching dicts so
# that callbacks can address every range input / remove button at once.
_initial_range_row = dbc.Row(
    [
        dbc.Col(
            dbc.Input(
                id={'type': 'range-input', 'index': 0},
                type='text',
                placeholder='Enter page range (e.g., 1-3)',
            ),
            width=10,
        ),
        dbc.Col(
            dbc.Button(
                "Remove",
                id={'type': 'remove-range', 'index': 0},
                color="danger",
                size="sm",
            ),
            width=2,
        ),
    ],
    className="mb-2",
)

# Card holding the editable list of ranges plus the "Add Range" button.
_ranges_card = dbc.Card(
    [
        dbc.CardBody([
            html.Div(id='ranges-container', children=[_initial_range_row]),
            dbc.Button("Add Range", id='add-range', color="secondary", className="mt-2"),
        ])
    ],
    className="my-3",
)

# Page skeleton, top to bottom: title, upload box, selected-file label,
# range editor, split button, progress bar, download button and the hidden
# download target.
app.layout = dbc.Container(
    [
        html.H1("PDF Splitter", className="my-4"),
        _upload_box,
        html.Div(id='pdf-name'),
        _ranges_card,
        # Starts disabled; the upload callback enables it once a file is chosen.
        dbc.Button("Split PDF", id='split-button', color="primary", className="mt-3", disabled=True),
        dbc.Progress(id='progress-bar', className="my-3"),
        dbc.Button("Download ZIP", id='download-button', color="success", className="mt-3", disabled=True),
        dcc.Download(id="download-zip"),
    ],
    fluid=True,
)
@callback(
    Output('pdf-name', 'children'),
    Output('split-button', 'disabled'),
    Input('upload-pdf', 'contents'),
    State('upload-pdf', 'filename'),
)
def update_output(contents, filename):
    """Show the uploaded file's name and toggle the "Split PDF" button.

    Args:
        contents: Value of the upload component's ``contents`` property, or
            ``None`` when nothing has been uploaded.
        filename: Name of the uploaded file as reported by the browser.

    Returns:
        A ``(label, disabled)`` pair: the text for the ``pdf-name`` element
        and the ``disabled`` flag for the split button.
    """
    # Nothing uploaded yet: keep the split button locked.
    if contents is None:
        return "No file selected", True
    return f"Selected file: {filename}", False
def _range_row_index(row):
    """Return the pattern-matching ``index`` of a serialized range row.

    ``row`` is the JSON form of a ``dbc.Row`` as delivered through ``State``:
    the second column wraps the "Remove" button, whose ``id`` is already a
    dict of the form ``{'type': 'remove-range', 'index': N}``.
    """
    remove_button = row['props']['children'][1]['props']['children']
    return remove_button['props']['id']['index']


@callback(
    Output('ranges-container', 'children'),
    Input('add-range', 'n_clicks'),
    Input({'type': 'remove-range', 'index': ALL}, 'n_clicks'),
    State('ranges-container', 'children'),
    prevent_initial_call=True
)
def manage_ranges(add_clicks, remove_clicks, existing_ranges):
    """Add or remove a page-range row in response to a button click.

    Args:
        add_clicks: Click count of the "Add Range" button (unused; only the
            triggering component matters).
        remove_clicks: Click counts of every "Remove" button (unused).
        existing_ranges: Current children of ``ranges-container`` in their
            serialized (dict) form.

    Returns:
        The updated list of rows for ``ranges-container``.

    Raises:
        PreventUpdate: When the callback fired without a real click.
    """
    ctx = dash.callback_context
    # A freshly inserted "Remove" button can fire this callback with
    # n_clicks=None; without this guard the new row could be removed again
    # immediately. NOTE(review): confirm against the Dash version in use.
    if not ctx.triggered or not ctx.triggered[0]['value']:
        raise PreventUpdate

    # Work on a copy; a lone child may arrive as a bare dict rather than a list.
    if isinstance(existing_ranges, dict):
        rows = [existing_ranges]
    else:
        rows = list(existing_ranges or [])

    # ctx.triggered_id is a plain string for 'add-range' and a dict for the
    # pattern-matching remove buttons, so no JSON parsing of prop_id is needed.
    triggered_id = ctx.triggered_id
    if triggered_id == 'add-range':
        # len(rows) may collide with a surviving row after a removal, so use
        # one past the highest index still in use.
        new_index = max((_range_row_index(row) for row in rows), default=-1) + 1
        rows.append(dbc.Row([
            dbc.Col(dbc.Input(id={'type': 'range-input', 'index': new_index}, type='text', placeholder='Enter page range (e.g., 1-3)'), width=10),
            dbc.Col(dbc.Button("Remove", id={'type': 'remove-range', 'index': new_index}, color="danger", size="sm"), width=2),
        ], className="mb-2"))
    elif isinstance(triggered_id, dict) and triggered_id.get('type') == 'remove-range':
        remove_index = triggered_id['index']
        rows = [row for row in rows if _range_row_index(row) != remove_index]
    return rows
def _parse_page_range(range_str, page_count):
    """Translate a 1-based page range string into 0-based page indices.

    Accepts ``"A-B"`` (inclusive) or a single page ``"A"``. The end of the
    range is clamped to ``page_count``.

    Raises:
        ValueError: If the text is not numeric, the start page is below 1
            or beyond the document, or the end precedes the start.
    """
    first_text, dash_found, last_text = range_str.partition('-')
    first = int(first_text)
    last = int(last_text) if dash_found else first
    # first == 0 would otherwise become index -1 and silently pick the LAST page.
    if first < 1 or last < first:
        raise ValueError(f"Invalid page range: {range_str!r}")
    if first > page_count:
        raise ValueError(
            f"Page range {range_str!r} starts beyond the last page ({page_count})"
        )
    return range(first - 1, min(last, page_count))


def process_pdf(contents, filename, ranges):
    """Split an uploaded PDF into one PDF per page range and zip the results.

    Intended to run in a worker thread. The outcome is reported through the
    module globals rather than a return value: ``progress`` moves from 0 up
    to 100 on success or is set to -1 on failure, and ``generated_file``
    receives the ZIP archive bytes on success.

    Args:
        contents: Upload payload of the form ``"<header>,<base64 data>"``.
        filename: Original file name (currently unused).
        ranges: Iterable of page-range strings such as ``"1-3"`` or ``"5"``.

    Returns:
        None. Errors are caught, printed, and signalled via ``progress = -1``.
    """
    global generated_file, progress
    progress = 0
    # Drop any archive from an earlier run so a failed split can never be
    # followed by a download of stale results.
    generated_file = None
    try:
        if not ranges:
            raise ValueError("No page ranges supplied")

        _header, comma_found, content_string = contents.partition(',')
        if not comma_found:
            raise ValueError("Upload contents are not in '<header>,<base64>' form")
        decoded = base64.b64decode(content_string)

        pdf = PdfReader(io.BytesIO(decoded))
        page_count = len(pdf.pages)

        writers = []
        for range_str in ranges:
            writer = PdfWriter()
            for page_index in _parse_page_range(range_str, page_count):
                writer.add_page(pdf.pages[page_index])
            writers.append(writer)

        total = len(writers)
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zf:
            for i, writer in enumerate(writers, start=1):
                pdf_buffer = io.BytesIO()
                writer.write(pdf_buffer)
                zf.writestr(f"split_{i}.pdf", pdf_buffer.getvalue())
                # 100 is reserved as the "archive is ready" signal, so stay
                # below it until generated_file has actually been stored.
                progress = min(99, i / total * 100)

        # Publish the archive first, then announce completion.
        generated_file = zip_buffer.getvalue()
        progress = 100
    except Exception as e:
        # Top-level boundary of the worker thread: report and flag failure.
        print(f"Error processing PDF: {str(e)}")
        progress = -1
@callback(
    Output('progress-bar', 'value'),
    Output('download-button', 'disabled'),
    Input('split-button', 'n_clicks'),
    State('upload-pdf', 'contents'),
    State('upload-pdf', 'filename'),
    State({'type': 'range-input', 'index': ALL}, 'value'),
    prevent_initial_call=True
)
def split_pdf(n_clicks, contents, filename, ranges):
    """Start splitting the uploaded PDF in a background thread.

    Args:
        n_clicks: Click count of the "Split PDF" button (unused).
        contents: Upload payload of the selected PDF.
        filename: Name of the uploaded file.
        ranges: Current values of every page-range input; untouched inputs
            arrive as ``None``.

    Returns:
        ``(0, True)``: reset the progress bar and disable the download
        button while the worker runs.

    Raises:
        PreventUpdate: If no file is uploaded or no range has been entered.
    """
    # Clean up BEFORE the guard: a list such as [None] or ["  "] is truthy,
    # so checking the raw list would launch a worker with nothing to do.
    entered = [
        text.strip()
        for text in (ranges or [])
        if isinstance(text, str) and text.strip()
    ]
    if not contents or not entered:
        raise PreventUpdate

    thread = Thread(target=process_pdf, args=(contents, filename, entered))
    thread.start()
    return 0, True
@callback(
    Output('progress-bar', 'value', allow_duplicate=True),
    Output('download-button', 'disabled', allow_duplicate=True),
    Input('progress-bar', 'value'),
    prevent_initial_call=True,
)
def update_progress(value):
    """Mirror the module-level ``progress`` value into the UI.

    Args:
        value: Current progress-bar value (unused; it only triggers the call).

    Returns:
        A ``(bar_value, download_disabled)`` pair: ``(100, False)`` once the
        split has finished, ``(0, True)`` after a failure (``progress == -1``),
        otherwise the current progress with the download button disabled.

    NOTE(review): the only Input is the same property this callback writes,
    and no timer component is visible in the layout, so it is unclear how
    often this re-fires while the worker is running -- confirm.
    """
    # Finished: fill the bar and unlock the download button.
    if progress == 100:
        return 100, False
    # -1 is the failure sentinel: reset the bar and keep the button locked.
    if progress == -1:
        return 0, True
    return progress, True
@callback(
    Output("download-zip", "data"),
    Input("download-button", "n_clicks"),
    prevent_initial_call=True,
)
def download_zip(n_clicks):
    """Send the generated ZIP archive to the browser.

    Args:
        n_clicks: Click count of the "Download ZIP" button (unused).

    Returns:
        The download payload built by ``dcc.send_bytes`` for the module-level
        ``generated_file``, served as ``split_pdfs.zip``.

    Raises:
        PreventUpdate: If no archive has been generated yet.
    """
    # Nothing to serve until a split has produced an archive.
    if generated_file is None:
        raise PreventUpdate
    return dcc.send_bytes(generated_file, "split_pdfs.zip")
if __name__ == '__main__':
    # The server binds to every interface (0.0.0.0), so debug mode must not be
    # on unconditionally: it exposes development tooling to anyone who can
    # reach the port. Opt in explicitly with DASH_DEBUG=1 (or true/yes/on).
    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    print("Starting the Dash application...")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")