# Spaces: Paused
import base64
import io
import json
import os
import zipfile
from threading import Thread

import dash
import dash_bootstrap_components as dbc
from dash import dcc, html, Input, Output, State, callback, MATCH, ALL
from dash.exceptions import PreventUpdate
from PyPDF2 import PdfReader, PdfWriter
# Dash application instance, themed with the stock Bootstrap stylesheet.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Module-level state written by process_pdf and read by the callbacks below.
generated_file = None  # bytes of the most recently built ZIP, or None
progress = 0  # percentage of work done; -1 signals a failed run

# --- Layout ------------------------------------------------------------
# The page is assembled from a few named pieces so the top-level container
# reads as a simple list of widgets.
_upload_box_style = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px',
}

_pdf_upload = dcc.Upload(
    id='upload-pdf',
    children=html.Div(['Drag and Drop or ', html.A('Select PDF')]),
    style=_upload_box_style,
    multiple=False,
)

# The first page-range row; further rows use the same pattern-matching ids
# ('range-input' / 'remove-range') with a different index.
_initial_range_row = dbc.Row(
    [
        dbc.Col(
            dbc.Input(
                id={'type': 'range-input', 'index': 0},
                type='text',
                placeholder='Enter page range (e.g., 1-3)',
            ),
            width=10,
        ),
        dbc.Col(
            dbc.Button(
                "Remove",
                id={'type': 'remove-range', 'index': 0},
                color="danger",
                size="sm",
            ),
            width=2,
        ),
    ],
    className="mb-2",
)

_ranges_card = dbc.Card(
    [
        dbc.CardBody(
            [
                html.Div(id='ranges-container', children=[_initial_range_row]),
                dbc.Button(
                    "Add Range",
                    id='add-range',
                    color="secondary",
                    className="mt-2",
                ),
            ]
        )
    ],
    className="my-3",
)

app.layout = dbc.Container(
    [
        html.H1("PDF Splitter", className="my-4"),
        _pdf_upload,
        html.Div(id='pdf-name'),
        _ranges_card,
        # Both action buttons start disabled.
        dbc.Button(
            "Split PDF",
            id='split-button',
            color="primary",
            className="mt-3",
            disabled=True,
        ),
        dbc.Progress(id='progress-bar', className="my-3"),
        dbc.Button(
            "Download ZIP",
            id='download-button',
            color="success",
            className="mt-3",
            disabled=True,
        ),
        dcc.Download(id="download-zip"),
    ],
    fluid=True,
)
def update_output(contents, filename):
    """Describe the uploaded file and report whether splitting stays disabled.

    Args:
        contents: Upload payload, or ``None`` when nothing has been uploaded.
        filename: Name of the uploaded file; only used in the message.

    Returns:
        A ``(message, disabled)`` tuple: the status text and ``True`` when no
        file is available, ``False`` otherwise.

    NOTE(review): no callback decorator is visible on this function;
    presumably it feeds 'pdf-name' and the 'split-button' disabled flag.
    Confirm the wiring.
    """
    # Guard clause: nothing uploaded yet.
    if contents is None:
        return "No file selected", True

    return f"Selected file: {filename}", False
def _range_row_index(row):
    """Return the pattern-matching index stored on a range row's Remove button."""
    button_id = row['props']['children'][1]['props']['children']['props']['id']
    # Serialized components carry dict ids; tolerate a JSON-string id as well.
    if isinstance(button_id, str):
        button_id = json.loads(button_id)
    return button_id['index']


def manage_ranges(add_clicks, remove_clicks, existing_ranges):
    """Add or remove a page-range row depending on which button fired.

    Args:
        add_clicks: Click count of the 'add-range' button (not inspected).
        remove_clicks: Click counts of the 'remove-range' buttons (not
            inspected).
        existing_ranges: Current children of the ranges container, as
            serialized component dicts; ``None`` is treated as empty.

    Returns:
        The updated list of range rows.

    NOTE(review): no callback decorator is visible on this function;
    presumably it outputs to 'ranges-container' children. Confirm the wiring.
    """
    rows = list(existing_ranges or [])

    ctx = dash.callback_context
    if not ctx.triggered:
        return rows

    # prop_id has the form "<component id>.<property>". Split from the right
    # so a dot inside a JSON-encoded id cannot truncate it.
    triggered_id = ctx.triggered[0]['prop_id'].rpartition('.')[0]

    if triggered_id == 'add-range':
        # Use max+1 rather than len(rows): after a removal, len() can equal
        # an index that is still in use and create duplicate component ids.
        new_index = max((_range_row_index(row) for row in rows), default=-1) + 1
        rows.append(
            dbc.Row(
                [
                    dbc.Col(
                        dbc.Input(
                            id={'type': 'range-input', 'index': new_index},
                            type='text',
                            placeholder='Enter page range (e.g., 1-3)',
                        ),
                        width=10,
                    ),
                    dbc.Col(
                        dbc.Button(
                            "Remove",
                            id={'type': 'remove-range', 'index': new_index},
                            color="danger",
                            size="sm",
                        ),
                        width=2,
                    ),
                ],
                className="mb-2",
            )
        )
    elif 'remove-range' in triggered_id:
        # Pattern-matching ids arrive JSON-encoded in prop_id.
        remove_index = json.loads(triggered_id)['index']
        rows = [row for row in rows if _range_row_index(row) != remove_index]

    return rows
def _parse_page_range(range_str):
    """Parse ``"3-7"`` (or a single page ``"5"``) into 1-based ``(start, end)``."""
    first, dash_found, last = range_str.partition('-')
    start = int(first)
    end = int(last) if dash_found else start
    # A start below 1 would turn into a negative index and silently pick
    # pages from the end of the document.
    if start < 1 or end < start:
        raise ValueError(f"Invalid page range: {range_str!r}")
    return start, end


def process_pdf(contents, filename, ranges):
    """Split an uploaded PDF into one file per page range and zip the results.

    Results are published through the module globals: ``generated_file``
    receives the ZIP bytes and ``progress`` moves from 0 to 100, or is set
    to -1 if anything fails.

    Args:
        contents: Upload payload of the form ``"<header>,<base64 data>"``.
        filename: Name of the uploaded file (currently unused).
        ranges: Iterable of 1-based inclusive page ranges such as ``"1-3"``;
            a single page number such as ``"5"`` is also accepted. The end
            of a range is clamped to the document's page count.
    """
    global generated_file, progress
    progress = 0
    # Drop any archive from a previous run so a failure here cannot leave a
    # stale file available.
    generated_file = None
    try:
        _content_type, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)
        pdf = PdfReader(io.BytesIO(decoded))
        page_count = len(pdf.pages)

        writers = []
        for range_str in ranges:
            start, end = _parse_page_range(range_str)
            writer = PdfWriter()
            for page_number in range(start - 1, min(end, page_count)):
                writer.add_page(pdf.pages[page_number])
            writers.append(writer)

        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zf:
            for i, writer in enumerate(writers):
                pdf_buffer = io.BytesIO()
                writer.write(pdf_buffer)
                zf.writestr(f"split_{i+1}.pdf", pdf_buffer.getvalue())
                # Stay below 100 until the archive is published, so a reader
                # never sees "done" before generated_file is set.
                progress = min((i + 1) / len(writers) * 100, 99)

        # Read the bytes only after the ZipFile is closed; closing is what
        # finalizes the archive.
        generated_file = zip_buffer.getvalue()
        progress = 100
    except Exception as e:
        # Boundary of the worker thread: report the failure via `progress`
        # instead of letting the thread die silently.
        print(f"Error processing PDF: {str(e)}")
        progress = -1
def split_pdf(n_clicks, contents, filename, ranges):
    """Start splitting the uploaded PDF on a background thread.

    Args:
        n_clicks: Click count of the triggering button (not inspected).
        contents: Upload payload passed through to ``process_pdf``.
        filename: Name of the uploaded file, passed through unchanged.
        ranges: Raw values of the page-range inputs; blank or ``None``
            entries are ignored.

    Returns:
        ``(0, True)``.
        NOTE(review): presumably the initial progress value and a
        "disabled" flag for the download button; no callback decorator is
        visible here, so confirm the wiring.

    Raises:
        PreventUpdate: If no file is uploaded or no usable range remains.
    """
    # Filter before validating: a list made only of blank inputs must not
    # start a worker that would build an empty archive.
    ranges = [r.strip() for r in (ranges or []) if r and r.strip()]
    if not contents or not ranges:
        raise PreventUpdate

    thread = Thread(target=process_pdf, args=(contents, filename, ranges))
    thread.start()
    return 0, True
def update_progress(value):
    """Translate the shared ``progress`` value into a ``(value, flag)`` pair.

    Args:
        value: Ignored; the result depends only on the module-level
            ``progress``.

    Returns:
        ``(100, False)`` when progress is 100, ``(0, True)`` when progress
        is -1 (failure), otherwise ``(progress, True)``.
        NOTE(review): the boolean presumably drives the download button's
        ``disabled`` property. Confirm against the callback wiring.
    """
    current = progress

    if current == -1:
        # Failed run: report zero progress.
        return 0, True
    if current == 100:
        return 100, False
    return current, True
def download_zip(n_clicks):
    """Send the most recently generated ZIP archive to the browser.

    Args:
        n_clicks: Click count of the triggering button (not inspected).

    Returns:
        The result of ``dcc.send_bytes`` for ``generated_file`` under the
        name ``split_pdfs.zip``.

    Raises:
        PreventUpdate: If no archive has been generated yet.
    """
    if generated_file is None:
        raise PreventUpdate
    return dcc.send_bytes(generated_file, "split_pdfs.zip")
if __name__ == '__main__':
    # The server listens on all interfaces, so debug mode must not be on by
    # default. Opt in explicitly with DASH_DEBUG=1 (or "true"/"yes").
    debug_mode = os.environ.get("DASH_DEBUG", "").strip().lower() in {"1", "true", "yes"}

    print("Starting the Dash application...")
    app.run(debug=debug_mode, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")