Spaces:
Paused
Paused
File size: 7,072 Bytes
557bfa3 57c7f35 557bfa3 02dbd73 557bfa3 ba2629d 557bfa3 f0a8607 557bfa3 57c7f35 557bfa3 ba2629d 557bfa3 ba2629d 02dbd73 557bfa3 f0a8607 557bfa3 5b28261 b69229a 5b28261 b69229a 5b28261 b69229a 557bfa3 57c7f35 557bfa3 ba2629d 557bfa3 5b28261 557bfa3 57c7f35 557bfa3 57c7f35 ba2629d 57c7f35 5b28261 557bfa3 02dbd73 557bfa3 5b28261 557bfa3 57c7f35 557bfa3 57c7f35 02dbd73 557bfa3 57c7f35 02dbd73 557bfa3 02dbd73 557bfa3 57c7f35 557bfa3 57c7f35 557bfa3 57c7f35 557bfa3 57c7f35 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
import base64
import io
import os
import zipfile
import logging
from threading import Thread
import json
import dash
import dash_bootstrap_components as dbc
from dash import dcc, html, Input, Output, State, callback, MATCH, ALL
from dash.exceptions import PreventUpdate
from PyPDF2 import PdfReader, PdfWriter
# Module-level state shared between the Dash callbacks and the worker thread.
generated_file = None  # bytes of the ZIP archive produced by process_pdf
progress = 0  # percent complete; process_pdf stores -1 when it fails

# Logging: timestamped, level-tagged records at INFO and above.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# The Dash application, styled with the Bootstrap theme.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
# Layout: upload target, editable list of page ranges, split/download controls.
app.layout = dbc.Container(
    fluid=True,
    children=[
        html.H1(children="PDF Splitter", className="my-4"),
        # Drop zone accepting a single file.
        dcc.Upload(
            id='upload-pdf',
            multiple=False,
            children=html.Div(
                children=['Drag and Drop or ', html.A(children='Select PDF')],
            ),
            style={
                'width': '100%',
                'height': '60px',
                'lineHeight': '60px',
                'borderWidth': '1px',
                'borderStyle': 'dashed',
                'borderRadius': '5px',
                'textAlign': 'center',
                'margin': '10px',
            },
        ),
        # Filled in by update_output with the uploaded file's name.
        html.Div(id='pdf-name'),
        dbc.Card(
            className="my-3",
            children=[
                dbc.CardBody(
                    children=[
                        # Rows here are added/removed by add_range / remove_range.
                        html.Div(
                            id='ranges-container',
                            children=[
                                dbc.Row(
                                    className="mb-2",
                                    children=[
                                        dbc.Col(
                                            width=10,
                                            children=dbc.Input(
                                                id={'type': 'range-input', 'index': 0},
                                                type='text',
                                                placeholder='Enter page range (e.g., 1-3)',
                                            ),
                                        ),
                                        dbc.Col(
                                            width=2,
                                            children=dbc.Button(
                                                children="Remove",
                                                id={'type': 'remove-range', 'index': 0},
                                                color="danger",
                                                size="sm",
                                            ),
                                        ),
                                    ],
                                ),
                            ],
                        ),
                        dbc.Button(
                            children="Add Range",
                            id='add-range',
                            color="secondary",
                            className="mt-2",
                        ),
                    ],
                ),
            ],
        ),
        # Starts disabled; update_output enables it once a file is uploaded.
        dbc.Button(
            children="Split PDF",
            id='split-button',
            color="primary",
            className="mt-3",
            disabled=True,
        ),
        dbc.Progress(id='progress-bar', className="my-3"),
        dbc.Button(
            children="Download ZIP",
            id='download-button',
            color="success",
            className="mt-3",
        ),
        dcc.Download(id="download-zip"),
        # 'pre-line' keeps newlines in status messages visible.
        html.Div(id='log-output', style={'whiteSpace': 'pre-line'}),
    ],
)
@callback(
    Output('pdf-name', 'children'),
    Output('split-button', 'disabled'),
    Input('upload-pdf', 'contents'),
    Input('upload-pdf', 'filename'),
)
def update_output(contents, filename):
    """Show the uploaded file's name and enable the split button.

    Args:
        contents: The upload component's ``contents`` value, or ``None``
            when nothing has been uploaded.
        filename: The upload component's ``filename`` value.

    Returns:
        A ``(children, disabled)`` pair: the label for ``pdf-name`` and the
        ``disabled`` flag for ``split-button``.
    """
    # Nothing uploaded yet: blank label, keep the button disabled.
    if contents is None:
        return "", True

    logger.info(f"PDF uploaded: {filename}")
    label = html.Div(f"Uploaded: {filename}")
    return label, False
@callback(
    Output('ranges-container', 'children'),
    Input('add-range', 'n_clicks'),
    State('ranges-container', 'children'),
    prevent_initial_call=True
)
def add_range(n_clicks, existing_ranges):
    """Append a new page-range input row to the ranges container.

    Args:
        n_clicks: Click count of the "Add Range" button.
        existing_ranges: Current children of ``ranges-container`` (a list of
            rows, a single row, or ``None``).

    Returns:
        A new list of rows: the existing ones plus, when ``n_clicks`` is
        truthy, one additional row.
    """
    # Normalise to a fresh list so the State value is never mutated in place
    # and a missing/single child does not break ``append``.
    if existing_ranges is None:
        rows = []
    elif isinstance(existing_ranges, list):
        rows = list(existing_ranges)
    else:
        rows = [existing_ranges]

    if not n_clicks:
        return rows

    # Use the click count as the row index. The initial row uses index 0 and
    # n_clicks only grows, so the index is never reused. ``len(rows)`` would
    # re-issue an index already in use after a row is removed, which yields
    # duplicate component IDs.
    new_index = n_clicks
    rows.append(
        dbc.Row([
            dbc.Col(
                dbc.Input(
                    id={'type': 'range-input', 'index': new_index},
                    type='text',
                    placeholder='Enter page range (e.g., 1-3)',
                ),
                width=10,
            ),
            dbc.Col(
                dbc.Button(
                    "Remove",
                    id={'type': 'remove-range', 'index': new_index},
                    color="danger",
                    size="sm",
                ),
                width=2,
            ),
        ], className="mb-2")
    )
    return rows
@callback(
    Output('ranges-container', 'children', allow_duplicate=True),
    Input({'type': 'remove-range', 'index': ALL}, 'n_clicks'),
    State('ranges-container', 'children'),
    prevent_initial_call=True
)
def remove_range(n_clicks, existing_ranges):
    """Remove the row whose "Remove" button was clicked.

    Args:
        n_clicks: Click counts of every ``remove-range`` button (unused; the
            triggering button is read from the callback context).
        existing_ranges: Current children of ``ranges-container``.

    Returns:
        The rows that remain after dropping the one containing the clicked
        button.

    Raises:
        PreventUpdate: If nothing triggered the callback, or the trigger was
            not an actual click.
    """
    ctx = dash.callback_context
    if not ctx.triggered:
        raise PreventUpdate

    trigger = ctx.triggered[0]
    # A newly inserted Remove button triggers this callback with
    # n_clicks=None; without this guard the row that was just added would be
    # deleted immediately.
    if not trigger.get('value'):
        raise PreventUpdate

    # prop_id looks like '{"index":0,"type":"remove-range"}.n_clicks'; split
    # on the last dot so only the property name is stripped.
    button_id = trigger['prop_id'].rsplit('.', 1)[0]
    index_to_remove = json.loads(button_id)['index']

    if existing_ranges is None:
        rows = []
    elif isinstance(existing_ranges, list):
        rows = existing_ranges
    else:
        rows = [existing_ranges]

    # Match on the button's ID index, not the list position: the two stop
    # agreeing as soon as any earlier row has been removed.
    return [row for row in rows if _remove_button_index(row) != index_to_remove]


def _remove_button_index(component):
    """Return the 'index' of the remove-range button nested in ``component``.

    ``component`` is the JSON-style form Dash passes for State children
    (dicts with a ``props`` mapping, or lists of them). Returns ``None`` if
    no such button is found.
    """
    if isinstance(component, list):
        for child in component:
            found = _remove_button_index(child)
            if found is not None:
                return found
        return None
    if not isinstance(component, dict):
        return None
    props = component.get('props') or {}
    component_id = props.get('id')
    if isinstance(component_id, dict) and component_id.get('type') == 'remove-range':
        return component_id.get('index')
    return _remove_button_index(props.get('children'))
@callback(
    Output('progress-bar', 'value'),
    Output('log-output', 'children'),
    Input('split-button', 'n_clicks'),
    State('upload-pdf', 'contents'),
    State('upload-pdf', 'filename'),
    State({'type': 'range-input', 'index': ALL}, 'value'),
    prevent_initial_call=True
)
def split_pdf(n_clicks, contents, filename, ranges):
    """Start splitting the uploaded PDF on a background thread.

    Args:
        n_clicks: Click count of the "Split PDF" button.
        contents: The upload component's ``contents`` value.
        filename: The upload component's ``filename`` value.
        ranges: Values of every ``range-input`` field; blank fields arrive
            as ``None`` or empty strings.

    Returns:
        ``(0, message)``: the progress bar reset to zero and a status line.

    Raises:
        PreventUpdate: If no file was uploaded or every range field is blank.
    """
    global progress

    # Drop blank fields BEFORE checking for emptiness. Otherwise a form with
    # only empty inputs starts a worker that builds an empty ZIP and reports
    # success.
    ranges = [r.strip() for r in (ranges or []) if r and r.strip()]
    if not contents or not ranges:
        logger.warning("Split PDF clicked but no content or ranges provided")
        raise PreventUpdate

    logger.info("Split PDF button clicked")
    logger.info(f"Processing {len(ranges)} ranges")
    progress = 0  # Reset progress
    thread = Thread(target=process_pdf, args=(contents, filename, ranges))
    thread.start()
    return 0, "PDF splitting process started. Check console for detailed logs."
def process_pdf(contents, filename, ranges):
    """Split the uploaded PDF into one file per range and zip the results.

    Intended to run on a worker thread. Results are reported through the
    module globals: ``progress`` (0-100, or -1 on failure) and
    ``generated_file`` (the ZIP bytes, or ``None`` if no archive is available).

    Args:
        contents: Upload payload of the form ``"<header>,<base64 data>"``.
        filename: Name of the uploaded file (not used by the split itself).
        ranges: 1-based page ranges as strings, each ``"N"`` or ``"N-M"``.
    """
    global progress, generated_file
    # Discard any archive from an earlier run so a failure here cannot leave
    # a stale ZIP available for download.
    generated_file = None
    try:
        # Decode PDF content; only the part after the first comma is data.
        _, _, content_string = contents.partition(',')
        decoded = base64.b64decode(content_string)

        # Read the PDF
        pdf = PdfReader(io.BytesIO(decoded))
        total_pages = len(pdf.pages)

        # Create a ZIP file in memory
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w') as zf:
            for i, page_range in enumerate(ranges):
                first, stop = _parse_page_range(page_range, total_pages)
                writer = PdfWriter()
                for page_num in range(first, stop):
                    writer.add_page(pdf.pages[page_num])

                # Save the split PDF to the ZIP file
                output = io.BytesIO()
                writer.write(output)
                zf.writestr(f'split_{i+1}.pdf', output.getvalue())

                # Stay below 100 until the archive is published, so a reader
                # never sees "done" while generated_file is still None.
                progress = min((i + 1) / len(ranges) * 100, 99)

        generated_file = zip_buffer.getvalue()
        progress = 100
    except Exception as e:
        # Boundary of the worker thread: record the traceback and signal
        # failure through the progress sentinel.
        logger.exception(f"Error processing PDF: {str(e)}")
        progress = -1


def _parse_page_range(page_range, total_pages):
    """Convert a 1-based ``"N"`` or ``"N-M"`` string to 0-based (start, stop).

    The end of the range is clamped to ``total_pages``.

    Raises:
        ValueError: If the text is not a valid range, the range is reversed,
            or it starts before page 1 or after the last page.
    """
    first_text, dash_found, last_text = page_range.strip().partition('-')
    try:
        first = int(first_text)
        # A bare number selects that single page.
        last = int(last_text) if dash_found else first
    except ValueError:
        raise ValueError(f"Invalid page range: {page_range!r}") from None
    # first < 1 would turn into a negative index and silently pick pages
    # from the end of the document.
    if first < 1 or last < first:
        raise ValueError(f"Invalid page range: {page_range!r}")
    if first > total_pages:
        raise ValueError(
            f"Page range {page_range!r} starts beyond the last page ({total_pages})"
        )
    return first - 1, min(last, total_pages)
@callback(
    Output('progress-bar', 'value', allow_duplicate=True),
    Output('log-output', 'children', allow_duplicate=True),
    Input('progress-bar', 'value'),
    prevent_initial_call=True
)
def update_progress(value):
    """Translate the worker's progress into a bar value and status message.

    Args:
        value: Current value of the progress bar (unused; the module-level
            ``progress`` is the source of truth).

    Returns:
        ``(bar_value, message)`` for the progress bar and the log area.
    """
    # NOTE(review): the only trigger is the progress bar's own value and the
    # visible layout has no dcc.Interval, so this looks like it runs once per
    # split rather than polling. Confirm whether periodic updates are intended.

    # Read the shared value once: the worker thread may change it between
    # checks, which could otherwise report e.g. "-1% complete".
    current = progress
    if current == 100:
        logger.info("PDF splitting completed")
        return 100, "PDF splitting completed. Click 'Download ZIP' to get your files."
    if current == -1:
        logger.error("PDF splitting failed")
        return 0, "Error occurred during PDF splitting. Check console for details."
    return current, f"Processing... {current:.0f}% complete"
@callback(
    Output("download-zip", "data"),
    Input("download-button", "n_clicks"),
    prevent_initial_call=True,
)
def download_zip(n_clicks):
    """Send the generated ZIP archive to the browser.

    Args:
        n_clicks: Click count of the "Download ZIP" button.

    Returns:
        The download payload built by ``dcc.send_bytes`` for
        ``split_pdfs.zip``.

    Raises:
        PreventUpdate: If no archive has been generated.
    """
    # Guard clause: nothing to send until process_pdf has stored an archive.
    if generated_file is None:
        logger.warning("Download attempted but no file generated")
        raise PreventUpdate

    logger.info("Initiating ZIP file download")
    return dcc.send_bytes(generated_file, "split_pdfs.zip")
if __name__ == '__main__':
    # The server binds to all interfaces, and debug mode exposes an
    # interactive debugger that can execute arbitrary code. Keep it off
    # unless explicitly requested with DASH_DEBUG=1 (or true/yes/on).
    debug_enabled = os.environ.get('DASH_DEBUG', '').strip().lower() in {
        '1', 'true', 'yes', 'on',
    }
    logger.info("Starting the Dash application...")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    logger.info("Dash application has finished running.")