Spaces:
Paused
Paused
File size: 7,633 Bytes
557bfa3 57c7f35 557bfa3 02dbd73 b2ab0f1 557bfa3 ba2629d 557bfa3 f0a8607 557bfa3 57c7f35 557bfa3 b2ab0f1 557bfa3 1eb1557 557bfa3 1eb1557 557bfa3 ba2629d 2e88003 83c24f4 b2ab0f1 83c24f4 b2ab0f1 557bfa3 f0a8607 b2ab0f1 557bfa3 5b28261 b69229a 5b28261 1eb1557 b69229a 5b28261 b69229a 9be275e 1eb1557 b69229a 9be275e b69229a bdfc164 b69229a aed9663 b69229a aed9663 b69229a 9be275e b69229a aed9663 b69229a aed9663 557bfa3 b2ab0f1 83c24f4 557bfa3 ba2629d 557bfa3 b2ab0f1 557bfa3 57c7f35 557bfa3 57c7f35 ba2629d 57c7f35 5b28261 b2ab0f1 5b28261 557bfa3 83c24f4 557bfa3 5b28261 b2ab0f1 5b28261 b2ab0f1 5b28261 b2ab0f1 5b28261 557bfa3 b2ab0f1 83c24f4 b2ab0f1 557bfa3 b2ab0f1 83c24f4 b2ab0f1 83c24f4 557bfa3 83c24f4 557bfa3 b2ab0f1 557bfa3 57c7f35 557bfa3 57c7f35 557bfa3 57c7f35 557bfa3 57c7f35 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
import base64
import io
import zipfile
import logging
from threading import Thread
import json
import time
import dash
import dash_bootstrap_components as dbc
from dash import dcc, html, Input, Output, State, callback, MATCH, ALL
from dash.exceptions import PreventUpdate
from PyPDF2 import PdfReader, PdfWriter
# Set up logging: timestamped, INFO-level records for the whole process.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# The Dash application, styled with the Bootstrap theme.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Global variables shared by the callbacks and the background worker:
#   generated_file - bytes of the finished ZIP archive, or None if not built
#   progress       - percentage complete (0-100); -1 marks a failed run
#   is_processing  - True while a split job is running
generated_file, progress, is_processing = None, 0, False
# Layout
# The larger sub-trees are built first so the page skeleton below reads
# top-to-bottom in the order the widgets appear on screen.

# Dashed drop-zone used to upload a single PDF.
_upload_area = dcc.Upload(
    id='upload-pdf',
    children=html.Div(['Drag and Drop or ', html.A('Select PDF')]),
    style={
        'width': '100%',
        'height': '60px',
        'lineHeight': '60px',
        'borderWidth': '1px',
        'borderStyle': 'dashed',
        'borderRadius': '5px',
        'textAlign': 'center',
        'margin': '10px'
    },
    multiple=False
)

# Card holding the dynamic list of page-range rows plus the "Add Range" button.
_ranges_card = dbc.Card(
    [
        dbc.CardBody([
            html.Div(id='ranges-container', children=[]),
            dbc.Button("Add Range", id='add-range', color="secondary", className="mt-2"),
        ])
    ],
    className="my-3",
)

# Spinner + status text; hidden until a split job is started.
_processing_box = html.Div(
    [
        dbc.Spinner(html.Div(), id="processing-spinner", color="primary", type="border"),
        html.Div(id='processing-status')
    ],
    id='processing-container',
    style={'display': 'none'},
)

app.layout = dbc.Container(
    [
        html.H1("PDF Splitter", className="my-4"),
        _upload_area,
        dbc.Spinner(html.Div(id='pdf-name'), color="primary", type="grow"),
        _ranges_card,
        dbc.Button("Split PDF", id='split-button', color="primary", className="mt-3", disabled=True),
        dbc.Progress(id='progress-bar', className="my-3"),
        _processing_box,
        dbc.Button("Download ZIP", id='download-button', color="success", className="mt-3", disabled=True),
        dcc.Download(id="download-zip"),
        html.Div(id='log-output', style={'whiteSpace': 'pre-line'}),
        dcc.Interval(id='interval-component', interval=1000, n_intervals=0),  # 1 second interval
    ],
    fluid=True,
)
@callback(
    Output('pdf-name', 'children'),
    Output('split-button', 'disabled'),
    Output('ranges-container', 'children'),
    Input('upload-pdf', 'contents'),
    Input('upload-pdf', 'filename')
)
def update_output(contents, filename):
    """React to a change of the upload component.

    Returns a 3-tuple for (pdf-name children, split-button disabled,
    ranges-container children):

    * nothing uploaded -> empty label, Split button disabled, no range rows;
    * a file uploaded  -> "Uploaded: <filename>" label, Split button enabled,
      and a single fresh range row with index 0.
    """
    if contents is None:
        return "", True, []

    logger.info(f"PDF uploaded: {filename}")
    uploaded_label = html.Div(f"Uploaded: {filename}")
    first_row = create_range_input(0)
    return uploaded_label, False, [first_row]
def create_range_input(index):
    """Build one row holding a page-range text box and its "Remove" button.

    Both components receive dict ids ({'type': ..., 'index': index}) so they
    can be addressed by the pattern-matching callbacks in this module.
    """
    range_field = dbc.Input(
        id={'type': 'range-input', 'index': index},
        type='text',
        placeholder='Enter page range (e.g., 1-3)',
    )
    remove_button = dbc.Button(
        "Remove",
        id={'type': 'remove-range', 'index': index},
        color="danger",
        size="sm",
    )
    return dbc.Row(
        [
            dbc.Col(range_field, width=10),
            dbc.Col(remove_button, width=2),
        ],
        className="mb-2",
    )
def _find_range_index(node):
    """Return the ``index`` of the first ``range-input`` id inside *node*.

    *node* is a serialized component tree (nested dicts / lists) as received
    through ``State('ranges-container', 'children')``. Returns ``None`` when
    no component with an id of type ``'range-input'`` is found.
    """
    if isinstance(node, dict):
        component_id = node.get('id')
        if isinstance(component_id, dict) and component_id.get('type') == 'range-input':
            return component_id.get('index')
        children = node.values()
    elif isinstance(node, list):
        children = node
    else:
        return None

    for child in children:
        found = _find_range_index(child)
        if found is not None:
            return found
    return None


@callback(
    Output('ranges-container', 'children', allow_duplicate=True),
    Input('add-range', 'n_clicks'),
    Input({'type': 'remove-range', 'index': ALL}, 'n_clicks'),
    State('ranges-container', 'children'),
    prevent_initial_call=True
)
def manage_ranges(add_clicks, remove_clicks, existing_ranges):
    """Add a new page-range row or remove the row whose button was clicked.

    Rows are identified by the ``index`` stored in their component ids, not
    by their position in the list: once a row has been removed, positions and
    id indexes no longer line up, so positional removal would delete the
    wrong row (or none) and ``len()``-based numbering could hand out an index
    that is still in use.

    Args:
        add_clicks: click count of the "Add Range" button (unused directly).
        remove_clicks: click counts of all "Remove" buttons (unused directly).
        existing_ranges: current serialized children of the ranges container.

    Returns:
        The updated list of range rows.

    Raises:
        PreventUpdate: when nothing actually triggered the callback, or the
            triggering property carries no click (value is ``None``/0).
    """
    ctx = dash.callback_context
    if not ctx.triggered:
        raise PreventUpdate

    trigger = ctx.triggered[0]
    # Defensive: a trigger whose n_clicks is None/0 is not a real click (this
    # can happen when matching buttons are inserted into the layout), so it
    # must not add or remove anything.
    if not trigger.get('value'):
        raise PreventUpdate

    triggered_id = trigger['prop_id'].rsplit('.', 1)[0]
    rows = list(existing_ranges or [])

    if triggered_id == 'add-range':
        used = [idx for idx in map(_find_range_index, rows) if isinstance(idx, int)]
        # One past the highest index in use; fall back to the row count when
        # no index could be read from the existing rows.
        new_index = max(used, default=len(rows) - 1) + 1
        rows.append(create_range_input(new_index))
    elif 'remove-range' in triggered_id:
        remove_index = json.loads(triggered_id)['index']
        rows = [row for row in rows if _find_range_index(row) != remove_index]

    return rows
@callback(
    Output('processing-status', 'children'),
    Output('split-button', 'disabled', allow_duplicate=True),
    Output('download-button', 'disabled'),
    Output('processing-container', 'style'),
    Input('split-button', 'n_clicks'),
    State('upload-pdf', 'contents'),
    State('upload-pdf', 'filename'),
    State({'type': 'range-input', 'index': ALL}, 'value'),
    prevent_initial_call=True
)
def split_pdf(n_clicks, contents, filename, ranges):
    """Start a background split job for the uploaded PDF.

    Args:
        n_clicks: click count of the "Split PDF" button (unused directly).
        contents: upload contents string of the PDF.
        filename: name of the uploaded file.
        ranges: values of all range inputs; untouched inputs arrive as None.

    Returns:
        (status text, split-button disabled, download-button disabled,
        processing-container style) for the "job started" state.

    Raises:
        PreventUpdate: when there is no upload or no non-blank range.
    """
    global progress, is_processing, generated_file

    # Drop blank inputs BEFORE validating: a list such as [None] or [""] is
    # truthy, and would otherwise start a job that has nothing to do.
    ranges = [r.strip() for r in (ranges or []) if r and r.strip()]
    if not contents or not ranges:
        logger.warning("Split PDF clicked but no content or ranges provided")
        raise PreventUpdate

    logger.info("Split PDF button clicked")
    logger.info(f"Processing {len(ranges)} ranges")

    progress = 0  # Reset progress
    generated_file = None  # Forget the archive of any previous run
    is_processing = True

    thread = Thread(target=process_pdf, args=(contents, filename, ranges))
    thread.start()

    return "Processing started...", True, True, {'display': 'block'}
def _parse_page_range(page_range, total_pages):
    """Turn ``"5"`` or ``"2-7"`` into a validated ``(start, end)`` pair.

    Both bounds are 1-based and inclusive. ``end`` is clamped to
    *total_pages*.

    Raises:
        ValueError: if the text is not a page number / ``start-end`` pair, if
            ``start`` is below 1 or beyond the last page, or if ``end`` is
            smaller than ``start``.
    """
    first, dash_found, last = page_range.partition('-')
    try:
        start = int(first)
        end = int(last) if dash_found else start
    except ValueError:
        raise ValueError(f"Invalid page range: {page_range!r}") from None

    if start < 1 or end < start or start > total_pages:
        raise ValueError(
            f"Page range {page_range!r} is outside the document (1-{total_pages})"
        )
    return start, min(end, total_pages)


def process_pdf(contents, filename, ranges):
    """Split the uploaded PDF into one file per range and zip the results.

    Runs in a worker thread and reports through module globals: ``progress``
    (0-100, or -1 on failure), ``generated_file`` (ZIP bytes, or None) and
    ``is_processing`` (cleared when the job ends, successfully or not).

    Args:
        contents: upload string of the form ``"<header>,<base64 data>"``.
        filename: name of the uploaded file (currently unused).
        ranges: page-range strings such as ``"1-3"`` or ``"5"``.
    """
    global progress, generated_file, is_processing
    try:
        # Make sure a failed run can never leave an older archive behind.
        generated_file = None

        # Decode PDF content
        _, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)

        # Read the PDF
        pdf = PdfReader(io.BytesIO(decoded))
        total_pages = len(pdf.pages)

        # Validate every range up front so a bad entry fails the job before
        # any output is produced. Without validation a start of 0 would index
        # page -1 (the last page) and a reversed range would emit an empty PDF.
        page_spans = [_parse_page_range(r, total_pages) for r in ranges]

        # Create a ZIP file in memory
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w') as zf:
            for i, (start, end) in enumerate(page_spans):
                writer = PdfWriter()
                for page_num in range(start - 1, end):
                    writer.add_page(pdf.pages[page_num])

                # Save the split PDF to the ZIP file
                output = io.BytesIO()
                writer.write(output)
                zf.writestr(f'split_{i+1}.pdf', output.getvalue())

                progress = (i + 1) / len(page_spans) * 100
                time.sleep(0.1)  # Simulate some processing time

        generated_file = zip_buffer.getvalue()
        progress = 100
    except Exception as e:
        # Top-level boundary of the worker thread: log with traceback and
        # signal the failure to the UI instead of letting the thread die silently.
        logger.exception(f"Error processing PDF: {str(e)}")
        generated_file = None
        progress = -1
    finally:
        is_processing = False
@callback(
    Output('progress-bar', 'value'),
    Output('processing-status', 'children', allow_duplicate=True),
    Output('download-button', 'disabled', allow_duplicate=True),
    Output('processing-container', 'style', allow_duplicate=True),
    Input('interval-component', 'n_intervals'),
    prevent_initial_call=True
)
def update_progress(n):
    """Mirror the job state held in the module globals into the UI.

    Called on every tick of the interval component. Returns
    (progress-bar value, status text, download-button disabled,
    processing-container style):

    * job running            -> current percentage, download disabled, box shown;
    * finished with a file   -> 100, completion message, download enabled, box hidden;
    * failed (progress = -1) -> 0, error message, download disabled, box hidden.

    Raises:
        PreventUpdate: in any other state (nothing to report).
    """
    global progress, is_processing, generated_file

    if is_processing:
        status = f"Processing... {progress:.0f}% complete"
        return progress, status, True, {'display': 'block'}

    if progress == 100 and generated_file is not None:
        status = "PDF splitting completed. Click 'Download ZIP' to get your files."
        return 100, status, False, {'display': 'none'}

    if progress == -1:
        status = "Error occurred during PDF splitting. Please try again."
        return 0, status, True, {'display': 'none'}

    raise PreventUpdate
@callback(
    Output("download-zip", "data"),
    Input("download-button", "n_clicks"),
    prevent_initial_call=True
)
def download_zip(n_clicks):
    """Send the generated archive to the browser as ``split_pdfs.zip``.

    Raises:
        PreventUpdate: when no archive is stored in ``generated_file``.
    """
    global generated_file

    if generated_file is None:
        logger.warning("Download attempted but no file generated")
        raise PreventUpdate

    logger.info("Initiating ZIP file download")
    return dcc.send_bytes(generated_file, "split_pdfs.zip")
if __name__ == '__main__':
    import os  # local import: only needed when run as a script

    # The server binds to every interface (0.0.0.0), so debug mode, which is
    # meant for local development only, must be an explicit opt-in rather
    # than always on. Set DASH_DEBUG=1 (or true/yes/on) to enable it.
    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}

    logger.info("Starting the Dash application...")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    logger.info("Dash application has finished running.")