# Spaces: Paused (page-status text captured together with the source; not part of the program)
import base64
import io
import json
import logging
import os
import time
import zipfile
from threading import Thread

import dash
import dash_bootstrap_components as dbc
from dash import dcc, html, Input, Output, State, callback, MATCH, ALL
from dash.exceptions import PreventUpdate
from PyPDF2 import PdfReader, PdfWriter
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Global variables: job state shared between the UI functions and the worker
# thread started by split_pdf().
# NOTE(review): these are process-wide, so concurrent browser sessions would
# share one job state -- confirm the app is meant for a single user at a time.
generated_file = None  # bytes of the finished ZIP archive, or None when there is none
progress = 0  # 0-100 while a job runs, 100 when finished, -1 after a failure
is_processing = False  # True from the moment a split is started until the worker exits
# Layout: upload area, editable list of page ranges, split/progress controls
# and the download button. Component ids are referenced by the functions below.
app.layout = dbc.Container([
    html.H1("PDF Splitter", className="my-4"),
    dcc.Upload(
        id='upload-pdf',
        children=html.Div(['Drag and Drop or ', html.A('Select PDF')]),
        style={
            'width': '100%',
            'height': '60px',
            'lineHeight': '60px',
            'borderWidth': '1px',
            'borderStyle': 'dashed',
            'borderRadius': '5px',
            'textAlign': 'center',
            'margin': '10px'
        },
        multiple=False
    ),
    dbc.Spinner(html.Div(id='pdf-name'), color="primary", type="grow"),
    dbc.Card([
        dbc.CardBody([
            # Filled with rows produced by create_range_input().
            html.Div(id='ranges-container', children=[]),
            dbc.Button("Add Range", id='add-range', color="secondary", className="mt-2"),
        ])
    ], className="my-3"),
    # Starts disabled; update_output() returns the flag that enables it.
    dbc.Button("Split PDF", id='split-button', color="primary", className="mt-3", disabled=True),
    dbc.Progress(id='progress-bar', className="my-3"),
    html.Div([
        dbc.Spinner(html.Div(), id="processing-spinner", color="primary", type="border"),
        html.Div(id='processing-status')
    ], id='processing-container', style={'display': 'none'}),
    dbc.Button("Download ZIP", id='download-button', color="success", className="mt-3", disabled=True),
    dcc.Download(id="download-zip"),
    html.Div(id='log-output', style={'whiteSpace': 'pre-line'}),
    dcc.Interval(id='interval-component', interval=1000, n_intervals=0),  # 1 second interval
], fluid=True)
def update_output(contents, filename):
    """Return the UI state to show after a file has (or has not) been chosen.

    NOTE(review): no callback decorator is visible in this source; the three
    return values presumably feed the 'pdf-name' children, the 'split-button'
    ``disabled`` flag and the 'ranges-container' children -- confirm against
    the registration.

    Args:
        contents: Uploaded file contents, or ``None`` when nothing is uploaded.
        filename: Name of the uploaded file (only used for display/logging).

    Returns:
        A 3-tuple ``(status, disabled, rows)``: an "Uploaded: ..." element,
        ``False`` and one initial range row when a file is present; otherwise
        ``("", True, [])``.
    """
    if contents is None:
        return "", True, []

    logger.info("PDF uploaded: %s", filename)
    # Start the form with a single range row at index 0.
    return html.Div(f"Uploaded: {filename}"), False, [create_range_input(0)]
def create_range_input(index):
    """Build one page-range row: a text input plus a "Remove" button.

    Both components use dictionary ids (``{'type': ..., 'index': index}``) so
    that individual rows can be told apart by ``index``.

    Args:
        index: Value stored under the ``'index'`` key of both component ids.

    Returns:
        A ``dbc.Row`` holding the input (10 columns wide) and the remove
        button (2 columns wide).
    """
    range_box = dbc.Input(
        id={'type': 'range-input', 'index': index},
        type='text',
        placeholder='Enter page range (e.g., 1-3)',
    )
    remove_button = dbc.Button(
        "Remove",
        id={'type': 'remove-range', 'index': index},
        color="danger",
        size="sm",
    )
    return dbc.Row([
        dbc.Col(range_box, width=10),
        dbc.Col(remove_button, width=2),
    ], className="mb-2")
def _range_row_index(node):
    """Return the 'index' of the first dictionary id found inside *node*, or None.

    *node* may be a component object or the nested dict/list form in which
    components arrive when passed back as callback state.
    """
    if hasattr(node, 'to_plotly_json'):
        node = node.to_plotly_json()

    if isinstance(node, dict):
        component_id = node.get('id')
        if isinstance(component_id, dict) and 'index' in component_id:
            return component_id['index']
        children = node.values()
    elif isinstance(node, (list, tuple)):
        children = node
    else:
        return None

    for child in children:
        found = _range_row_index(child)
        if found is not None:
            return found
    return None


def manage_ranges(add_clicks, remove_clicks, existing_ranges):
    """Add a new range row or remove the row whose "Remove" button fired.

    Rows are identified by the ``index`` stored in their component ids, not
    by their position in the list: after a row has been removed the two no
    longer agree, so matching by position would delete the wrong row and
    numbering new rows by list length would reuse an id that is still in use.

    Args:
        add_clicks: Click count of the "Add Range" button (not read directly).
        remove_clicks: Click counts of the "Remove" buttons (not read directly).
        existing_ranges: Current range rows; ``None`` is treated as empty.

    Returns:
        The updated list of range rows.

    Raises:
        PreventUpdate: When nothing triggered the call, or when a remove
            button is reported as the trigger without an actual click.
    """
    ctx = dash.callback_context
    if not ctx.triggered:
        raise PreventUpdate

    trigger = ctx.triggered[0]
    # prop_id looks like "<component id>.<property>"; keep the id part only.
    triggered_id = trigger['prop_id'].rpartition('.')[0]
    rows = list(existing_ranges or [])

    if triggered_id == 'add-range':
        used = [_range_row_index(row) for row in rows]
        next_index = max((i for i in used if isinstance(i, int)), default=-1) + 1
        rows.append(create_range_input(next_index))
    elif 'remove-range' in triggered_id:
        # A freshly rendered button can show up as the trigger with no click
        # value; removing a row in that case would delete what was just added.
        if not trigger.get('value'):
            raise PreventUpdate
        remove_index = json.loads(triggered_id)['index']
        rows = [row for row in rows if _range_row_index(row) != remove_index]

    return rows
def split_pdf(n_clicks, contents, filename, ranges):
    """Validate the request and start the background job that splits the PDF.

    Args:
        n_clicks: Click count of the "Split PDF" button (not read directly).
        contents: Uploaded file contents passed on to ``process_pdf``.
        filename: Name of the uploaded file, passed on to ``process_pdf``.
        ranges: Text of every range input; blank or ``None`` entries are ignored.

    Returns:
        ``("Processing started...", True, True, {'display': 'block'})``.
        NOTE(review): the targets of these four values are set by a callback
        registration that is not visible in this source.

    Raises:
        PreventUpdate: When there is no uploaded content or no non-blank range.
    """
    global progress, is_processing

    # Drop blank entries *before* validating; otherwise a form containing only
    # empty range boxes passes the check and starts a job that produces an
    # empty archive.
    ranges = [r.strip() for r in (ranges or []) if r and r.strip()]

    if not contents or not ranges:
        logger.warning("Split PDF clicked but no content or ranges provided")
        raise PreventUpdate

    logger.info("Split PDF button clicked")
    logger.info("Processing %d ranges", len(ranges))

    progress = 0  # Reset progress
    is_processing = True

    # Run the split off the request thread; update_progress() reports on it.
    worker = Thread(target=process_pdf, args=(contents, filename, ranges))
    worker.start()

    return "Processing started...", True, True, {'display': 'block'}
def _parse_page_range(page_range, total_pages):
    """Turn "5" or "2-7" (1-based, inclusive) into a zero-based ``(start, stop)`` pair.

    The upper bound is clamped to *total_pages*. Raises ``ValueError`` when the
    text is not numeric, the first page is below 1, or the range is empty
    (reversed, or starting beyond the last page).
    """
    first_text, separator, last_text = page_range.partition('-')
    first = int(first_text)
    last = int(last_text) if separator else first
    last = min(last, total_pages)
    if first < 1 or first > last:
        raise ValueError(
            f"Invalid page range {page_range!r} for a {total_pages}-page PDF"
        )
    return first - 1, last


def process_pdf(contents, filename, ranges):
    """Split the uploaded PDF into one file per range and zip the results.

    Runs in the worker thread started by ``split_pdf`` and reports through the
    module globals: ``progress`` (percentage, ``100`` on success, ``-1`` on
    failure), ``generated_file`` (ZIP bytes on success, ``None`` otherwise) and
    ``is_processing`` (always reset to ``False`` on exit).

    Args:
        contents: Upload payload of the form ``"<header>,<base64 data>"``.
        filename: Name of the uploaded file (unused here).
        ranges: Page ranges as text, each ``"N"`` or ``"N-M"`` (1-based,
            inclusive). Ranges ending beyond the document are clamped.
    """
    global progress, generated_file, is_processing

    # Forget any archive from a previous run so a failure here cannot leave a
    # stale file available for download.
    generated_file = None

    try:
        # Decode PDF content (everything after the first comma is base64).
        content_string = contents.partition(',')[2]
        decoded = base64.b64decode(content_string)

        # Read the PDF
        pdf = PdfReader(io.BytesIO(decoded))
        total_pages = len(pdf.pages)

        # Create a ZIP file in memory
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w') as zf:
            for i, page_range in enumerate(ranges):
                start, stop = _parse_page_range(page_range, total_pages)

                writer = PdfWriter()
                for page_num in range(start, stop):
                    writer.add_page(pdf.pages[page_num])

                # Save the split PDF to the ZIP file
                output = io.BytesIO()
                writer.write(output)
                zf.writestr(f'split_{i+1}.pdf', output.getvalue())

                progress = (i + 1) / len(ranges) * 100
                time.sleep(0.1)  # Simulate some processing time

        # Read the bytes only after the ZipFile is closed, so the archive's
        # central directory has been written.
        generated_file = zip_buffer.getvalue()
        progress = 100
    except Exception as e:
        # Top-level boundary of the worker thread: record the failure for the
        # UI (progress == -1) and keep the traceback in the log.
        logger.exception("Error processing PDF: %s", e)
        progress = -1
    finally:
        is_processing = False
def update_progress(n):
    """Report the state of the current split job from the module globals.

    Args:
        n: Tick counter supplied by the caller (not used).

    Returns:
        A 4-tuple ``(bar_value, status_text, flag, style)``:

        * while ``is_processing`` is true: the current ``progress``, a
          "Processing... N% complete" message, ``True``, ``{'display': 'block'}``;
        * when ``progress`` is 100 and an archive exists: ``100``, the
          completion message, ``False``, ``{'display': 'none'}``;
        * when ``progress`` is -1: ``0``, the error message, ``True``,
          ``{'display': 'none'}``.

        NOTE(review): ``flag`` presumably drives the download button's
        ``disabled`` property and ``style`` the processing container --
        the callback registration is not visible in this source.

    Raises:
        PreventUpdate: When no job is running, finished or failed.
    """
    # The globals are only read here, so no ``global`` declaration is needed.
    if is_processing:
        return progress, f"Processing... {progress:.0f}% complete", True, {'display': 'block'}

    if progress == 100 and generated_file is not None:
        return 100, "PDF splitting completed. Click 'Download ZIP' to get your files.", False, {'display': 'none'}

    if progress == -1:
        return 0, "Error occurred during PDF splitting. Please try again.", True, {'display': 'none'}

    raise PreventUpdate
def download_zip(n_clicks):
    """Send the generated ZIP archive to the browser.

    Args:
        n_clicks: Click count of the "Download ZIP" button; ``None`` or ``0``
            means the button has not been clicked.

    Returns:
        The ``dcc.send_bytes`` payload for ``split_pdfs.zip``.

    Raises:
        PreventUpdate: When the button has not been clicked, or when no
            archive has been generated yet.
    """
    # Without this guard, any firing that is not a real click would push a
    # previously generated archive to the browser.
    if not n_clicks:
        raise PreventUpdate

    if generated_file is None:
        logger.warning("Download attempted but no file generated")
        raise PreventUpdate

    logger.info("Initiating ZIP file download")
    return dcc.send_bytes(generated_file, "split_pdfs.zip")
if __name__ == '__main__':
    logger.info("Starting the Dash application...")
    # The server listens on all interfaces, so debug mode (interactive
    # debugger, hot reload) must not be on by default. Opt in explicitly with
    # DASH_DEBUG=1 / true / yes for local development.
    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in {"1", "true", "yes"}
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    logger.info("Dash application has finished running.")