"""Dash web app: upload PDF files, convert them to DOCX, download a ZIP.

Conversion runs in a background thread. A ``dcc.Interval`` polls the shared
progress state so the browser can render progress bars and receive the ZIP
once every file has been processed.
"""

import base64
import io
import os
import tempfile
import threading
import time
import zipfile

import dash_bootstrap_components as dbc
from dash import Dash, Input, Output, State, ctx, dcc, html, no_update
from pdf2docx import Converter

app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Module-level state shared by the callbacks and the worker thread.
# NOTE(review): these dicts are process-wide, so concurrent browser sessions
# share (and overwrite) each other's uploads and results.
uploaded_files = {}       # uploaded PDF filename -> io.BytesIO with the PDF bytes
converted_files = {}      # DOCX filename -> io.BytesIO with the converted bytes
conversion_progress = {}  # filename -> percent; 'overall' -> 0 running / 100 done
conversion_errors = {}    # filename -> error text for files that failed

# How often the browser polls for conversion progress, in milliseconds.
PROGRESS_POLL_MS = 500


def convert_pdf_to_docx(pdf_path, docx_path):
    """Convert the PDF at *pdf_path* into a DOCX written to *docx_path*."""
    cv = Converter(pdf_path)
    try:
        cv.convert(docx_path)
    finally:
        # Release the converter even when the conversion raises.
        cv.close()


def process_contents(contents, filename):
    """Decode one ``dcc.Upload`` payload into an in-memory byte stream.

    Args:
        contents: String of the form ``"<header>,<base64 data>"``.
        filename: Name of the uploaded file (unused; kept for callers).

    Returns:
        ``io.BytesIO`` holding the decoded bytes.

    Raises:
        ValueError: If *contents* has no comma separator or the payload is
            not valid base64 (``binascii.Error`` is a ``ValueError``).
    """
    # Split on the first comma only: everything after it is the payload.
    _header, content_string = contents.split(',', 1)
    return io.BytesIO(base64.b64decode(content_string))


def _remove_quietly(path):
    """Delete *path*, ignoring OS errors (best-effort temp-file cleanup)."""
    try:
        os.unlink(path)
    except OSError:
        # Deliberately ignored: a leftover temp file must not mask the
        # result (or the original error) of the conversion itself.
        pass


def _convert_one(filename):
    """Convert one uploaded PDF and return ``(docx_filename, docx_bytes)``.

    The PDF bytes are written to a temporary file because the converter
    works on paths; both temporary files are removed on success and failure.
    """
    pdf_bytes = uploaded_files[filename].getvalue()
    pdf_path = None
    docx_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
            pdf_path = temp_pdf.name
            temp_pdf.write(pdf_bytes)
        # Only the path is needed here; the converter writes the content.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as temp_docx:
            docx_path = temp_docx.name
        convert_pdf_to_docx(pdf_path, docx_path)
        with open(docx_path, 'rb') as docx_file:
            docx_bytes = docx_file.read()
    finally:
        for path in (pdf_path, docx_path):
            if path is not None:
                _remove_quietly(path)
    # basename() keeps directory components out of the ZIP entry name.
    docx_filename = os.path.splitext(os.path.basename(filename))[0] + '.docx'
    return docx_filename, docx_bytes


def convert_files(filenames):
    """Convert every named upload, recording results, errors and progress.

    Intended to run in a worker thread. A failure in one file is stored in
    ``conversion_errors`` and does not stop the remaining files. The
    ``'overall'`` progress entry is always set to 100 at the end so a poller
    can never wait forever.

    Args:
        filenames: Keys of ``uploaded_files`` to convert.
    """
    try:
        for filename in filenames:
            try:
                docx_filename, docx_bytes = _convert_one(filename)
            except Exception as exc:  # thread boundary: record, don't die
                conversion_errors[filename] = str(exc) or type(exc).__name__
            else:
                converted_files[docx_filename] = io.BytesIO(docx_bytes)
            # A file is either pending (0) or finished (100).
            conversion_progress[filename] = 100
    finally:
        conversion_progress['overall'] = 100


app.layout = dbc.Container([
    dbc.Card(
        dbc.CardBody([
            html.H1("PDF to DOCX Converter", className="text-center mb-4"),
            dcc.Upload(
                id='upload-data',
                children=html.Div([
                    'Drag and Drop or ',
                    html.A('Select PDF Files')
                ]),
                style={
                    'width': '100%',
                    'height': '60px',
                    'lineHeight': '60px',
                    'borderWidth': '1px',
                    'borderStyle': 'dashed',
                    'borderRadius': '5px',
                    'textAlign': 'center',
                    'margin': '10px'
                },
                multiple=True
            ),
            html.Div(id='upload-output'),
            dbc.Button("Convert and Download", id="convert-button",
                       color="primary", className="mt-3 mb-3", disabled=True),
            html.Div(id='conversion-output'),
            dcc.Download(id="download-zip"),
            # Disabled until a conversion starts; drives progress polling.
            dcc.Interval(id='progress-interval', interval=PROGRESS_POLL_MS,
                         disabled=True),
        ]),
        className="mt-3"
    )
], fluid=True)


@app.callback(
    Output('upload-output', 'children'),
    Output('convert-button', 'disabled'),
    Input('upload-data', 'contents'),
    State('upload-data', 'filename'),
    prevent_initial_call=True
)
def update_output(list_of_contents, list_of_names):
    """Store uploaded PDFs and report which files were accepted or skipped.

    Returns:
        ``(children, disabled)``: status lines for the upload area, and
        whether the convert button stays disabled. The button is enabled
        only when at least one PDF was stored.
    """
    if list_of_contents is None:
        return [], True

    uploaded_files.clear()
    children = []
    for content, name in zip(list_of_contents, list_of_names):
        if not name.lower().endswith('.pdf'):
            children.append(html.Div(f"Skipped: {name} (Not a PDF file)",
                                     style={'color': 'red'}))
            continue
        try:
            uploaded_files[name] = process_contents(content, name)
        except ValueError:
            children.append(html.Div(f"Skipped: {name} (Could not read file)",
                                     style={'color': 'red'}))
            continue
        children.append(html.Div(f"Uploaded: {name}"))
    return children, not uploaded_files


def _progress_bars():
    """Build one progress bar per file of the current conversion run."""
    bars = []
    # Snapshot the items: the worker thread mutates the dict concurrently.
    for name, percent in list(conversion_progress.items()):
        if name == 'overall':
            continue
        failed = name in conversion_errors
        bars.append(dbc.Progress(
            value=percent,
            label=f"{name}: failed" if failed else f"{name}: {percent:.0f}%",
            color="danger" if failed else "primary",
            className="mb-3",
        ))
    return bars


def _build_zip():
    """Return the bytes of a ZIP archive holding every converted file."""
    with io.BytesIO() as zip_buffer:
        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            for filename, file_content in converted_files.items():
                zip_file.writestr(filename, file_content.getvalue())
        # Read after the archive is finalised but before the buffer closes.
        return zip_buffer.getvalue()


def _start_conversion(n_clicks):
    """Reset run state, launch the worker thread and enable polling."""
    if not n_clicks or not uploaded_files:
        return no_update, no_update, no_update
    if conversion_progress.get('overall', 100) < 100:
        # A run is already in progress; restarting would clear the state
        # its worker thread is still writing to.
        return no_update, no_update, no_update

    filenames = list(uploaded_files)
    conversion_progress.clear()
    converted_files.clear()
    conversion_errors.clear()
    conversion_progress.update(dict.fromkeys(filenames, 0))
    conversion_progress['overall'] = 0
    threading.Thread(target=convert_files, args=(filenames,),
                     daemon=True).start()
    return _progress_bars(), no_update, False


def _poll_conversion():
    """Report progress; deliver the ZIP exactly once when the run finishes."""
    overall = conversion_progress.get('overall')
    if overall is None:
        # Stale tick: nothing is running, so just stop the interval.
        return no_update, no_update, True
    if overall < 100:
        return _progress_bars(), no_update, no_update
    # pop() makes delivery happen once even if two ticks overlap.
    if conversion_progress.pop('overall', None) is None:
        return no_update, no_update, True

    status = [
        html.Div(f"Failed: {name} ({message})", style={'color': 'red'})
        for name, message in conversion_errors.items()
    ]
    if not converted_files:
        status.append(html.Div("No files were converted."))
        return status, no_update, True
    status.append(html.Div("Conversion complete! Downloading ZIP file..."))
    return status, dcc.send_bytes(_build_zip(), "converted_files.zip"), True


@app.callback(
    Output('conversion-output', 'children'),
    Output('download-zip', 'data'),
    Output('progress-interval', 'disabled'),
    Input('convert-button', 'n_clicks'),
    Input('progress-interval', 'n_intervals'),
    prevent_initial_call=True
)
def convert_and_download(n_clicks, n_intervals=None):
    """Start a conversion on button click; otherwise report its progress.

    A Dash callback must return its outputs, so progress cannot be streamed
    with ``yield``. The button click starts the worker thread and enables
    the interval; each interval tick re-renders the progress bars and, once
    the worker has finished, sends the ZIP and disables the interval again.

    Returns:
        ``(children, download_data, interval_disabled)``.
    """
    if ctx.triggered_id == 'convert-button':
        return _start_conversion(n_clicks)
    return _poll_conversion()


if __name__ == '__main__':
    print("Starting the Dash application...")
    # NOTE(review): debug=True together with host='0.0.0.0' exposes the
    # development tooling on every network interface; confirm this is only
    # used in a trusted environment.
    app.run(debug=True, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")