"""Dash web app that converts uploaded PDF files to DOCX and serves them as a ZIP.

Flow: the user uploads PDFs, clicks the convert button, a background thread
converts each file with pdf2docx, and a polling ``dcc.Interval`` drives both the
status text and the final download.
"""

import base64
import contextlib
import io
import logging
import os
import tempfile
import threading
import time
import zipfile

import dash_bootstrap_components as dbc
from dash import Dash, dcc, html, Input, Output, State, ctx
from pdf2docx import Converter

logger = logging.getLogger(__name__)

app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Global variables
# NOTE(review): this state lives at module level, so it is shared by every
# client served by this process; concurrent users will see each other's files.
uploaded_files = {}    # original filename -> io.BytesIO holding the PDF bytes
converted_files = {}   # output .docx filename -> io.BytesIO holding the DOCX bytes
failed_files = {}      # original filename -> error message for failed conversions
current_file = ""      # name of the file being converted; "" when idle
conversion_complete = False  # set by the worker thread once every file was tried


def convert_pdf_to_docx(pdf_path, docx_path):
    """Convert the PDF at *pdf_path* into a DOCX written to *docx_path*.

    The converter is closed even when the conversion raises, so the PDF file
    handle is not leaked on failure.
    """
    cv = Converter(pdf_path)
    try:
        cv.convert(docx_path)
    finally:
        cv.close()


def process_contents(contents, filename):
    """Decode one ``dcc.Upload`` payload into an in-memory byte buffer.

    Args:
        contents: String of the form ``"<header>,<base64 data>"``.
        filename: Name of the uploaded file (unused; kept for the interface).

    Returns:
        An ``io.BytesIO`` holding the decoded bytes.
    """
    # Split only on the first comma: the header precedes it and base64 data
    # never contains one, so a second comma must not break the unpacking.
    _content_type, content_string = contents.split(',', 1)
    decoded = base64.b64decode(content_string)
    return io.BytesIO(decoded)


def _convert_one(filename):
    """Convert a single uploaded PDF and return ``(docx_filename, docx_buffer)``.

    The converter works on file paths, so the upload is spilled to temporary
    files. Both are removed in ``finally`` regardless of the outcome.
    """
    pdf_file = uploaded_files[filename]
    temp_pdf_path = None
    temp_docx_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
            temp_pdf_path = temp_pdf.name
            temp_pdf.write(pdf_file.getvalue())

        # Only the path is needed; the converter writes the file itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as temp_docx:
            temp_docx_path = temp_docx.name

        convert_pdf_to_docx(temp_pdf_path, temp_docx_path)

        with open(temp_docx_path, 'rb') as docx_file:
            docx_buffer = io.BytesIO(docx_file.read())
    finally:
        for path in (temp_pdf_path, temp_docx_path):
            if path is not None:
                with contextlib.suppress(OSError):
                    os.unlink(path)

    docx_filename = os.path.splitext(filename)[0] + '.docx'
    return docx_filename, docx_buffer


def convert_files(filenames):
    """Convert every named upload; intended to run in a background thread.

    Progress is published through the module globals ``current_file``,
    ``converted_files``, ``failed_files`` and ``conversion_complete``.

    A failure on one file is recorded and the remaining files are still
    processed. ``conversion_complete`` is always set at the end, otherwise the
    polling callbacks would wait forever after an error.
    """
    global current_file, conversion_complete
    try:
        for filename in filenames:
            current_file = filename
            try:
                docx_filename, docx_buffer = _convert_one(filename)
            except Exception as exc:  # worker-thread boundary: record and go on
                logger.exception("Conversion failed for %s", filename)
                failed_files[filename] = str(exc)
            else:
                converted_files[docx_filename] = docx_buffer
    finally:
        current_file = ""
        conversion_complete = True


app.layout = dbc.Container([
    dbc.Card(
        dbc.CardBody([
            html.H1("PDF to DOCX Converter", className="text-center mb-4"),
            dcc.Upload(
                id='upload-data',
                children=html.Div([
                    'Drag and Drop or ',
                    html.A('Select PDF Files')
                ]),
                style={
                    'width': '100%',
                    'height': '60px',
                    'lineHeight': '60px',
                    'borderWidth': '1px',
                    'borderStyle': 'dashed',
                    'borderRadius': '5px',
                    'textAlign': 'center',
                    'margin': '10px'
                },
                multiple=True
            ),
            html.Div(id='upload-output'),
            dbc.Button("Convert and Download", id="convert-button", color="primary",
                       className="mt-3 mb-3", disabled=True),
            html.Div(id='conversion-output'),
            dcc.Download(id="download-zip"),
            # Polling timer; enabled only while a conversion is running.
            dcc.Interval(id='interval-component', interval=500, n_intervals=0,
                         disabled=True)
        ]),
        className="mt-3"
    )
], fluid=True)


@app.callback(
    Output('upload-output', 'children'),
    Output('convert-button', 'disabled'),
    Input('upload-data', 'contents'),
    State('upload-data', 'filename'),
    prevent_initial_call=True
)
def update_output(list_of_contents, list_of_names):
    """Store uploaded PDFs and list which files were accepted or skipped.

    Returns:
        ``(children, disabled)``: one status line per uploaded file, and whether
        the convert button must stay disabled (true when no PDF was accepted).
    """
    if list_of_contents is None:
        return [], True

    uploaded_files.clear()
    children = []
    for content, name in zip(list_of_contents, list_of_names):
        if name.lower().endswith('.pdf'):
            uploaded_files[name] = process_contents(content, name)
            children.append(html.Div(f"Uploaded: {name}"))
        else:
            children.append(html.Div(f"Skipped: {name} (Not a PDF file)",
                                     style={'color': 'red'}))
    # Keep the button disabled when every upload was rejected; converting
    # nothing would only produce an empty archive.
    return children, not uploaded_files


@app.callback(
    Output('interval-component', 'disabled'),
    Input('convert-button', 'n_clicks'),
    prevent_initial_call=True
)
def start_conversion(n_clicks):
    """Reset the conversion state, start the worker thread and enable polling.

    Returns:
        The new ``disabled`` value for the interval: ``False`` once a
        conversion was started, ``True`` when there was no click.
    """
    global conversion_complete
    if n_clicks is None:
        return True

    converted_files.clear()
    failed_files.clear()
    conversion_complete = False
    # Pass a snapshot of the names so a later upload cannot change the list
    # while the worker iterates over it.
    worker = threading.Thread(target=convert_files, args=(list(uploaded_files),))
    worker.start()
    return False


@app.callback(
    Output('conversion-output', 'children'),
    Input('interval-component', 'n_intervals'),
    prevent_initial_call=True
)
def update_status(n):
    """Render the current conversion status on every interval tick."""
    if current_file:
        return [
            html.Div([
                dbc.Spinner(size="sm", color="primary", type="grow"),
                html.Span(f" Converting: {current_file}", className="ml-2")
            ], className="d-flex align-items-center")
        ]

    if not conversion_complete:
        return [html.Div("Starting conversion...")]

    if converted_files:
        status = [html.Div("Conversion complete! Preparing download...")]
    else:
        status = [html.Div("Conversion finished, but no file could be converted.")]
    # Failed files are left out of the archive, so name them here.
    status.extend(
        html.Div(f"Failed: {name} ({error})", style={'color': 'red'})
        for name, error in failed_files.items()
    )
    return status


@app.callback(
    Output('download-zip', 'data'),
    Output('interval-component', 'disabled', allow_duplicate=True),
    Input('interval-component', 'n_intervals'),
    prevent_initial_call=True
)
def check_conversion_complete(n):
    """Send the ZIP of converted files once the worker has finished.

    Returns:
        ``(download_data, interval_disabled)``. While the conversion is running
        this is ``(None, False)`` so polling continues. Afterwards polling is
        stopped, and a download is triggered if at least one file converted.
    """
    if not conversion_complete:
        return None, False

    if not converted_files:
        # Nothing to deliver; stop polling without sending an empty archive.
        return None, True

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for filename, file_content in converted_files.items():
            zip_file.writestr(filename, file_content.getvalue())
    # Read the buffer only after the ZipFile is closed: closing is what writes
    # the central directory, without which the archive is unreadable.
    return dcc.send_bytes(zip_buffer.getvalue(), "converted_files.zip"), True


if __name__ == '__main__':
    print("Starting the Dash application...")
    app.run(debug=False, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")