# pdf-to-word / app.py
# bluenevus's picture
# Update app.py
# f967883 verified
import base64
import io
import logging
import os
import tempfile
import threading
import time
import zipfile

import dash_bootstrap_components as dbc
from dash import Dash, dcc, html, Input, Output, State, ctx
from pdf2docx import Converter
# The Dash application instance, styled with the Bootstrap theme.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
# Global variables
# Module-level state shared between the Dash callbacks and the background
# conversion thread started by start_conversion().
# NOTE(review): being module globals, these are shared by everything running
# in this process rather than being kept per browser session.
uploaded_files = {}  # uploaded PDF filename -> io.BytesIO holding the decoded upload
converted_files = {}  # output .docx filename -> io.BytesIO holding the converted document
current_file = ""  # name of the file currently being converted; "" when none is
conversion_complete = False  # set to True by convert_files() after its loop finishes
def convert_pdf_to_docx(pdf_path, docx_path):
    """Convert the PDF at ``pdf_path`` into a Word document at ``docx_path``.

    Args:
        pdf_path: Path of the source PDF file.
        docx_path: Path the resulting .docx file is written to.

    Raises:
        Any exception raised by the ``pdf2docx`` converter is propagated to
        the caller after the converter has been closed.
    """
    cv = Converter(pdf_path)
    try:
        cv.convert(docx_path)
    finally:
        # Always release the converter, even when the conversion fails;
        # otherwise it is left open on the error path.
        cv.close()
def process_contents(contents, filename):
    """Decode one uploaded file's contents into an in-memory binary buffer.

    Args:
        contents: String of the form ``"<header>,<base64 data>"``.
        filename: Name of the uploaded file. Not used for decoding; kept so
            existing callers can keep passing it.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 holding the decoded bytes.

    Raises:
        ValueError: If ``contents`` has no ``","`` separating header and data.
    """
    # Base64 text never contains a comma, so the payload is whatever follows
    # the LAST comma. Splitting there tolerates commas inside the header,
    # which the plain two-way unpacking of split(',') rejected.
    _, separator, content_string = contents.rpartition(',')
    if not separator:
        raise ValueError("Upload contents must look like '<header>,<base64 data>'")
    return io.BytesIO(base64.b64decode(content_string))
def _convert_pdf_bytes(pdf_bytes):
    """Convert in-memory PDF bytes to DOCX bytes via temporary files."""
    temp_pdf_path = None
    temp_docx_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
            temp_pdf_path = temp_pdf.name
            temp_pdf.write(pdf_bytes)
        # Only the name of the second file is needed; the converter writes
        # the document to that path itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as temp_docx:
            temp_docx_path = temp_docx.name
        convert_pdf_to_docx(temp_pdf_path, temp_docx_path)
        with open(temp_docx_path, 'rb') as docx_file:
            return docx_file.read()
    finally:
        # The files were created with delete=False, so remove them here on
        # both the success and the failure path.
        for path in (temp_pdf_path, temp_docx_path):
            if path is None:
                continue
            try:
                os.unlink(path)
            except OSError:
                # Best-effort cleanup: a file that is already gone must not
                # mask the conversion result or the original error.
                pass


def convert_files(filenames):
    """Convert the named uploaded PDFs to DOCX and publish progress.

    For each name, the PDF is looked up in ``uploaded_files``, converted, and
    stored in ``converted_files`` under the same base name with a ``.docx``
    extension. ``current_file`` holds the name being processed so the status
    callback can display it.

    A file that cannot be converted is logged and skipped so one bad upload
    does not abort the rest of the batch. ``current_file`` is cleared and
    ``conversion_complete`` is set when the function exits, including on
    errors, so the polling callbacks never wait forever.

    Args:
        filenames: Iterable of keys of ``uploaded_files`` to convert.
    """
    global converted_files, current_file, conversion_complete
    try:
        for filename in filenames:
            current_file = filename
            try:
                # The lookup sits inside the try because uploaded_files can
                # be replaced by a new upload while this loop is running.
                pdf_file = uploaded_files[filename]
                docx_bytes = _convert_pdf_bytes(pdf_file.getvalue())
            except Exception:
                # This function is the top of the worker thread; log the
                # failure and carry on with the remaining files.
                logging.getLogger(__name__).exception("Failed to convert %s", filename)
                continue
            docx_filename = os.path.splitext(filename)[0] + '.docx'
            converted_files[docx_filename] = io.BytesIO(docx_bytes)
    finally:
        current_file = ""
        conversion_complete = True
# Page layout: a single card holding the upload area, the upload summary,
# the convert button, the status line, the download target and the polling
# timer (initially disabled).
_upload_style = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px'
}

_upload_area = dcc.Upload(
    id='upload-data',
    children=html.Div(['Drag and Drop or ', html.A('Select PDF Files')]),
    style=_upload_style,
    multiple=True
)

_card_children = [
    html.H1("PDF to DOCX Converter", className="text-center mb-4"),
    _upload_area,
    html.Div(id='upload-output'),
    dbc.Button(
        "Convert and Download",
        id="convert-button",
        color="primary",
        className="mt-3 mb-3",
        disabled=True
    ),
    html.Div(id='conversion-output'),
    dcc.Download(id="download-zip"),
    dcc.Interval(id='interval-component', interval=500, n_intervals=0, disabled=True)
]

app.layout = dbc.Container(
    [dbc.Card(dbc.CardBody(_card_children), className="mt-3")],
    fluid=True
)
@app.callback(
    Output('upload-output', 'children'),
    Output('convert-button', 'disabled'),
    Input('upload-data', 'contents'),
    State('upload-data', 'filename'),
    prevent_initial_call=True
)
def update_output(list_of_contents, list_of_names):
    """Register newly uploaded PDFs and summarise the upload.

    Replaces the contents of ``uploaded_files`` with the PDFs from this
    upload. Files whose name does not end in ``.pdf`` (case-insensitive) are
    reported as skipped and not stored.

    Args:
        list_of_contents: Contents strings of the uploaded files, or ``None``.
        list_of_names: Filenames matching ``list_of_contents`` by position.

    Returns:
        A tuple ``(children, disabled)``: one status ``html.Div`` per file,
        and whether the convert button should stay disabled.
    """
    if list_of_contents is None:
        return [], True

    uploaded_files.clear()
    children = []
    for content, name in zip(list_of_contents, list_of_names):
        if name.lower().endswith('.pdf'):
            uploaded_files[name] = process_contents(content, name)
            children.append(html.Div(f"Uploaded: {name}"))
        else:
            children.append(html.Div(f"Skipped: {name} (Not a PDF file)", style={'color': 'red'}))
    # Enable the button only when at least one PDF was accepted; previously
    # it was enabled even if every file had been skipped.
    return children, not uploaded_files
@app.callback(
    Output('interval-component', 'disabled'),
    Input('convert-button', 'n_clicks'),
    prevent_initial_call=True
)
def start_conversion(n_clicks):
    """Start converting the uploaded files on a background thread.

    Args:
        n_clicks: Click count of the convert button, or ``None``.

    Returns:
        The new ``disabled`` value for the polling interval: ``True`` when
        there was no click, ``False`` once the worker has been started.
    """
    global converted_files, conversion_complete
    if n_clicks is None:
        return True

    # Reset the results of any previous run before the worker starts.
    conversion_complete = False
    converted_files.clear()

    # Hand the worker a snapshot of the current upload names.
    pending_names = list(uploaded_files)
    worker = threading.Thread(target=convert_files, args=(pending_names,))
    worker.start()
    return False
@app.callback(
    Output('conversion-output', 'children'),
    Input('interval-component', 'n_intervals'),
    prevent_initial_call=True
)
def update_status(n):
    """Render the current conversion status on every polling tick.

    Args:
        n: Tick count of the polling interval (unused).

    Returns:
        A one-element list with the status component: a spinner plus the
        name of the file being converted, a completion message, or a
        "starting" message when neither applies yet.
    """
    # Read the shared name once: the worker thread can clear it between the
    # check and the formatting, which would render an empty "Converting: ".
    active_file = current_file
    if active_file:
        return [
            html.Div([
                dbc.Spinner(size="sm", color="primary", type="grow"),
                html.Span(f" Converting: {active_file}", className="ml-2")
            ], className="d-flex align-items-center")
        ]
    if conversion_complete:
        return [html.Div("Conversion complete! Preparing download...")]
    return [html.Div("Starting conversion...")]
@app.callback(
    Output('download-zip', 'data'),
    Output('interval-component', 'disabled', allow_duplicate=True),
    Input('interval-component', 'n_intervals'),
    prevent_initial_call=True
)
def check_conversion_complete(n):
    """Send the ZIP of converted files once the conversion has finished.

    Args:
        n: Tick count of the polling interval (unused).

    Returns:
        A tuple ``(data, disabled)``. While the conversion is still running
        this is ``(None, False)``, which keeps polling. Once it is complete,
        ``data`` is the download payload for ``converted_files.zip`` and
        ``disabled`` is ``True`` to stop the polling interval.
    """
    if not conversion_complete:
        return None, False

    with io.BytesIO() as archive:
        with zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED) as bundle:
            for docx_name, docx_buffer in converted_files.items():
                bundle.writestr(docx_name, docx_buffer.getvalue())
        # Take the bytes only after the ZipFile has been closed, so the
        # archive is fully written, and before the buffer itself is closed.
        payload = archive.getvalue()
    return dcc.send_bytes(payload, "converted_files.zip"), True
if __name__ == '__main__':
    # Script entry point: serve the app on all interfaces, port 7860.
    # app.run() blocks, so the final message prints only after it returns.
    print("Starting the Dash application...")
    app.run(host='0.0.0.0', port=7860, debug=False)
    print("Dash application has finished running.")