# Spaces: Paused
import base64 | |
import io | |
import os | |
import threading | |
import time | |
import zipfile | |
from dash import Dash, dcc, html, Input, Output, State, ctx | |
import dash_bootstrap_components as dbc | |
from pdf2docx import Converter | |
import tempfile | |
# The Dash application instance, styled with the Bootstrap theme.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Module-level state read and written by the functions in this file.
# NOTE(review): being module-level, this state is not separated per
# browser session -- confirm that is acceptable for the deployment.
uploaded_files = {}  # upload filename -> io.BytesIO holding the PDF bytes
converted_files = {}  # '<stem>.docx' -> io.BytesIO holding the DOCX bytes
current_file = ""  # filename being converted right now; "" when idle
conversion_complete = False  # True once convert_files() has finished
def convert_pdf_to_docx(pdf_path, docx_path):
    """Convert the PDF at ``pdf_path`` into a DOCX file at ``docx_path``.

    Args:
        pdf_path: Path of the PDF file to read.
        docx_path: Path the DOCX output is written to.

    Raises:
        Whatever ``Converter.convert`` raises; the converter is closed
        before the exception propagates.
    """
    cv = Converter(pdf_path)
    try:
        cv.convert(docx_path)
    finally:
        # Close even when the conversion fails so the converter does not
        # keep the input PDF open.
        cv.close()
def process_contents(contents, filename):
    """Decode the base64 payload of an uploaded file into a memory buffer.

    Args:
        contents: Upload string of the form ``"<header>,<base64 data>"``.
        filename: Name of the uploaded file. Not used by the decoding; kept
            so existing callers keep working.

    Returns:
        An ``io.BytesIO`` holding the decoded bytes.

    Raises:
        ValueError: If ``contents`` has no comma separating header and
            payload. ``base64.b64decode`` may also raise
            ``binascii.Error`` (a ``ValueError`` subclass) for a payload
            with incorrect padding.
    """
    # The base64 alphabet contains no comma, so the LAST comma is always the
    # header/payload boundary, even when the header itself contains commas.
    _header, separator, payload = contents.rpartition(',')
    if not separator:
        raise ValueError(
            "upload contents must look like '<header>,<base64 data>'"
        )
    return io.BytesIO(base64.b64decode(payload))
def convert_files(filenames):
    """Convert each named upload from PDF to DOCX and store the results.

    For every name in ``filenames`` the PDF bytes are read from
    ``uploaded_files``, written to a temporary ``.pdf`` file, converted with
    ``convert_pdf_to_docx`` into a temporary ``.docx`` file, and the DOCX
    bytes are stored in ``converted_files`` under ``'<stem>.docx'``.

    Progress is published through module globals: ``current_file`` holds the
    name being processed, and ``conversion_complete`` is set to ``True``
    when the function ends.

    Args:
        filenames: Iterable of keys of ``uploaded_files`` to convert.

    Raises:
        KeyError: If a name is not present in ``uploaded_files``.
        Any exception raised by the conversion or the file I/O. In that
        case temporary files are still removed, ``current_file`` is reset
        and ``conversion_complete`` is still set, so a caller polling these
        flags does not wait forever; ``converted_files`` then holds only the
        files converted before the failure.
    """
    global converted_files, current_file, conversion_complete
    try:
        for filename in filenames:
            current_file = filename
            pdf_file = uploaded_files[filename]
            docx_filename = os.path.splitext(filename)[0] + '.docx'

            temp_pdf_path = None
            temp_docx_path = None
            try:
                with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
                    temp_pdf_path = temp_pdf.name
                    temp_pdf.write(pdf_file.getvalue())

                # Only the path is needed; the file is created and closed so
                # the converter can write to it by name.
                with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as temp_docx:
                    temp_docx_path = temp_docx.name

                convert_pdf_to_docx(temp_pdf_path, temp_docx_path)

                with open(temp_docx_path, 'rb') as docx_file:
                    converted_files[docx_filename] = io.BytesIO(docx_file.read())
            finally:
                # delete=False means nothing removes these files for us, so
                # clean up on success and on failure alike.
                for temp_path in (temp_pdf_path, temp_docx_path):
                    if temp_path is not None:
                        os.unlink(temp_path)
    finally:
        # Always publish the final state, even if a file failed.
        current_file = ""
        conversion_complete = True
# Inline CSS for the dashed drag-and-drop upload area.
_UPLOAD_BOX_STYLE = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px'
}

# Card contents, top to bottom: title, multi-file upload area, upload
# summary, convert button (disabled initially), status area, the download
# target, and a 500 ms interval timer that starts out disabled.
_CARD_CHILDREN = [
    html.H1("PDF to DOCX Converter", className="text-center mb-4"),
    dcc.Upload(
        id='upload-data',
        children=html.Div([
            'Drag and Drop or ',
            html.A('Select PDF Files')
        ]),
        style=_UPLOAD_BOX_STYLE,
        multiple=True
    ),
    html.Div(id='upload-output'),
    dbc.Button(
        "Convert and Download",
        id="convert-button",
        color="primary",
        className="mt-3 mb-3",
        disabled=True,
    ),
    html.Div(id='conversion-output'),
    dcc.Download(id="download-zip"),
    dcc.Interval(id='interval-component', interval=500, n_intervals=0, disabled=True)
]

# Page layout: a single card inside a fluid Bootstrap container.
app.layout = dbc.Container(
    [dbc.Card(dbc.CardBody(_CARD_CHILDREN), className="mt-3")],
    fluid=True,
)
def update_output(list_of_contents, list_of_names):
    """Store uploaded PDFs and build the upload summary.

    Replaces the contents of the module-level ``uploaded_files`` with the
    decoded PDFs from this upload. Names that do not end in ``.pdf``
    (case-insensitive) are skipped and reported in red.

    NOTE(review): no callback decorator is visible on this function in this
    view; confirm how it is wired to the layout. Presumably the arguments
    are the ``contents`` and ``filename`` of the ``upload-data`` component.

    Args:
        list_of_contents: Upload strings, one per file, or ``None`` when
            nothing has been uploaded.
        list_of_names: File names, parallel to ``list_of_contents``.

    Returns:
        A ``(children, disabled)`` tuple: the list of summary ``html.Div``
        elements, and ``True`` when there is nothing to convert (no upload,
        or every uploaded file was skipped), ``False`` otherwise.
    """
    global uploaded_files
    if list_of_contents is None:
        return [], True

    uploaded_files.clear()
    children = []
    for content, name in zip(list_of_contents, list_of_names):
        if name.lower().endswith('.pdf'):
            uploaded_files[name] = process_contents(content, name)
            children.append(html.Div(f"Uploaded: {name}"))
        else:
            children.append(html.Div(f"Skipped: {name} (Not a PDF file)", style={'color': 'red'}))

    # The second value only signals "something to convert" when at least one
    # PDF was actually accepted, not merely because an upload happened.
    return children, not uploaded_files
def start_conversion(n_clicks):
    """Start converting the uploaded files on a background thread.

    Clears ``converted_files``, resets ``conversion_complete`` and starts a
    thread running ``convert_files`` over a snapshot of the current
    ``uploaded_files`` keys.

    NOTE(review): no callback decorator is visible on this function in this
    view; confirm how it is wired to the layout.

    Args:
        n_clicks: Click count; ``None`` means no click has happened yet.

    Returns:
        ``True`` when ``n_clicks`` is ``None`` (nothing is started),
        ``False`` once the thread has been started.
    """
    global converted_files, conversion_complete
    if n_clicks is None:
        return True

    converted_files.clear()
    conversion_complete = False

    # Snapshot the names so the thread works on a fixed list.
    pending_names = list(uploaded_files.keys())
    worker = threading.Thread(target=convert_files, args=(pending_names,))
    worker.start()
    return False
def update_status(n):
    """Build the status display from the module-level progress flags.

    NOTE(review): no callback decorator is visible on this function in this
    view; confirm how it is wired to the layout.

    Args:
        n: Not used by the body.

    Returns:
        A one-element list: a spinner with the current file name while
        ``current_file`` is non-empty, a completion message once
        ``conversion_complete`` is set, otherwise a "starting" message.
    """
    if current_file:
        spinner = dbc.Spinner(size="sm", color="primary", type="grow")
        label = html.Span(f" Converting: {current_file}", className="ml-2")
        return [html.Div([spinner, label], className="d-flex align-items-center")]

    if conversion_complete:
        message = "Conversion complete! Preparing download..."
    else:
        message = "Starting conversion..."
    return [html.Div(message)]
def check_conversion_complete(n):
    """Package the converted files as a ZIP download once conversion is done.

    NOTE(review): no callback decorator is visible on this function in this
    view; confirm how it is wired to the layout.

    Args:
        n: Not used by the body.

    Returns:
        ``(None, False)`` while ``conversion_complete`` is false. Otherwise
        a tuple of the ``dcc.send_bytes`` result for a deflate-compressed
        archive named ``converted_files.zip`` containing every entry of
        ``converted_files``, and ``True``.
    """
    if not conversion_complete:
        return None, False

    with io.BytesIO() as archive_buffer:
        with zipfile.ZipFile(archive_buffer, 'w', zipfile.ZIP_DEFLATED) as archive:
            for docx_name, docx_buffer in converted_files.items():
                archive.writestr(docx_name, docx_buffer.getvalue())
        # Read the bytes only after the ZipFile is closed, so the archive's
        # central directory has been written.
        archive_bytes = archive_buffer.getvalue()

    return dcc.send_bytes(archive_bytes, "converted_files.zip"), True
if __name__ == '__main__':
    # Script entry point: serve the app on all interfaces at port 7860 with
    # debug mode off, logging before the server starts and after it returns.
    server_options = {'debug': False, 'host': '0.0.0.0', 'port': 7860}
    print("Starting the Dash application...")
    app.run(**server_options)
    print("Dash application has finished running.")