Spaces:
Paused
Paused
File size: 5,393 Bytes
0094fbb 5d90167 eb83bba 0094fbb 5d90167 0094fbb 5d90167 0094fbb 5d90167 0094fbb 5d90167 0094fbb 5d90167 eb83bba 0094fbb eb83bba 5d90167 0094fbb eb83bba 0094fbb eb83bba 0094fbb eb83bba 0094fbb 5d90167 0094fbb eb83bba 0094fbb eb83bba 0094fbb eb83bba 5d90167 eb83bba 0094fbb f967883 0094fbb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
import base64
import io
import os
import threading
import time
import zipfile
from dash import Dash, dcc, html, Input, Output, State, ctx
import dash_bootstrap_components as dbc
from pdf2docx import Converter
import tempfile
# The Dash application instance, styled with the stock Bootstrap theme.
app = Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)

# Module-level state shared by the callbacks and the conversion worker.
uploaded_files = {}          # upload name -> in-memory buffer of the PDF bytes
converted_files = {}         # output .docx name -> in-memory buffer of the DOCX bytes
current_file = ""            # name of the file being converted; "" when idle
conversion_complete = False  # set to True once the worker has finished its batch
def convert_pdf_to_docx(pdf_path, docx_path):
    """Convert the PDF at ``pdf_path`` into a DOCX file at ``docx_path``.

    Args:
        pdf_path: Path of the source PDF, passed to ``Converter``.
        docx_path: Path the converted document is written to.

    Raises:
        Any exception raised by ``Converter.convert`` propagates to the
        caller; the converter is closed first.
    """
    cv = Converter(pdf_path)
    try:
        cv.convert(docx_path)
    finally:
        # Always close the converter, even when the conversion fails.
        cv.close()
def process_contents(contents, filename):
    """Decode an uploaded ``<header>,<base64 payload>`` string into a buffer.

    Args:
        contents: Upload string made of a header and a base64 payload
            separated by a comma.
        filename: Name of the uploaded file. Unused; kept so existing
            callers keep working.

    Returns:
        An ``io.BytesIO`` holding the decoded bytes.

    Raises:
        ValueError: If ``contents`` has no comma separator, or the payload
            is not valid base64 (``binascii.Error`` is a ``ValueError``).
    """
    # The base64 alphabet never contains a comma, so the last comma is always
    # the header/payload boundary, even if the header itself contains one.
    _header, separator, content_string = contents.rpartition(',')
    if not separator:
        raise ValueError("upload contents are missing the ',' header separator")
    return io.BytesIO(base64.b64decode(content_string))
def convert_files(filenames):
    """Convert each named upload to DOCX and store the results in memory.

    For every name in ``filenames`` the PDF bytes from ``uploaded_files`` are
    written to a temporary file, converted with ``convert_pdf_to_docx``, and
    the resulting bytes are stored in ``converted_files`` under the same base
    name with a ``.docx`` extension.

    ``current_file`` is updated as each file starts. When the batch ends,
    ``current_file`` is reset to ``""`` and ``conversion_complete`` is set to
    ``True``, including when a file fails, so code waiting on those flags is
    never left waiting.

    Args:
        filenames: Iterable of keys into ``uploaded_files``.
    """
    global converted_files, current_file, conversion_complete
    import logging  # local import: only needed for failure reporting here

    logger = logging.getLogger(__name__)
    try:
        for filename in filenames:
            current_file = filename
            pdf_file = uploaded_files.get(filename)
            if pdf_file is None:
                # The upload set changed after this batch was started.
                logger.warning("Skipping %s: upload no longer available", filename)
                continue

            docx_filename = os.path.splitext(filename)[0] + '.docx'
            temp_paths = []
            try:
                with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
                    temp_paths.append(temp_pdf.name)
                    temp_pdf.write(pdf_file.getvalue())
                # Created only to reserve a unique output path for the converter.
                with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as temp_docx:
                    temp_paths.append(temp_docx.name)
                temp_pdf_path, temp_docx_path = temp_paths

                convert_pdf_to_docx(temp_pdf_path, temp_docx_path)
                with open(temp_docx_path, 'rb') as docx_file:
                    converted_files[docx_filename] = io.BytesIO(docx_file.read())
            except Exception:
                # Worker boundary: one bad file must not abort the whole batch.
                logger.exception("Failed to convert %s", filename)
            finally:
                # Temp files are created with delete=False, so remove them here.
                for path in temp_paths:
                    try:
                        os.unlink(path)
                    except OSError:
                        logger.warning("Could not remove temporary file %s", path)
    finally:
        current_file = ""
        conversion_complete = True
# Page layout: one card containing the upload area, the upload summary, the
# convert button, the status line, the download target and the polling timer.
app.layout = dbc.Container(
    [
        dbc.Card(
            dbc.CardBody(
                [
                    html.H1("PDF to DOCX Converter", className="text-center mb-4"),
                    # Drop zone; accepts several files at once.
                    dcc.Upload(
                        id='upload-data',
                        children=html.Div(
                            [
                                'Drag and Drop or ',
                                html.A('Select PDF Files'),
                            ]
                        ),
                        style={
                            'width': '100%',
                            'height': '60px',
                            'lineHeight': '60px',
                            'borderWidth': '1px',
                            'borderStyle': 'dashed',
                            'borderRadius': '5px',
                            'textAlign': 'center',
                            'margin': '10px',
                        },
                        multiple=True,
                    ),
                    # Per-file "Uploaded"/"Skipped" messages.
                    html.Div(id='upload-output'),
                    # Starts disabled.
                    dbc.Button(
                        "Convert and Download",
                        id="convert-button",
                        color="primary",
                        className="mt-3 mb-3",
                        disabled=True,
                    ),
                    # Progress / completion message.
                    html.Div(id='conversion-output'),
                    dcc.Download(id="download-zip"),
                    # 500 ms timer, initially disabled.
                    dcc.Interval(
                        id='interval-component',
                        interval=500,
                        n_intervals=0,
                        disabled=True,
                    ),
                ]
            ),
            className="mt-3",
        )
    ],
    fluid=True,
)
@app.callback(
    Output('upload-output', 'children'),
    Output('convert-button', 'disabled'),
    Input('upload-data', 'contents'),
    State('upload-data', 'filename'),
    prevent_initial_call=True
)
def update_output(list_of_contents, list_of_names):
    """Register uploaded PDFs and report which files were accepted.

    Replaces the contents of ``uploaded_files`` with the decoded PDFs from
    this upload. Files whose name does not end in ``.pdf`` (case-insensitive)
    are reported as skipped.

    Args:
        list_of_contents: Encoded contents of the uploaded files, or ``None``.
        list_of_names: File names, parallel to ``list_of_contents``.

    Returns:
        A tuple ``(children, disabled)``: one status ``html.Div`` per file,
        and whether the convert button should stay disabled. The button is
        enabled only when at least one PDF was accepted.
    """
    if list_of_contents is None:
        return [], True

    uploaded_files.clear()
    children = []
    for content, name in zip(list_of_contents, list_of_names):
        if name.lower().endswith('.pdf'):
            uploaded_files[name] = process_contents(content, name)
            children.append(html.Div(f"Uploaded: {name}"))
        else:
            children.append(html.Div(f"Skipped: {name} (Not a PDF file)", style={'color': 'red'}))
    # Keep the button disabled when every file was skipped: nothing to convert.
    return children, not uploaded_files
@app.callback(
    Output('interval-component', 'disabled'),
    Input('convert-button', 'n_clicks'),
    prevent_initial_call=True
)
def start_conversion(n_clicks):
    """Start a background conversion of the current uploads.

    Clears earlier results, resets the completion flag and starts a thread
    running ``convert_files`` over a snapshot of the uploaded file names.

    Args:
        n_clicks: Click count of the convert button, or ``None``.

    Returns:
        The new ``disabled`` value for the interval component: ``True`` when
        there was no click, ``False`` once the worker has been started.
    """
    global converted_files, conversion_complete

    if n_clicks is None:
        return True

    # Reset the state left over from any previous run.
    converted_files.clear()
    conversion_complete = False

    # Snapshot the names so the worker iterates over a stable list.
    pending_names = list(uploaded_files.keys())
    worker = threading.Thread(target=convert_files, args=(pending_names,))
    worker.start()
    return False
@app.callback(
    Output('conversion-output', 'children'),
    Input('interval-component', 'n_intervals'),
    prevent_initial_call=True
)
def update_status(n):
    """Build the status message shown while a conversion is running.

    Args:
        n: Tick count of the interval component (unused).

    Returns:
        A one-element list holding the status ``html.Div``: a spinner with
        the current file name while a file is in progress, a completion
        message once ``conversion_complete`` is set, otherwise a
        "starting" message.
    """
    if current_file:
        spinner = dbc.Spinner(size="sm", color="primary", type="grow")
        label = html.Span(f" Converting: {current_file}", className="ml-2")
        return [html.Div([spinner, label], className="d-flex align-items-center")]

    if conversion_complete:
        return [html.Div("Conversion complete! Preparing download...")]

    return [html.Div("Starting conversion...")]
@app.callback(
    Output('download-zip', 'data'),
    Output('interval-component', 'disabled', allow_duplicate=True),
    Input('interval-component', 'n_intervals'),
    prevent_initial_call=True
)
def check_conversion_complete(n):
    """Send the converted files as a zip once the conversion has finished.

    Args:
        n: Tick count of the interval component (unused).

    Returns:
        A tuple ``(data, disabled)``. While the conversion is running this is
        ``(None, False)``. Once ``conversion_complete`` is set, it is the zip
        download payload and ``True``, which disables the interval.
    """
    if not conversion_complete:
        return None, False

    with io.BytesIO() as archive_buffer:
        with zipfile.ZipFile(archive_buffer, 'w', zipfile.ZIP_DEFLATED) as archive:
            for docx_name, docx_buffer in converted_files.items():
                archive.writestr(docx_name, docx_buffer.getvalue())
        # Read the bytes only after the ZipFile is closed, so the archive's
        # central directory has been written.
        archive_bytes = archive_buffer.getvalue()

    return dcc.send_bytes(archive_bytes, "converted_files.zip"), True
if __name__ == '__main__':
    # Script entry point: serve the app on every interface, port 7860.
    print("Starting the Dash application...")
    app.run(debug=False, host='0.0.0.0', port=7860)
    # Reached only after app.run() returns.
    print("Dash application has finished running.")