File size: 5,393 Bytes
0094fbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5d90167
eb83bba
0094fbb
5d90167
0094fbb
5d90167
0094fbb
 
 
 
 
 
 
 
5d90167
 
 
0094fbb
 
 
 
 
 
 
 
 
5d90167
0094fbb
 
 
 
 
 
 
5d90167
eb83bba
0094fbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eb83bba
5d90167
0094fbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eb83bba
0094fbb
 
 
eb83bba
0094fbb
eb83bba
0094fbb
5d90167
0094fbb
eb83bba
0094fbb
 
eb83bba
0094fbb
eb83bba
 
 
 
 
5d90167
 
 
 
 
 
 
 
 
 
 
 
eb83bba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0094fbb
 
 
f967883
0094fbb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import base64
import io
import os
import threading
import time
import zipfile
from dash import Dash, dcc, html, Input, Output, State, ctx
import dash_bootstrap_components as dbc
from pdf2docx import Converter
import tempfile

# The single Dash application object; callbacks below register against it.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Global variables
# NOTE(review): these live at module level, so every callback invocation and
# the background conversion thread read and write the same objects. Nothing
# here is keyed by user or session -- confirm that is acceptable if more than
# one person uses the app at the same time.

# Original upload name -> io.BytesIO holding the decoded PDF bytes.
uploaded_files = {}
# Output name (upload name with its extension replaced by ".docx") ->
# io.BytesIO holding the converted document bytes.
converted_files = {}
# Name of the file the worker is converting right now; "" once it is done.
current_file = ""
# Set to True by the worker after the last file; reset when a new run starts.
conversion_complete = False

def convert_pdf_to_docx(pdf_path, docx_path):
    """Convert the PDF at *pdf_path* into a DOCX file written to *docx_path*.

    Args:
        pdf_path: Filesystem path of the source PDF.
        docx_path: Filesystem path the resulting DOCX is written to.

    Raises:
        Whatever ``Converter.convert`` raises; the converter is closed first.
    """
    cv = Converter(pdf_path)
    try:
        cv.convert(docx_path)
    finally:
        # Release the converter's handle on the PDF even when conversion
        # fails, so callers can still delete the source file afterwards.
        cv.close()

def process_contents(contents, filename):
    """Decode one upload payload into an in-memory binary buffer.

    Args:
        contents: String of the form ``"<header>,<base64 data>"``.
        filename: Name of the uploaded file. Not used for decoding; kept so
            existing callers keep working.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 containing the decoded bytes.

    Raises:
        ValueError: If *contents* has no ``,`` separating header and payload.
    """
    # Split on the LAST comma: the base64 alphabet never contains one, whereas
    # the header may (for example inside a parameter value).
    _header, separator, payload = contents.rpartition(',')
    if not separator:
        raise ValueError(
            "upload contents must have the form '<header>,<base64 data>'"
        )
    return io.BytesIO(base64.b64decode(payload))

def _convert_single_file(filename):
    """Convert one uploaded PDF and return ``(docx_filename, docx_buffer)``.

    The PDF bytes are written to a temporary file because the converter works
    on filesystem paths. Both temporary files are removed before returning,
    whether or not the conversion succeeded.
    """
    pdf_file = uploaded_files[filename]
    temp_paths = []
    try:
        # delete=False: the files must outlive the ``with`` blocks so the
        # converter can open them by path; they are removed in ``finally``.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
            temp_paths.append(temp_pdf.name)
            temp_pdf.write(pdf_file.getvalue())
        temp_pdf_path = temp_paths[0]

        with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as temp_docx:
            temp_paths.append(temp_docx.name)
        temp_docx_path = temp_paths[1]

        convert_pdf_to_docx(temp_pdf_path, temp_docx_path)

        with open(temp_docx_path, 'rb') as docx_file:
            docx_bytes = docx_file.read()
    finally:
        for path in temp_paths:
            try:
                os.unlink(path)
            except OSError:
                # Best-effort cleanup: a file that is already gone or still
                # locked must not mask the real conversion error.
                pass

    docx_filename = os.path.splitext(filename)[0] + '.docx'
    return docx_filename, io.BytesIO(docx_bytes)


def convert_files(filenames):
    """Convert each named upload to DOCX and publish progress via globals.

    Intended to run in a background thread. While running, ``current_file``
    holds the name being converted. Each successful result is stored in
    ``converted_files`` under the upload's name with a ``.docx`` extension.
    When the loop ends, ``current_file`` is reset to ``""`` and
    ``conversion_complete`` is set to ``True``.

    A file that fails to convert is logged and skipped so the remaining files
    are still processed.

    Args:
        filenames: Iterable of keys into ``uploaded_files``.
    """
    import logging  # local import: keeps this block free of file-level changes

    global converted_files, current_file, conversion_complete
    try:
        for filename in filenames:
            current_file = filename
            try:
                docx_filename, docx_buffer = _convert_single_file(filename)
            except Exception:
                # Top-level boundary of the worker thread: record the failure
                # and continue instead of letting the thread die mid-batch.
                logging.getLogger(__name__).exception(
                    "Failed to convert %s", filename
                )
                continue
            converted_files[docx_filename] = docx_buffer
    finally:
        # Always signal completion; otherwise the polling callbacks would
        # keep reporting an in-progress conversion forever.
        current_file = ""
        conversion_complete = True

# Page layout: a single card holding the upload area, the per-file upload
# report, the convert button, the status line, and two non-visual helpers
# (the download trigger and the polling timer).
app.layout = dbc.Container([
    dbc.Card(
        dbc.CardBody([
            html.H1("PDF to DOCX Converter", className="text-center mb-4"),
            # Drop zone / file picker; multiple=True lets several files be
            # selected in one upload.
            dcc.Upload(
                id='upload-data',
                children=html.Div([
                    'Drag and Drop or ',
                    html.A('Select PDF Files')
                ]),
                style={
                    'width': '100%',
                    'height': '60px',
                    'lineHeight': '60px',
                    'borderWidth': '1px',
                    'borderStyle': 'dashed',
                    'borderRadius': '5px',
                    'textAlign': 'center',
                    'margin': '10px'
                },
                multiple=True
            ),
            # Filled by update_output with one "Uploaded"/"Skipped" line per file.
            html.Div(id='upload-output'),
            # Starts disabled; update_output controls the `disabled` property.
            dbc.Button("Convert and Download", id="convert-button", color="primary", className="mt-3 mb-3", disabled=True),
            # Filled by update_status with the current conversion state.
            html.Div(id='conversion-output'),
            # Target of check_conversion_complete, which sends the zip here.
            dcc.Download(id="download-zip"),
            # Polling timer (500 ms). Starts disabled; start_conversion and
            # check_conversion_complete toggle its `disabled` property.
            dcc.Interval(id='interval-component', interval=500, n_intervals=0, disabled=True)
        ]),
        className="mt-3"
    )
], fluid=True)

@app.callback(
    Output('upload-output', 'children'),
    Output('convert-button', 'disabled'),
    Input('upload-data', 'contents'),
    State('upload-data', 'filename'),
    prevent_initial_call=True
)
def update_output(list_of_contents, list_of_names):
    """Store the uploaded PDFs and report which files were accepted.

    Each new upload replaces the previous contents of ``uploaded_files``.
    Files whose name does not end in ``.pdf`` (case-insensitive) are reported
    as skipped and not stored.

    Args:
        list_of_contents: Upload payloads, one per file, or ``None``.
        list_of_names: File names matching *list_of_contents* by position.

    Returns:
        ``(children, disabled)``: the status lines for ``upload-output`` and
        the ``disabled`` flag for the convert button. The button is enabled
        only when at least one PDF was stored.
    """
    if list_of_contents is None:
        return [], True

    uploaded_files.clear()
    children = []
    for content, name in zip(list_of_contents, list_of_names):
        if name.lower().endswith('.pdf'):
            uploaded_files[name] = process_contents(content, name)
            children.append(html.Div(f"Uploaded: {name}"))
        else:
            children.append(html.Div(f"Skipped: {name} (Not a PDF file)", style={'color': 'red'}))

    # Keep the button disabled when every file was skipped: there would be
    # nothing to convert and the download would be an empty archive.
    return children, not uploaded_files

@app.callback(
    Output('interval-component', 'disabled'),
    Input('convert-button', 'n_clicks'),
    prevent_initial_call=True
)
def start_conversion(n_clicks):
    """Start the background conversion and switch the polling timer on.

    Args:
        n_clicks: Click count of the convert button, or ``None``.

    Returns:
        The ``disabled`` flag for ``interval-component``: ``True`` when no
        conversion is started (no click, or nothing uploaded), ``False`` when
        a conversion is running and should be polled.
    """
    global converted_files, conversion_complete

    # Without a click or any stored upload there is nothing to convert;
    # leave the polling timer off instead of producing an empty archive.
    if n_clicks is None or not uploaded_files:
        return True

    worker_name = "pdf-conversion"
    # threading.enumerate() lists only threads that are currently alive, so a
    # match means an earlier click is still being processed. Starting a second
    # worker would clear and refill the shared result dict underneath it.
    if any(thread.name == worker_name for thread in threading.enumerate()):
        return False

    converted_files.clear()
    conversion_complete = False

    # Pass a snapshot of the names so a later upload cannot change the
    # iteration the worker is performing.
    threading.Thread(
        target=convert_files,
        args=(list(uploaded_files),),
        name=worker_name,
    ).start()
    return False

@app.callback(
    Output('conversion-output', 'children'),
    Input('interval-component', 'n_intervals'),
    prevent_initial_call=True
)
def update_status(n):
    """Render the status line for the current conversion state.

    Called on every tick of the polling timer; *n* (the tick count) is unused.

    Returns:
        A one-element list: a spinner with the file name while a file is
        being converted, otherwise a plain message saying whether the
        conversion has finished or has not started yet.
    """
    if not current_file:
        # No file in flight: either everything is done or nothing has begun.
        status_text = (
            "Conversion complete! Preparing download..."
            if conversion_complete
            else "Starting conversion..."
        )
        return [html.Div(status_text)]

    spinner = dbc.Spinner(size="sm", color="primary", type="grow")
    label = html.Span(f" Converting: {current_file}", className="ml-2")
    return [html.Div([spinner, label], className="d-flex align-items-center")]

@app.callback(
    Output('download-zip', 'data'),
    Output('interval-component', 'disabled', allow_duplicate=True),
    Input('interval-component', 'n_intervals'),
    prevent_initial_call=True
)
def check_conversion_complete(n):
    """Send the zip of converted files once the conversion has finished.

    Called on every tick of the polling timer; *n* (the tick count) is unused.

    Returns:
        ``(data, disabled)``: while the conversion is still running,
        ``(None, False)`` so nothing is downloaded and polling continues.
        Once it is complete, the download payload for ``converted_files.zip``
        and ``True`` to switch the polling timer off.
    """
    if not conversion_complete:
        return None, False

    # Build the archive fully in memory from the converted documents.
    archive_buffer = io.BytesIO()
    with zipfile.ZipFile(archive_buffer, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
        for docx_name, docx_buffer in converted_files.items():
            archive.writestr(docx_name, docx_buffer.getvalue())

    # Read the bytes only after the ZipFile is closed, so the central
    # directory has been written.
    archive_bytes = archive_buffer.getvalue()
    archive_buffer.close()
    return dcc.send_bytes(archive_bytes, "converted_files.zip"), True

# Script entry point: start the server only when this file is run directly.
if __name__ == '__main__':
    print("Starting the Dash application...")
    # host='0.0.0.0' listens on all network interfaces, not just localhost.
    # NOTE(review): port 7860 presumably matches the hosting platform's
    # expected port -- confirm before changing it.
    app.run(debug=False, host='0.0.0.0', port=7860)
    # app.run() blocks, so this line is reached only after the server stops.
    print("Dash application has finished running.")