bluenevus committed on
Commit
0094fbb
·
verified ·
1 Parent(s): 72fc9bd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -0
app.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import io
3
+ import os
4
+ import threading
5
+ import time
6
+ import zipfile
7
+ from dash import Dash, dcc, html, Input, Output, State, ctx
8
+ import dash_bootstrap_components as dbc
9
+ from pdf2docx import Converter
10
+ import tempfile
11
+
12
# Module-level state read and written by the callbacks and the conversion code.
# NOTE(review): these dicts live at module scope, so they appear to be shared
# by every browser session served by this process -- confirm that is intended.
uploaded_files = {}       # uploaded PDF filename -> io.BytesIO holding the PDF bytes
converted_files = {}      # output .docx filename -> io.BytesIO holding the DOCX bytes
conversion_progress = {}  # filename, or the key 'overall' -> percentage (0-100)

# The Dash application, styled with the Bootstrap theme.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
18
+
19
def convert_pdf_to_docx(pdf_path, docx_path):
    """Convert the PDF at ``pdf_path`` into a DOCX file at ``docx_path``.

    Args:
        pdf_path: Path of the source PDF file.
        docx_path: Path the DOCX output is written to.

    Raises:
        Whatever the underlying converter raises; the converter is closed
        before the exception propagates.
    """
    cv = Converter(pdf_path)
    try:
        cv.convert(docx_path)
    finally:
        # Always release the converter, even when the conversion fails.
        cv.close()
23
+
24
def process_contents(contents, filename):
    """Decode one upload's content string into an in-memory byte stream.

    Args:
        contents: String of the form ``<header>,<base64 payload>``.
        filename: Name of the uploaded file (not used by the decoding).

    Returns:
        An ``io.BytesIO`` positioned at the start of the decoded bytes.

    Raises:
        ValueError: If ``contents`` does not contain exactly one comma.
    """
    # Unpacking into two names enforces the "exactly one comma" shape.
    _header, payload = contents.split(',')
    return io.BytesIO(base64.b64decode(payload))
28
+
29
def convert_files(filenames):
    """Convert each named upload from PDF to DOCX, recording progress.

    For every name in ``filenames`` the PDF bytes are read from
    ``uploaded_files``, written to a scratch file, converted with
    ``convert_pdf_to_docx`` and the resulting DOCX bytes stored in
    ``converted_files`` under the same base name with a ``.docx`` extension.

    ``conversion_progress[filename]`` is set to the percentage of the batch
    finished once that file is done, and ``conversion_progress['overall']``
    is set to 100 when the batch ends.

    Args:
        filenames: Names of the uploads to convert; each must be a key of
            ``uploaded_files``.

    Raises:
        KeyError: If a name is missing from ``uploaded_files``.
        Exception: Any error raised by the conversion propagates after the
            scratch files are removed and the completion flag is set.
    """
    total_files = len(filenames)
    try:
        # One scratch directory for the whole batch; it is removed on exit,
        # including when a conversion fails part-way through.
        with tempfile.TemporaryDirectory() as work_dir:
            for i, filename in enumerate(filenames):
                pdf_file = uploaded_files[filename]
                temp_pdf_path = os.path.join(work_dir, f"{i}.pdf")
                temp_docx_path = os.path.join(work_dir, f"{i}.docx")

                with open(temp_pdf_path, 'wb') as temp_pdf:
                    temp_pdf.write(pdf_file.getvalue())

                convert_pdf_to_docx(temp_pdf_path, temp_docx_path)

                docx_filename = os.path.splitext(filename)[0] + '.docx'
                with open(temp_docx_path, 'rb') as docx_file:
                    converted_files[docx_filename] = io.BytesIO(docx_file.read())

                conversion_progress[filename] = (i + 1) / total_files * 100
    finally:
        # Mark the batch as finished even on failure so that anything
        # polling the 'overall' key cannot wait forever.
        conversion_progress['overall'] = 100
53
+
54
# Inline CSS for the dashed drag-and-drop upload area.
_upload_box_style = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px'
}

# Upload widget; multiple=True lets the user drop several files at once.
_upload_box = dcc.Upload(
    id='upload-data',
    children=html.Div([
        'Drag and Drop or ',
        html.A('Select PDF Files')
    ]),
    style=_upload_box_style,
    multiple=True
)

# The button starts disabled; the upload callback controls its state.
_convert_button = dbc.Button(
    "Convert and Download",
    id="convert-button",
    color="primary",
    className="mt-3 mb-3",
    disabled=True
)

# Page layout: a single card holding the title, the upload area, the upload
# summary, the convert button, the conversion status and the download target.
app.layout = dbc.Container(
    [
        dbc.Card(
            dbc.CardBody([
                html.H1("PDF to DOCX Converter", className="text-center mb-4"),
                _upload_box,
                html.Div(id='upload-output'),
                _convert_button,
                html.Div(id='conversion-output'),
                dcc.Download(id="download-zip")
            ]),
            className="mt-3"
        )
    ],
    fluid=True
)
84
+
85
@app.callback(
    Output('upload-output', 'children'),
    Output('convert-button', 'disabled'),
    Input('upload-data', 'contents'),
    State('upload-data', 'filename'),
    prevent_initial_call=True
)
def update_output(list_of_contents, list_of_names):
    """Store the uploaded PDFs and report what was accepted or skipped.

    Replaces the contents of ``uploaded_files`` with the decoded bytes of
    every upload whose name ends in ``.pdf`` (case-insensitive).

    Args:
        list_of_contents: Content strings of the uploaded files, or ``None``.
        list_of_names: File names matching ``list_of_contents`` by position.

    Returns:
        A tuple ``(children, disabled)``: one status line per uploaded file,
        and the ``disabled`` flag for the convert button. The button stays
        disabled unless at least one PDF was accepted.
    """
    if list_of_contents is None:
        return [], True

    uploaded_files.clear()
    children = []
    for content, name in zip(list_of_contents, list_of_names):
        if name.lower().endswith('.pdf'):
            uploaded_files[name] = process_contents(content, name)
            children.append(html.Div(f"Uploaded: {name}"))
        else:
            children.append(html.Div(f"Skipped: {name} (Not a PDF file)", style={'color': 'red'}))

    # Only enable conversion when there is something to convert.
    return children, not uploaded_files
105
+
106
@app.callback(
    Output('conversion-output', 'children'),
    Output('download-zip', 'data'),
    Input('convert-button', 'n_clicks'),
    prevent_initial_call=True
)
def convert_and_download(n_clicks):
    """Convert every uploaded PDF and send the results as one ZIP download.

    The conversion runs synchronously inside the callback: a regular Dash
    callback must return its output values once, so it cannot stream
    intermediate progress updates by yielding.

    Args:
        n_clicks: Click count of the convert button, or ``None``.

    Returns:
        A tuple ``(children, download)``: a status message for the page and
        either the ZIP download payload or ``None`` when nothing is sent.
    """
    if n_clicks is None:
        return [], None

    conversion_progress.clear()
    converted_files.clear()
    conversion_progress['overall'] = 0

    filenames = list(uploaded_files)
    if not filenames:
        return [html.Div("No PDF files to convert.", style={'color': 'red'})], None

    try:
        convert_files(filenames)
    except Exception as exc:
        # Top-level boundary of the callback: report the failure to the user
        # instead of leaving the page without any feedback.
        return [html.Div(f"Conversion failed: {exc}", style={'color': 'red'})], None

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for filename, file_content in converted_files.items():
            zip_file.writestr(filename, file_content.getvalue())
    # Read the bytes only after the archive is closed, so the central
    # directory has been written to the buffer.
    zip_bytes = zip_buffer.getvalue()

    return [html.Div("Conversion complete! Downloading ZIP file...")], dcc.send_bytes(zip_bytes, "converted_files.zip")
137
+
138
if __name__ == '__main__':
    print("Starting the Dash application...")
    # The server listens on every network interface, so debug mode is
    # opt-in: set DASH_DEBUG=1 (or true/yes) to enable it for local work.
    debug_enabled = os.environ.get('DASH_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")