Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,47 @@
|
|
1 |
-
import
|
|
|
|
|
|
|
2 |
import pikepdf
|
3 |
import requests
|
4 |
import io
|
5 |
import tempfile
|
6 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
def compress_pdf(input_file, url):
|
9 |
if input_file is None and (url is None or url.strip() == ""):
|
@@ -19,62 +57,75 @@ def compress_pdf(input_file, url):
|
|
19 |
pdf_content = io.BytesIO(response.content)
|
20 |
initial_size = len(response.content)
|
21 |
else:
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
else:
|
27 |
-
# If input_file is file-like object
|
28 |
-
pdf_content = io.BytesIO(input_file.read())
|
29 |
-
pdf_content.seek(0, io.SEEK_END)
|
30 |
-
initial_size = pdf_content.tell()
|
31 |
-
pdf_content.seek(0)
|
32 |
|
33 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
|
34 |
temp_file_path = temp_file.name
|
35 |
|
36 |
pdf = pikepdf.Pdf.open(pdf_content)
|
37 |
|
38 |
-
# Always use the highest compression setting
|
39 |
compression_params = dict(compress_streams=True, object_stream_mode=pikepdf.ObjectStreamMode.generate)
|
40 |
|
41 |
pdf.save(temp_file_path, **compression_params)
|
42 |
|
43 |
-
# Check the compression ratio achieved
|
44 |
compressed_size = os.path.getsize(temp_file_path)
|
45 |
compression_ratio = compressed_size / initial_size
|
46 |
compression_percentage = (1 - compression_ratio) * 100
|
47 |
|
48 |
-
# If compression increased file size or didn't meet minimum threshold, return original file
|
49 |
if compression_ratio >= 1 or compression_percentage < 5:
|
50 |
os.remove(temp_file_path)
|
51 |
-
return
|
52 |
|
53 |
return temp_file_path, f"PDF compressed successfully! Compression achieved: {compression_percentage:.2f}%"
|
54 |
except Exception as e:
|
55 |
return None, f"Error compressing PDF: {str(e)}"
|
56 |
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import dash
|
2 |
+
from dash import dcc, html, Input, Output, State, callback
|
3 |
+
import dash_bootstrap_components as dbc
|
4 |
+
from dash.exceptions import PreventUpdate
|
5 |
import pikepdf
|
6 |
import requests
|
7 |
import io
|
8 |
import tempfile
|
9 |
import os
|
10 |
+
import base64
|
11 |
+
import threading
|
12 |
+
|
13 |
+
# Create the Dash application, styled with the Bootstrap theme stylesheet.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Inline CSS for the dashed drag-and-drop upload area.
_upload_box_style = dict(
    width='100%',
    height='60px',
    lineHeight='60px',
    borderWidth='1px',
    borderStyle='dashed',
    borderRadius='5px',
    textAlign='center',
    margin='10px',
)

# Prompt text rendered inside the upload area.
_upload_prompt = html.Div(['Drag and Drop or ', html.A('Select PDF File')])

# Controls shown inside the card, in display order: file upload, URL input,
# compress button, status spinner, download target, and download button.
_card_controls = [
    dcc.Upload(
        id='upload-pdf',
        children=_upload_prompt,
        style=_upload_box_style,
        multiple=False,
    ),
    dbc.Input(id="url-input", placeholder="Or enter PDF URL", type="text", className="mt-3"),
    dbc.Button("Compress", id="compress-btn", color="primary", className="mt-3"),
    dbc.Spinner(html.Div(id="compression-status"), color="primary", type="grow", className="mt-3"),
    dcc.Download(id="download-pdf"),
    dbc.Button(
        "Download Compressed PDF",
        id="download-btn",
        color="success",
        className="mt-3",
        disabled=True,
    ),
]

# Page layout: a heading followed by a single card holding all controls.
app.layout = dbc.Container([
    html.H1("PDF Compressor", className="my-4"),
    dbc.Card([dbc.CardBody(_card_controls)]),
])
|
45 |
|
46 |
def compress_pdf(input_file, url):
|
47 |
if input_file is None and (url is None or url.strip() == ""):
|
|
|
57 |
pdf_content = io.BytesIO(response.content)
|
58 |
initial_size = len(response.content)
|
59 |
else:
|
60 |
+
content_type, content_string = input_file.split(',')
|
61 |
+
decoded = base64.b64decode(content_string)
|
62 |
+
pdf_content = io.BytesIO(decoded)
|
63 |
+
initial_size = len(decoded)
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
|
66 |
temp_file_path = temp_file.name
|
67 |
|
68 |
pdf = pikepdf.Pdf.open(pdf_content)
|
69 |
|
|
|
70 |
compression_params = dict(compress_streams=True, object_stream_mode=pikepdf.ObjectStreamMode.generate)
|
71 |
|
72 |
pdf.save(temp_file_path, **compression_params)
|
73 |
|
|
|
74 |
compressed_size = os.path.getsize(temp_file_path)
|
75 |
compression_ratio = compressed_size / initial_size
|
76 |
compression_percentage = (1 - compression_ratio) * 100
|
77 |
|
|
|
78 |
if compression_ratio >= 1 or compression_percentage < 5:
|
79 |
os.remove(temp_file_path)
|
80 |
+
return None, f"Unable to compress the PDF effectively. Original file returned. (Attempted compression: {compression_percentage:.2f}%)"
|
81 |
|
82 |
return temp_file_path, f"PDF compressed successfully! Compression achieved: {compression_percentage:.2f}%"
|
83 |
except Exception as e:
|
84 |
return None, f"Error compressing PDF: {str(e)}"
|
85 |
|
86 |
+
@callback(
    Output("compression-status", "children"),
    Output("download-btn", "disabled"),
    Output("download-pdf", "data"),
    Input("compress-btn", "n_clicks"),
    Input("download-btn", "n_clicks"),
    State("upload-pdf", "contents"),
    State("url-input", "value"),
    prevent_initial_call=True,
)
def process_and_compress(compress_clicks, download_clicks, file_content, url):
    """Compress the uploaded or linked PDF when the Compress button is clicked.

    Args:
        compress_clicks: Click count of the "compress-btn" button (only used
            as a trigger).
        download_clicks: Click count of the "download-btn" button (only used
            as a trigger; clicks on it produce no update).
        file_content: The ``contents`` value of the upload component, passed
            through to ``compress_pdf``; ``None`` when nothing was uploaded.
        url: Text of the URL input, or ``None``.

    Returns:
        A ``(status_message, download_button_disabled, download_data)`` tuple
        matching the three declared outputs.

    Raises:
        PreventUpdate: When nothing triggered the callback or the trigger was
            not the Compress button.
    """
    ctx = dash.callback_context
    if not ctx.triggered:
        raise PreventUpdate

    triggered_id = ctx.triggered[0]["prop_id"].split(".")[0]
    if triggered_id != "compress-btn":
        # Clicks on download-btn (or any other trigger) change nothing.
        raise PreventUpdate

    if file_content is None and (url is None or url.strip() == ""):
        return "Please provide either a file or a URL.", True, None

    # Run the compression exactly once. The previous version started a thread,
    # joined it immediately (discarding its result) and then ran the same work
    # a second time, doubling the download/CPU cost with no concurrency gain.
    output_file, message = compress_pdf(file_content, url)
    if not output_file:
        return message, True, None

    try:
        with open(output_file, "rb") as compressed_file:
            compressed_content = compressed_file.read()
    finally:
        # Always delete the temporary file, even if reading it fails.
        os.remove(output_file)

    return message, False, dcc.send_bytes(compressed_content, "compressed.pdf")
|
127 |
+
|
128 |
+
if __name__ == '__main__':
    print("Starting the Dash application...")
    # The server binds to every interface (0.0.0.0), so the interactive
    # debugger must not be on by default: it allows arbitrary code execution
    # for anyone who can reach the port. Opt in explicitly with DASH_DEBUG=1.
    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")
|