Spaces:
Paused
Paused
File size: 4,286 Bytes
a736130 a166383 a736130 171b356 a736130 171b356 a736130 e777af8 a736130 e777af8 a736130 171b356 49e035d e777af8 49e035d a736130 e777af8 a736130 e777af8 a736130 e777af8 1879f8f a736130 a166383 a736130 171b356 a736130 e777af8 a166383 a736130 e777af8 a166383 a736130 e777af8 a736130 e777af8 a736130 a166383 a736130 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
import base64
import io
import os
import zipfile
from dash import Dash, dcc, html, Input, Output, State, callback_context, no_update
import dash_bootstrap_components as dbc
from docx import Document
import markdown
import threading
import time
import PyPDF2
# Dash application instance, styled with the stock Bootstrap theme.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Page layout, top to bottom: title, upload drop zone, upload status area,
# two spinner wrappers (their inner divs are the style targets of the
# callback), the convert button (disabled until a callback enables it) and
# the download component that receives the generated ZIP.
app.layout = dbc.Container(
    [
        html.H1("Auto-Wiki", className="my-4"),
        dcc.Upload(
            id="upload-data",
            children=html.Div(
                [
                    "Drag and Drop or ",
                    html.A("Select Files"),
                ]
            ),
            style={
                "width": "100%",
                "height": "60px",
                "lineHeight": "60px",
                "borderWidth": "1px",
                "borderStyle": "dashed",
                "borderRadius": "5px",
                "textAlign": "center",
                "margin": "10px",
            },
            # Several files may be dropped at once; only Word and PDF
            # documents are offered by the file picker.
            multiple=True,
            accept=".docx,.pdf",
        ),
        html.Div(id="upload-output"),
        dbc.Spinner(
            html.Div(id="upload-spinner"),
            color="primary",
            type="grow",
            style={"display": "none"},
        ),
        dbc.Spinner(
            html.Div(id="conversion-spinner"),
            color="secondary",
            type="grow",
            style={"display": "none"},
        ),
        dbc.Button(
            "Convert and Download",
            id="convert-button",
            color="primary",
            className="mt-3",
            disabled=True,
        ),
        dcc.Download(id="download-zip"),
    ]
)
def process_docx(contents, filename):
    """Extract the paragraph text of an uploaded .docx file.

    Args:
        contents: Upload payload as a string made of a header and a
            base64-encoded body, separated by a comma.
        filename: Original file name. Not used by this function; kept so the
            signature matches ``process_pdf``.

    Returns:
        The text of every paragraph in ``doc.paragraphs``, joined by blank
        lines.

    Raises:
        ValueError: If ``contents`` has no comma separating header and body.
    """
    # Split on the first comma only: the base64 body never contains a comma,
    # but the header might, and a plain split(',') would then fail to unpack.
    _, content_string = contents.split(',', 1)
    decoded = base64.b64decode(content_string)
    doc = Document(io.BytesIO(decoded))
    return '\n\n'.join(para.text for para in doc.paragraphs)
def process_pdf(contents, filename):
    """Extract the text of every page of an uploaded PDF file.

    Args:
        contents: Upload payload as a string made of a header and a
            base64-encoded body, separated by a comma.
        filename: Original file name. Not used by this function; kept so the
            signature matches ``process_docx``.

    Returns:
        The extracted text of each page, joined by blank lines. Pages that
        yield no text contribute an empty string.

    Raises:
        ValueError: If ``contents`` has no comma separating header and body.
    """
    # Split on the first comma only: the base64 body never contains a comma,
    # but the header might, and a plain split(',') would then fail to unpack.
    _, content_string = contents.split(',', 1)
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(base64.b64decode(content_string)))
    # Coerce a falsy result to '' so str.join cannot fail on a non-string.
    # NOTE(review): whether extract_text() can return None depends on the
    # installed PyPDF2 version - the guard is harmless either way.
    return '\n\n'.join(page.extract_text() or '' for page in pdf_reader.pages)
def process_files(contents, filenames):
    """Convert uploaded .docx/.pdf files and bundle the results into a ZIP.

    Files whose name does not end in ``.docx`` or ``.pdf`` (compared
    case-insensitively) are skipped.

    Args:
        contents: Sequence of upload payload strings, parallel to
            ``filenames``. ``None`` is treated as "no files".
        filenames: Sequence of original file names. ``None`` is treated as
            "no files".

    Returns:
        The bytes of a deflate-compressed ZIP archive holding one ``.md``
        entry per converted file (an empty archive if nothing was converted).
    """
    converted = []
    used_names = set()

    for payload, original_name in zip(contents or [], filenames or []):
        lowered = original_name.lower()
        # Strip the extension by slicing rather than str.replace so that the
        # rename is case-insensitive (like the check) and only ever touches
        # the trailing extension, never a match in the middle of the name.
        if lowered.endswith('.docx'):
            text = process_docx(payload, original_name)
            stem = original_name[:-len('.docx')]
        elif lowered.endswith('.pdf'):
            text = process_pdf(payload, original_name)
            stem = original_name[:-len('.pdf')]
        else:
            continue  # Skip unsupported file types

        # NOTE(review): markdown.markdown() renders Markdown source to HTML,
        # so the ".md" entries contain HTML. Kept as-is to preserve existing
        # output - confirm whether raw text was intended instead.
        rendered = markdown.markdown(text)

        # Two inputs can map to the same entry name (e.g. "a.docx" and
        # "a.pdf"); add a numeric suffix instead of writing duplicate ZIP
        # members, which would shadow each other on extraction.
        archive_name = f'{stem}.md'
        counter = 1
        while archive_name in used_names:
            counter += 1
            archive_name = f'{stem}_{counter}.md'
        used_names.add(archive_name)
        converted.append((archive_name, rendered))

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for name, content in converted:
            zip_file.writestr(name, content)
    return zip_buffer.getvalue()
@app.callback(
    [
        Output('upload-output', 'children'),
        Output('convert-button', 'disabled'),
        Output('upload-spinner', 'style'),
        Output('conversion-spinner', 'style'),
        Output('download-zip', 'data'),
    ],
    [
        Input('upload-data', 'contents'),
        Input('upload-data', 'filename'),
        Input('convert-button', 'n_clicks'),
    ],
    [
        State('upload-data', 'contents'),
        State('upload-data', 'filename'),
    ],
)
def update_output(list_of_contents, list_of_names, n_clicks, contents, filenames):
    """Handle both file uploads and clicks on the convert button.

    Args:
        list_of_contents: Upload payloads (Input), or ``None`` before any
            upload.
        list_of_names: Uploaded file names (Input), parallel to
            ``list_of_contents``.
        n_clicks: Click count of the convert button, ``None`` until clicked.
        contents: Same upload payloads, supplied again as State.
        filenames: Same file names, supplied again as State.

    Returns:
        ``no_update`` when nothing actionable fired; otherwise a 5-tuple of
        (upload status children, convert-button disabled flag, upload-spinner
        style, conversion-spinner style, download payload).
    """
    ctx = callback_context
    if not ctx.triggered:
        return no_update

    # Look at every triggering property rather than only the first one: an
    # upload changes both `contents` and `filename`, and relying on which of
    # them happens to be listed first is fragile.
    triggered_ids = {
        entry['prop_id'].rsplit('.', 1)[0] for entry in ctx.triggered
    }

    if 'upload-data' in triggered_ids:
        if list_of_contents is None:
            return no_update
        children = [
            html.Div([
                html.H5(f"File uploaded: {name}"),
                html.Hr(),
            ])
            for name in (list_of_names or [])
        ]
        # Enable the convert button and clear any previous download payload.
        return children, False, {"display": "none"}, {"display": "none"}, None

    if 'convert-button' in triggered_ids:
        if n_clicks is None or not contents:
            return no_update
        # NOTE(review): conversion runs synchronously inside this callback,
        # so the "block" style for the conversion spinner only reaches the
        # browser after the work is already done, and nothing here hides it
        # again. Return values are kept as they were - confirm intended UX.
        zip_data = process_files(contents, filenames)
        download = dcc.send_bytes(zip_data, "converted_files.zip")
        return no_update, True, {"display": "none"}, {"display": "block"}, download

    return no_update
if __name__ == '__main__':
    print("Starting the Dash application...")
    # SECURITY: the server binds to all interfaces (0.0.0.0). Running with
    # debug=True there exposes the interactive debugger and dev tools to
    # anyone who can reach the port, so debug mode is opt-in via the
    # DASH_DEBUG environment variable instead of being always on.
    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")