# auto-wiki / app.py
# Last change: "Update app.py" by bluenevus (commit e777af8, verified), 4.29 kB.
import base64
import io
import os
import zipfile
from dash import Dash, dcc, html, Input, Output, State, callback_context, no_update
import dash_bootstrap_components as dbc
from docx import Document
import markdown
import threading
import time
import PyPDF2
# Dash application instance, styled with the Bootstrap theme shipped by
# dash-bootstrap-components.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Page layout, top to bottom: title, upload drop zone, upload summary,
# two spinners, the convert button, and the download target.
app.layout = dbc.Container(
    [
        html.H1("Auto-Wiki", className="my-4"),
        # Drop zone: several files may be selected at once, and the file
        # picker is restricted to .docx and .pdf.
        dcc.Upload(
            id="upload-data",
            children=html.Div(
                [
                    "Drag and Drop or ",
                    html.A("Select Files"),
                ]
            ),
            style={
                "width": "100%",
                "height": "60px",
                "lineHeight": "60px",
                "borderWidth": "1px",
                "borderStyle": "dashed",
                "borderRadius": "5px",
                "textAlign": "center",
                "margin": "10px",
            },
            multiple=True,
            accept=".docx,.pdf",
        ),
        # Filled by the callback with one entry per uploaded file.
        html.Div(id="upload-output"),
        # The inner Divs' `style` props are callback outputs.
        # NOTE(review): confirm that dbc.Spinner itself accepts a `style`
        # keyword in the installed dash-bootstrap-components version.
        dbc.Spinner(
            html.Div(id="upload-spinner"),
            color="primary",
            type="grow",
            style={"display": "none"},
        ),
        dbc.Spinner(
            html.Div(id="conversion-spinner"),
            color="secondary",
            type="grow",
            style={"display": "none"},
        ),
        # Starts disabled; the callback enables it once files are uploaded.
        dbc.Button(
            "Convert and Download",
            id="convert-button",
            color="primary",
            className="mt-3",
            disabled=True,
        ),
        dcc.Download(id="download-zip"),
    ]
)
def process_docx(contents, filename):
    """Return the paragraph text of an uploaded .docx file.

    Args:
        contents: Upload payload of the form ``"<header>,<base64 data>"``.
            It must contain exactly one comma; otherwise the unpacking
            below raises ``ValueError``.
        filename: Name of the uploaded file (not used by this function).

    Returns:
        The text of every paragraph in the document, separated by blank
        lines.
    """
    _header, encoded = contents.split(',')
    raw_bytes = base64.b64decode(encoded)
    document = Document(io.BytesIO(raw_bytes))
    return '\n\n'.join(paragraph.text for paragraph in document.paragraphs)
def process_pdf(contents, filename):
    """Return the extracted text of an uploaded PDF file.

    Args:
        contents: Upload payload of the form ``"<header>,<base64 data>"``.
            It must contain exactly one comma; otherwise the unpacking
            below raises ``ValueError``.
        filename: Name of the uploaded file (not used by this function).

    Returns:
        The text extracted from each page, separated by blank lines.
    """
    _header, encoded = contents.split(',')
    stream = io.BytesIO(base64.b64decode(encoded))
    reader = PyPDF2.PdfReader(stream)
    return '\n\n'.join(page.extract_text() for page in reader.pages)
def process_files(contents, filenames):
    """Convert uploaded .docx/.pdf files and bundle the results into a ZIP.

    Each supported file is turned into text by ``process_docx`` or
    ``process_pdf``, passed through ``markdown.markdown`` and stored in the
    archive under the original name with its extension replaced by ``.md``.
    Files with any other extension are skipped.

    Args:
        contents: Iterable of upload payloads (``"<header>,<base64 data>"``),
            or ``None``.
        filenames: Iterable of the matching file names, or ``None``.

    Returns:
        The bytes of a deflate-compressed ZIP archive; an empty archive when
        nothing was converted.
    """
    converted = []
    used_names = set()

    for payload, original_name in zip(contents or [], filenames or []):
        lowered = original_name.lower()
        if lowered.endswith('.docx'):
            text = process_docx(payload, original_name)
            stem = original_name[:-len('.docx')]
        elif lowered.endswith('.pdf'):
            text = process_pdf(payload, original_name)
            stem = original_name[:-len('.pdf')]
        else:
            continue  # Skip unsupported file types

        # NOTE(review): markdown.markdown renders Markdown *to HTML*, so the
        # ".md" entries hold HTML markup -- confirm this is the intent.
        rendered = markdown.markdown(text)

        # Only the trailing extension is swapped (matched case-insensitively,
        # like the check above), so "REPORT.DOCX" becomes "REPORT.md" and a
        # ".pdf" in the middle of a name is left alone.
        archive_name = f"{stem}.md"

        # "a.docx" and "a.pdf" would otherwise both become "a.md"; a numeric
        # suffix keeps every archive entry unique.
        counter = 1
        while archive_name in used_names:
            archive_name = f"{stem}_{counter}.md"
            counter += 1
        used_names.add(archive_name)

        converted.append((archive_name, rendered))

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for archive_name, rendered in converted:
            zip_file.writestr(archive_name, rendered)
    # Read the buffer only after the archive is closed, so the ZIP central
    # directory has been written.
    return zip_buffer.getvalue()
@app.callback(
    [
        Output('upload-output', 'children'),
        Output('convert-button', 'disabled'),
        Output('upload-spinner', 'style'),
        Output('conversion-spinner', 'style'),
        Output('download-zip', 'data'),
    ],
    [
        Input('upload-data', 'contents'),
        Input('upload-data', 'filename'),
        Input('convert-button', 'n_clicks'),
    ],
    [
        State('upload-data', 'contents'),
        State('upload-data', 'filename'),
    ],
)
def update_output(list_of_contents, list_of_names, n_clicks, contents, filenames):
    """Handle both file uploads and clicks on the convert button.

    Args:
        list_of_contents: Upload payloads (Input ``upload-data.contents``).
        list_of_names: Uploaded file names (Input ``upload-data.filename``).
        n_clicks: Click count of the convert button.
        contents: The same upload payloads, read as State.
        filenames: The same file names, read as State.

    Returns:
        ``no_update`` when nothing relevant triggered the callback; otherwise
        a 5-tuple ``(upload summary, button disabled flag, upload spinner
        style, conversion spinner style, download data)``.
    """
    triggered = callback_context.triggered
    if not triggered:
        return no_update

    # Only the first triggering property decides which branch runs.
    trigger_id = triggered[0]['prop_id']

    if trigger_id == 'upload-data.contents':
        if list_of_contents is None:
            return no_update
        # One heading plus a rule per uploaded file; the convert button is
        # enabled and any previous download payload is cleared.
        summary = [
            html.Div([html.H5(f"File uploaded: {name}"), html.Hr()])
            for name in list_of_names
        ]
        return summary, False, {"display": "none"}, {"display": "none"}, None

    if trigger_id == 'convert-button.n_clicks':
        if n_clicks is None or not contents:
            return no_update
        zip_data = process_files(contents, filenames)
        download = dcc.send_bytes(zip_data, "converted_files.zip")
        # NOTE(review): the button is left disabled and the conversion
        # spinner style is set to "block" after the work has finished --
        # confirm this is the intended end state.
        return no_update, True, {"display": "none"}, {"display": "block"}, download

    return no_update
if __name__ == '__main__':
    # The server binds to every interface (0.0.0.0), so debug mode is opt-in:
    # set DASH_DEBUG to 1/true/yes/on to enable it for local development.
    debug_enabled = os.environ.get('DASH_DEBUG', '').strip().lower() in {
        '1', 'true', 'yes', 'on',
    }
    print("Starting the Dash application...")
    # app.run blocks until the server stops; the final message prints after.
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")