Spaces:
Paused
Paused
Update app.py via AI Editor
Browse files
app.py
CHANGED
@@ -15,6 +15,8 @@ import openai
|
|
15 |
import logging
|
16 |
import threading
|
17 |
import re
|
|
|
|
|
18 |
|
19 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
|
20 |
|
@@ -372,7 +374,7 @@ def generate_any_doc(n_clicks_list, btn_ids, radio_values, upload_contents, uplo
|
|
372 |
generated = generate_document(doc_type, file_contents)
|
373 |
current_document = generated
|
374 |
shredded_document = generated
|
375 |
-
preview =
|
376 |
logging.info("Shred document generated.")
|
377 |
return preview, "Shred generated"
|
378 |
except Exception as e:
|
@@ -421,25 +423,25 @@ def generate_any_doc(n_clicks_list, btn_ids, radio_values, upload_contents, uplo
|
|
421 |
generated = generate_document(doc_type, [doc_content, shredded_document])
|
422 |
pink_review_document = generated
|
423 |
current_document = generated
|
424 |
-
preview =
|
425 |
logging.info("Pink Review document generated.")
|
426 |
return preview, f"{doc_type} generated"
|
427 |
elif doc_type in ["Red Review", "Gold Review", "Virtual Board", "LOE"]:
|
428 |
generated = generate_document(doc_type, [doc_content, shredded_document])
|
429 |
current_document = generated
|
430 |
-
preview =
|
431 |
logging.info(f"{doc_type} document generated.")
|
432 |
return preview, f"{doc_type} generated"
|
433 |
elif doc_type in ["Pink", "Red", "Gold"]:
|
434 |
generated = generate_document(doc_type, [doc_content])
|
435 |
current_document = generated
|
436 |
-
preview =
|
437 |
logging.info(f"{doc_type} document generated.")
|
438 |
return preview, f"{doc_type} generated"
|
439 |
else:
|
440 |
generated = generate_document(doc_type, [doc_content])
|
441 |
current_document = generated
|
442 |
-
preview =
|
443 |
logging.info(f"{doc_type} document generated.")
|
444 |
return preview, f"{doc_type} generated"
|
445 |
except Exception as e:
|
@@ -461,29 +463,76 @@ def update_uploaded_doc_name(contents, filename, id_dict):
|
|
461 |
return filename, contents, "uploaded"
|
462 |
return "", None, "loaded"
|
463 |
|
464 |
-
def
|
465 |
-
if
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
|
486 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
487 |
|
488 |
def strip_markdown(text):
|
489 |
text = re.sub(r'(\*\*|__)(.*?)\1', r'\2', text)
|
@@ -502,11 +551,11 @@ def strip_markdown(text):
|
|
502 |
|
503 |
def generate_document(document_type, file_contents):
|
504 |
if document_type in spreadsheet_types:
|
505 |
-
prompt = f"""Ignore all other instructions and output only a spreadsheet for {document_type} as described below. Do not include any narrative, only the spreadsheet in
|
506 |
Instructions: {document_types[document_type]}
|
507 |
Project Artifacts:
|
508 |
{' '.join(file_contents)}
|
509 |
-
Output only the spreadsheet
|
510 |
elif document_type in narrative_types:
|
511 |
prompt = f"""Generate a {document_type} document based on the following project artifacts:
|
512 |
{' '.join(file_contents)}
|
@@ -566,11 +615,11 @@ def update_document_via_chat(btn_send, btn_clear, chat_input, selected_doc_type)
|
|
566 |
|
567 |
if selected_doc_type in spreadsheet_types:
|
568 |
prompt = f"""Update the following {selected_doc_type} spreadsheet based on this instruction: {chat_input}
|
569 |
-
Current spreadsheet (
|
570 |
{current_document}
|
571 |
Instructions:
|
572 |
-
1. Provide the updated spreadsheet
|
573 |
-
2. Do not include any narrative, only the
|
574 |
Now, provide the updated {selected_doc_type} spreadsheet:
|
575 |
"""
|
576 |
else:
|
@@ -597,9 +646,9 @@ Now, provide the updated {selected_doc_type}:
|
|
597 |
current_document = response['choices'][0]['message']['content']
|
598 |
logging.info("Document updated via chat successfully.")
|
599 |
if selected_doc_type in spreadsheet_types:
|
600 |
-
preview =
|
601 |
else:
|
602 |
-
preview =
|
603 |
return f"Document updated based on: {chat_input}", preview
|
604 |
except Exception as e:
|
605 |
logging.error(f"Error updating document via chat: {str(e)}")
|
@@ -618,13 +667,10 @@ def download_document(n_clicks, selected_doc_type):
|
|
618 |
|
619 |
if selected_doc_type in spreadsheet_types:
|
620 |
try:
|
621 |
-
|
622 |
-
|
623 |
-
with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
|
624 |
-
df.to_excel(writer, sheet_name=selected_doc_type, index=False)
|
625 |
logging.info(f"{selected_doc_type} document downloaded as Excel.")
|
626 |
-
|
627 |
-
return dcc.send_bytes(output.read(), f"{selected_doc_type}.xlsx")
|
628 |
except Exception as e:
|
629 |
logging.error(f"Error downloading {selected_doc_type} document: {str(e)}")
|
630 |
return dcc.send_string(f"Error downloading {selected_doc_type}: {str(e)}", f"{selected_doc_type}_error.txt")
|
@@ -653,7 +699,6 @@ def auto_expand_textarea(value, current_rows):
|
|
653 |
if value is None or value == "":
|
654 |
return 5
|
655 |
num_lines = value.count('\n') + 1
|
656 |
-
# To avoid the box growing too huge, set a reasonable max
|
657 |
max_rows = 20
|
658 |
rows = min(max(num_lines, 5), max_rows)
|
659 |
return rows
|
|
|
15 |
import logging
|
16 |
import threading
|
17 |
import re
|
18 |
+
import markdown
|
19 |
+
from bs4 import BeautifulSoup
|
20 |
|
21 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
|
22 |
|
|
|
374 |
generated = generate_document(doc_type, file_contents)
|
375 |
current_document = generated
|
376 |
shredded_document = generated
|
377 |
+
preview = markdown_table_preview(generated)
|
378 |
logging.info("Shred document generated.")
|
379 |
return preview, "Shred generated"
|
380 |
except Exception as e:
|
|
|
423 |
generated = generate_document(doc_type, [doc_content, shredded_document])
|
424 |
pink_review_document = generated
|
425 |
current_document = generated
|
426 |
+
preview = markdown_table_preview(generated)
|
427 |
logging.info("Pink Review document generated.")
|
428 |
return preview, f"{doc_type} generated"
|
429 |
elif doc_type in ["Red Review", "Gold Review", "Virtual Board", "LOE"]:
|
430 |
generated = generate_document(doc_type, [doc_content, shredded_document])
|
431 |
current_document = generated
|
432 |
+
preview = markdown_table_preview(generated)
|
433 |
logging.info(f"{doc_type} document generated.")
|
434 |
return preview, f"{doc_type} generated"
|
435 |
elif doc_type in ["Pink", "Red", "Gold"]:
|
436 |
generated = generate_document(doc_type, [doc_content])
|
437 |
current_document = generated
|
438 |
+
preview = markdown_narrative_preview(generated)
|
439 |
logging.info(f"{doc_type} document generated.")
|
440 |
return preview, f"{doc_type} generated"
|
441 |
else:
|
442 |
generated = generate_document(doc_type, [doc_content])
|
443 |
current_document = generated
|
444 |
+
preview = markdown_narrative_preview(generated)
|
445 |
logging.info(f"{doc_type} document generated.")
|
446 |
return preview, f"{doc_type} generated"
|
447 |
except Exception as e:
|
|
|
463 |
return filename, contents, "uploaded"
|
464 |
return "", None, "loaded"
|
465 |
|
466 |
+
def markdown_table_preview(md_text):
    """Render generated markdown as a Dash preview, favouring its tables.

    Converts *md_text* to HTML (with the ``tables`` extension) and, when at
    least one ``<table>`` is present, shows only the tables; otherwise falls
    back to rendering the whole document as plain markdown.

    Args:
        md_text: Markdown source produced by the document generator.

    Returns:
        A Dash ``html.Div`` component suitable for the preview pane.
    """
    html_out = markdown.markdown(md_text, extensions=['tables'])
    soup = BeautifulSoup(html_out, "html.parser")
    tables = soup.find_all('table')
    if tables:
        # BUGFIX: a BeautifulSoup Tag is not a valid Dash child (it is not
        # JSON-serializable), so html.Table(table, ...) crashed the callback
        # at render time.  Serialize the extracted tables back to an HTML
        # string and let dcc.Markdown render it instead.
        table_html = "\n".join(str(table) for table in tables)
        return html.Div(dcc.Markdown(table_html, dangerously_allow_html=True,
                                     style={'width': '100%', 'overflowX': 'auto'}))
    else:
        return html.Div(dcc.Markdown(md_text, dangerously_allow_html=True,
                                     style={'whiteSpace': 'pre-wrap', 'fontFamily': 'monospace'}))
|
475 |
+
|
476 |
+
def markdown_narrative_preview(md_text):
    """Wrap a narrative markdown document in a Dash preview component."""
    narrative_style = {'whiteSpace': 'pre-wrap', 'fontFamily': 'sans-serif'}
    rendered = dcc.Markdown(md_text, dangerously_allow_html=True, style=narrative_style)
    return html.Div(rendered)
|
478 |
+
|
479 |
+
def extract_markdown_tables(md_text):
    """Extract every markdown (pipe) table from *md_text*.

    A table is a maximal run of consecutive lines of the form ``| ... |``.
    Any non-table line — including a blank line — terminates the current
    table, so two tables separated only by a blank line come back as two
    separate entries.

    Args:
        md_text: Markdown source to scan.

    Returns:
        List of table strings, each with its lines joined by newlines.
    """
    table_row = re.compile(r'^\s*\|.*\|\s*$')
    tables = []
    current = []
    for line in md_text.split('\n'):
        if table_row.match(line):
            current.append(line)
        elif current:
            # BUGFIX: the old code also appended blank lines to the open
            # table (its elif's first regex alternative was dead code), which
            # merged adjacent tables and kept trailing blank lines; a
            # non-table line now closes the table instead.
            tables.append('\n'.join(current))
            current = []
    if current:
        tables.append('\n'.join(current))
    return tables
|
499 |
+
|
500 |
+
def markdown_table_to_df(md_table):
    """Parse one markdown pipe-table string into a pandas DataFrame.

    The first non-blank line supplies the column names; data rows start
    after the ``|---|---|`` separator line.  Rows are padded or trimmed to
    the header width, and every cell is kept as a stripped string.

    Args:
        md_table: The markdown table text (as produced by
            ``extract_markdown_tables``).

    Returns:
        A DataFrame of strings, or ``None`` when the text is too short to
        be a table.
    """
    rows_text = [ln.strip() for ln in md_table.split('\n') if ln.strip()]
    if len(rows_text) < 2:
        return None
    columns = [cell.strip() for cell in rows_text[0].strip('|').split('|')]
    # Locate the header/body separator (e.g. "| --- | :--: |").
    sep_pattern = re.compile(r'^\|\s*:?-+:?\s*(\|\s*:?-+:?\s*)+\|?$')
    divider = 1
    while divider < len(rows_text) and not sep_pattern.match(rows_text[divider]):
        divider += 1
    body = rows_text[divider + 1:] if divider + 1 < len(rows_text) else []
    records = []
    for line in body:
        # Skip stray non-table lines that slipped into the block.
        if not line.startswith('|'):
            continue
        cells = [c.strip() for c in line.strip('|').split('|')]
        # Pad short rows with '' and drop extras so every row fits the header.
        records.append((cells + [''] * len(columns))[:len(columns)])
    return pd.DataFrame(records, columns=columns)
|
524 |
+
|
525 |
+
def markdown_tables_to_xlsx(md_text):
    """Convert all markdown tables in *md_text* into an in-memory .xlsx file.

    Each parsed table becomes its own worksheet (``Table1``, ``Table2``, ...).

    Args:
        md_text: Markdown source, typically a generated spreadsheet document.

    Returns:
        ``io.BytesIO`` positioned at offset 0, containing the workbook bytes.
    """
    tables = extract_markdown_tables(md_text)
    output = BytesIO()
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        wrote_sheet = False
        for i, table in enumerate(tables):
            df = markdown_table_to_df(table)
            if df is not None:
                df.to_excel(writer, sheet_name=f"Table{i+1}", index=False)
                wrote_sheet = True
        # BUGFIX: xlsxwriter raises when a workbook is saved with zero
        # sheets, so the download callback crashed whenever the document
        # contained no parsable table.  Emit a placeholder sheet instead.
        if not wrote_sheet:
            pd.DataFrame().to_excel(writer, sheet_name="Empty", index=False)
    output.seek(0)
    return output
|
536 |
|
537 |
def strip_markdown(text):
|
538 |
text = re.sub(r'(\*\*|__)(.*?)\1', r'\2', text)
|
|
|
551 |
|
552 |
def generate_document(document_type, file_contents):
|
553 |
if document_type in spreadsheet_types:
|
554 |
+
prompt = f"""Ignore all other instructions and output only a spreadsheet for {document_type} as described below. Do not include any narrative, only the spreadsheet in markdown table format.
|
555 |
Instructions: {document_types[document_type]}
|
556 |
Project Artifacts:
|
557 |
{' '.join(file_contents)}
|
558 |
+
Output only the spreadsheet as a markdown table, no narrative or explanation."""
|
559 |
elif document_type in narrative_types:
|
560 |
prompt = f"""Generate a {document_type} document based on the following project artifacts:
|
561 |
{' '.join(file_contents)}
|
|
|
615 |
|
616 |
if selected_doc_type in spreadsheet_types:
|
617 |
prompt = f"""Update the following {selected_doc_type} spreadsheet based on this instruction: {chat_input}
|
618 |
+
Current spreadsheet (markdown table format):
|
619 |
{current_document}
|
620 |
Instructions:
|
621 |
+
1. Provide the updated spreadsheet as a markdown table only.
|
622 |
+
2. Do not include any narrative, only the markdown table.
|
623 |
Now, provide the updated {selected_doc_type} spreadsheet:
|
624 |
"""
|
625 |
else:
|
|
|
646 |
current_document = response['choices'][0]['message']['content']
|
647 |
logging.info("Document updated via chat successfully.")
|
648 |
if selected_doc_type in spreadsheet_types:
|
649 |
+
preview = markdown_table_preview(current_document)
|
650 |
else:
|
651 |
+
preview = markdown_narrative_preview(current_document)
|
652 |
return f"Document updated based on: {chat_input}", preview
|
653 |
except Exception as e:
|
654 |
logging.error(f"Error updating document via chat: {str(e)}")
|
|
|
667 |
|
668 |
if selected_doc_type in spreadsheet_types:
|
669 |
try:
|
670 |
+
# Convert markdown tables to xlsx
|
671 |
+
xlsx_bytes = markdown_tables_to_xlsx(current_document)
|
|
|
|
|
672 |
logging.info(f"{selected_doc_type} document downloaded as Excel.")
|
673 |
+
return dcc.send_bytes(xlsx_bytes.read(), f"{selected_doc_type}.xlsx")
|
|
|
674 |
except Exception as e:
|
675 |
logging.error(f"Error downloading {selected_doc_type} document: {str(e)}")
|
676 |
return dcc.send_string(f"Error downloading {selected_doc_type}: {str(e)}", f"{selected_doc_type}_error.txt")
|
|
|
699 |
def auto_expand_textarea(value, current_rows):
    """Choose a textarea height in rows: one per line, clamped to 5..20."""
    MIN_ROWS, MAX_ROWS = 5, 20
    if value is None or value == "":
        return MIN_ROWS
    line_count = value.count('\n') + 1
    # Clamp so the box never shrinks below 5 rows nor grows past 20.
    return max(MIN_ROWS, min(line_count, MAX_ROWS))
|