Spaces:
Paused
Paused
import base64 | |
import io | |
import os | |
import threading | |
import time | |
from typing import List, Tuple | |
import re | |
import pandas as pd | |
from docx import Document | |
from io import BytesIO | |
import dash | |
import dash_bootstrap_components as dbc | |
from dash import html, dcc, Input, Output, State, ctx, dash_table | |
import google.generativeai as genai | |
from docx import Document | |
from docx.shared import Pt | |
from docx.enum.style import WD_STYLE_TYPE | |
from PyPDF2 import PdfReader | |
# Dash application instance, themed with the Bootstrap stylesheet.
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)

# Gemini client setup. The API key is read from the GEMINI_API_KEY
# environment variable; a missing variable raises KeyError at import time.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
def process_document(contents: str, filename: str) -> str:
    """Extract plain text from an uploaded PDF or DOCX file.

    Args:
        contents: Upload payload of the form ``"<header>,<base64 data>"``.
        filename: Name of the uploaded file; its extension (compared
            case-insensitively) selects the parser.

    Returns:
        The extracted text, or a fixed "Unsupported file format" message when
        the extension is neither ``.pdf`` nor ``.docx``.
    """
    # Base64 never contains a comma, so the payload is whatever follows the
    # LAST comma. This tolerates commas in the header and a missing header.
    _header, _sep, content_string = contents.rpartition(',')
    decoded = base64.b64decode(content_string)

    lowered_name = filename.lower()
    if lowered_name.endswith('.pdf'):
        pdf = PdfReader(io.BytesIO(decoded))
        # A page without extractable text may yield None; treat it as empty.
        return "".join(page.extract_text() or "" for page in pdf.pages)
    if lowered_name.endswith('.docx'):
        doc = Document(io.BytesIO(decoded))
        return "\n".join(para.text for para in doc.paragraphs)
    return "Unsupported file format. Please upload a PDF or DOCX file."
def generate_outline(text: str, instructions: str) -> str:
    """Ask the Gemini model for a requirements outline of a PWS.

    Args:
        text: Full text of the Project Work Statement.
        instructions: Extra user guidance interpolated into the prompt.

    Returns:
        The ``text`` of the model's response.
    """
    outline_request = f"""
Analyze the following Project Work Statement (PWS) and create an outline
focusing on sections the indicate specific tasks and L&M (for compliance and writing guide). Extract the main headers, subheaders, and specific
requirements in each section. Pay special attention to requirements indicated
by words like "shall", "will", "must", and similar imperative language.
Additional instructions: {instructions}
Document text:
{text}
Provide the outline in a structured format, clearly highlighting the specific
requirements and their associated sections.
"""
    return model.generate_content(outline_request).text
def generate_pink_team_document(outline: str, instructions: str) -> str:
    """Ask the Gemini model to draft a Pink Team response from a PWS outline.

    Args:
        outline: Outline of the PWS to respond to.
        instructions: Extra user guidance interpolated into the prompt.

    Returns:
        The ``text`` of the model's response.
    """
    draft_request = f"""
Based on the following outline of a Project Work Statement (PWS):
{outline}
Additional instructions: {instructions}
Create a detailed response document as if MicroHealth is responding to this PWS.
Follow these guidelines:
1. Use Wikipedia style writing with active voice. Be firm with the approach, no soft words like could be, may be, should, might. Use definitve language.
2. For each requirement, describe in detail how MicroHealth will innovate to address it.
3. Explain the industry best practices that will be applied and the workflow to accomplish the steps in the best practice to address the requirement.
4. Provide measurable outcomes for the customer.
5. Limit the use of bullet points and write predominantly in paragraph format.
6. Ensure a logical flow of steps taken by MicroHealth for each requirement.
Generate a comprehensive response that showcases MicroHealth's expertise and approach.
"""
    return model.generate_content(draft_request).text
def evaluate_compliance(document: str, requirements: str) -> str:
    """Ask the Gemini model for a section-by-section compliance report.

    Args:
        document: Text of the response document under review.
        requirements: Requirements text the document is evaluated against.

    Returns:
        The ``text`` of the model's response.
    """
    review_request = f"""
Evaluate the following document against the requirements from sections L&M of the PWS:
Document:
{document}
Requirements:
{requirements}
Provide a compliance report by section number, highlighting:
1. Areas that need improvement
2. Suggestions on how MicroHealth can better respond to the requirements
3. Best industry practices that should be applied
4. Measurable outcomes that should be included
5. Organize by document section headers and numbers
Format the report clearly by section number.
"""
    return model.generate_content(review_request).text
def generate_red_document(document: str, compliance_report: str) -> str:
    """Ask the Gemini model to revise a document using its compliance report.

    Args:
        document: Text of the document to be revised.
        compliance_report: Compliance findings the revision must address.

    Returns:
        The ``text`` of the model's response.
    """
    revision_request = f"""
Based on the following document and compliance report:
Original Document:
{document}
Compliance Report:
{compliance_report}
Generate a revised "Red Team" document that addresses all issues found in the compliance report.
Follow these guidelines:
1. Use Wikipedia style writing with active voice. Be firm with the approach, no soft words like could be, may be, should, might. Use definitve language.
2. For each requirement, describe in detail how MicroHealth will innovate to address it.
3. Explain the industry best practices that will be applied and the workflow to accomplish the steps in the best practice to address the requirement.
4. Provide measurable outcomes for the customer.
5. Limit the use of bullet points and write predominantly in paragraph format.
6. Ensure a logical flow of steps taken by MicroHealth for each requirement.
Provide the revised document in a clear, structured format using paragraphs.
Limit the use of bullet points and write predominantly in paragraph format.
Ensure a logical flow of steps taken by MicroHealth for each requirement.
"""
    return model.generate_content(revision_request).text
# Column layout used when the model's answer contains no parsable table.
_LOE_COLUMNS = ['Task Summary', 'Labor Categories', 'Hours per Labor Category', 'Total Hours']


def _parse_loe_table(report_text):
    """Parse the pipe-delimited summary table out of *report_text*.

    Returns a DataFrame of string cells, or None when the text has no
    "| Task Summary |" header row.
    """
    start = report_text.find("| Task Summary |")
    if start == -1:
        return None

    rows = []
    for line in report_text[start:].splitlines():
        stripped = line.strip()
        if not stripped.startswith('|'):
            break  # the first non-table line (e.g. a blank one) ends the table
        inner = stripped[1:-1] if stripped.endswith('|') else stripped[1:]
        cells = [cell.strip() for cell in inner.split('|')]
        # Skip the markdown alignment row (|---|:--:|) that follows the header.
        if len(rows) == 1 and all(cell and set(cell) <= set('-:') for cell in cells):
            continue
        rows.append(cells)

    header, *body = rows
    width = len(header)
    # Pad or truncate ragged rows so every record matches the header width.
    body = [(row + [''] * width)[:width] for row in body]
    return pd.DataFrame(body, columns=header)


def generate_loe(document: str) -> Tuple[str, pd.DataFrame]:
    """Ask the Gemini model for a Level of Effort breakdown of *document*.

    Args:
        document: Text to analyse.

    Returns:
        A ``(report_text, table)`` pair. ``report_text`` is the model's
        response text; ``table`` is the summary table parsed from it. When no
        table is found, ``table`` is an empty DataFrame with the expected
        columns and a note is appended to ``report_text``.
    """
    prompt = f"""
Analyze the following document and provide a Level of Effort (LOE) breakdown:
Document:
{document}
For each section header in the document:
1. Identify the tasks to be completed
2. Determine the appropriate labor categories for each task
3. Estimate the number of hours required for each labor category to complete the task
Provide a detailed breakdown and then summarize the information in a tabular format with the following columns:
- Task Summary
- Labor Categories
- Hours per Labor Category
- Total Hours
Present the detailed breakdown first, followed by the summary table.
Ensure the table is properly formatted with | as column separators and a header row.
"""
    response = model.generate_content(prompt)
    # Work on a local copy of the text instead of assigning back onto the
    # response object.
    # NOTE(review): the SDK appears to expose `text` as a read-only property,
    # so in-place `+=` on it would fail - confirm against the SDK version.
    report_text = response.text

    df = _parse_loe_table(report_text)
    if df is None:
        df = pd.DataFrame(columns=_LOE_COLUMNS)
        report_text += "\n\nNote: No detailed LOE table could be generated from the AI response."
    return report_text, df
# Layout
# Shared appearance of every drag-and-drop upload box.
_UPLOAD_STYLE = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px',
}


def _upload_box(component_id):
    """Return a single-file ``dcc.Upload`` drop zone with the shared styling."""
    return dcc.Upload(
        id=component_id,
        children=html.Div(['Drag and Drop or ', html.A('Select Files')]),
        style=dict(_UPLOAD_STYLE),  # fresh copy per component
        multiple=False,
    )


def _instructions_box(component_id, placeholder):
    """Return the free-text "additional instructions" textarea for a tab."""
    return dbc.Textarea(
        id=component_id,
        placeholder=placeholder,
        style={'height': '100px', 'marginBottom': '10px'},
    )


def _upload_tab(label, tab_id, action_label, action_id, download_label):
    """Return a tab holding: upload box, action button, output, download."""
    return dbc.Tab(label=label, tab_id=tab_id, children=[
        _upload_box(f'upload-{tab_id}'),
        dbc.Button(action_label, id=action_id, className="mt-3"),
        dbc.Spinner(html.Div(id=f'{tab_id}-output')),
        dbc.Button(download_label, id=f"download-{tab_id}", className="mt-3"),
        dcc.Download(id=f"download-{tab_id}-doc"),
    ])


app.layout = dbc.Container([
    html.H1("MicroHealth PWS Analysis and Response Generator", className="my-4"),
    dbc.Tabs([
        dbc.Tab(label="Shred", tab_id="shred", children=[
            _instructions_box(
                'shred-instructions',
                "Enter any additional instructions for shredding the document...",
            ),
            _upload_box('upload-document'),
            dbc.Spinner(html.Div(id='shred-output')),
            dbc.Button("Download Outline", id="download-shred", className="mt-3"),
            dcc.Download(id="download-shred-doc"),
        ]),
        dbc.Tab(label="Pink", tab_id="pink", children=[
            _instructions_box(
                'pink-instructions',
                "Enter any additional instructions for generating the Pink Team document...",
            ),
            dbc.Button("Generate Pink Team Document", id="generate-pink", className="mt-3"),
            dbc.Spinner(html.Div(id='pink-output')),
            dbc.Button("Download Pink Team Document", id="download-pink", className="mt-3"),
            dcc.Download(id="download-pink-doc"),
        ]),
        _upload_tab("P.Review", "p-review", "Evaluate Compliance",
                    "evaluate-p-review", "Download P.Review Report"),
        _upload_tab("Red", "red", "Generate Red Team Document",
                    "generate-red", "Download Red Team Document"),
        _upload_tab("R.Review", "r-review", "Evaluate Compliance",
                    "evaluate-r-review", "Download R.Review Report"),
        _upload_tab("G.Review", "g-review", "Evaluate Compliance",
                    "evaluate-g-review", "Download G.Review Report"),
        dbc.Tab(label="LOE", tab_id="loe", children=[
            dbc.Button("Generate LOE", id="generate-loe", className="mt-3"),
            dbc.Spinner(html.Div(id='loe-output')),
            dbc.Button("Download LOE Report", id="download-loe", className="mt-3"),
            dcc.Download(id="download-loe-doc"),
        ]),
    ], id="tabs", active_tab="shred"),
])
def update_shred_output(contents, filename, instructions):
    """Build the Shred tab output for an uploaded document.

    Returns a prompt string when nothing has been uploaded; otherwise a
    ``dcc.Markdown`` component holding the generated outline.
    """
    if contents is None:
        return "Upload a document to begin."
    extra_guidance = instructions or ""
    document_text = process_document(contents, filename)
    return dcc.Markdown(generate_outline(document_text, extra_guidance))
def update_pink_output(n_clicks, shred_output, instructions):
    """Build the Pink tab output from the Shred tab's outline.

    Returns a hint string until the button has been clicked and an outline
    exists; otherwise a ``dcc.Markdown`` component with the generated draft.
    """
    ready = n_clicks is not None and shred_output is not None
    if not ready:
        return "Generate an outline in the Shred tab first."
    draft = generate_pink_team_document(shred_output, instructions or "")
    return dcc.Markdown(draft)
def update_p_review_output(n_clicks, contents, filename, pink_doc, requirements):
    """Build the P.Review tab output.

    An uploaded file takes precedence over ``pink_doc``. Returns a hint string
    when the button was not clicked or no document is available; otherwise a
    ``dcc.Markdown`` component with the compliance report.
    """
    if n_clicks is None:
        return "Click 'Evaluate Compliance' to begin."
    if not contents and not pink_doc:
        return "Please upload a document or generate a Pink Team document first."
    reviewed_text = process_document(contents, filename) if contents else pink_doc
    report = evaluate_compliance(reviewed_text, requirements)
    return dcc.Markdown(report)
def update_g_review_output(n_clicks, contents, filename, requirements):
    """Build the G.Review tab output from an uploaded document.

    Returns a hint string when the button was not clicked or nothing was
    uploaded; otherwise a ``dcc.Markdown`` component with the compliance
    report.
    """
    if n_clicks is None:
        return "Click 'Evaluate Compliance' to begin."
    if contents is None:
        return "Please upload a document first."
    uploaded_text = process_document(contents, filename)
    return dcc.Markdown(evaluate_compliance(uploaded_text, requirements))
def update_loe_output(n_clicks, shred_output):
    """Build the LOE tab output from the Shred tab's outline.

    Returns a hint string when the button was not clicked or no outline
    exists; otherwise a two-item list: the LOE narrative as ``dcc.Markdown``
    followed by the summary table as a ``DataTable``.
    """
    if n_clicks is None:
        return "Click 'Generate LOE' to begin."
    if shred_output is None:
        return "Please complete the Shred tab first."

    narrative, table = generate_loe(shred_output)
    column_specs = [{'name': i, 'id': i} for i in table.columns]
    summary_table = dash_table.DataTable(
        data=table.to_dict('records'),
        columns=column_specs,
        style_table={'overflowX': 'auto'},
        style_cell={'textAlign': 'left', 'padding': '5px'},
        style_header={'backgroundColor': 'rgb(230, 230, 230)', 'fontWeight': 'bold'},
    )
    return [dcc.Markdown(narrative), summary_table]
def update_red_output(n_clicks, contents, filename, p_review_output):
    """Build the Red tab output.

    Returns a hint string when the button was not clicked or neither an upload
    nor a P.Review result is available; otherwise a ``dcc.Markdown`` component
    with the revised document.
    """
    if n_clicks is None:
        return "Click 'Generate Red Team Document' to begin."
    if not (contents or p_review_output):
        return "Please upload a document or complete the P.Review first."
    # NOTE(review): without an upload, the P.Review output serves as both the
    # document and the compliance report - confirm this is intended.
    source_document = process_document(contents, filename) if contents else p_review_output
    return dcc.Markdown(generate_red_document(source_document, p_review_output))
def update_r_review_output(n_clicks, contents, filename, red_doc, requirements):
    """Build the R.Review tab output.

    An uploaded file takes precedence over ``red_doc``. Returns a hint string
    when the button was not clicked or no document is available; otherwise a
    ``dcc.Markdown`` component with the compliance report.
    """
    if n_clicks is None:
        return "Click 'Evaluate Compliance' to begin."
    if not contents and not red_doc:
        return "Please upload a document or generate a Red Team document first."
    reviewed_text = process_document(contents, filename) if contents else red_doc
    report = evaluate_compliance(reviewed_text, requirements)
    return dcc.Markdown(report)
def _add_formatted_paragraph(doc, text):
    """Append *text* to *doc* as one paragraph, honouring inline markers."""
    if not text.strip():
        return  # nothing to render; avoid an empty paragraph
    paragraph = doc.add_paragraph()
    bold = italic = strike = False
    # The capture group keeps the markers in the split result so they can
    # toggle the formatting state in order of appearance.
    for piece in re.split(r'(\*\*|\*|__|\~\~)', text):
        if piece in ('**', '__'):
            bold = not bold
        elif piece == '*':
            italic = not italic
        elif piece == '~~':
            strike = not strike
        elif piece:  # skip the empty strings produced around adjacent markers
            run = paragraph.add_run(piece)
            run.bold = bold
            run.italic = italic
            run.font.strike = strike


def parse_markdown(doc, content):
    """Append markdown-like *content* to the document *doc*.

    Blocks are separated by blank lines. A line of the form ``# text`` to
    ``###### text`` becomes a heading of the matching level, even when body
    text follows it in the same block. Everything else becomes paragraphs in
    which ``**``/``__`` toggle bold, ``*`` toggles italic and ``~~`` toggles
    strikethrough. No marker toggles underline, so it is left untouched.

    Args:
        doc: Object providing ``add_heading(text, level=...)`` and
            ``add_paragraph()``; the latter must return an object with
            ``add_run(text)``.
        content: The text to convert.
    """
    heading_pattern = re.compile(r'^(#{1,6})\s+(.+)$')
    for block in content.split('\n\n'):
        pending = []  # consecutive non-heading lines of the current block
        for line in block.split('\n'):
            match = heading_pattern.match(line)
            if match is None:
                pending.append(line)
                continue
            # Emit the text gathered so far before the heading to keep order.
            _add_formatted_paragraph(doc, '\n'.join(pending))
            pending = []
            doc.add_heading(match.group(2), level=len(match.group(1)))
        _add_formatted_paragraph(doc, '\n'.join(pending))
def create_docx(content):
    """Create a Word document from markdown-like *content*.

    A 'Code' paragraph style (Courier New, 10 pt) is registered when the new
    document does not already have one, then *content* is rendered into the
    document via ``parse_markdown``.

    Returns:
        The populated document object.
    """
    document = Document()

    known_style_names = {style.name for style in document.styles}
    if 'Code' not in known_style_names:
        monospace_font = document.styles.add_style('Code', WD_STYLE_TYPE.PARAGRAPH).font
        monospace_font.name = 'Courier New'
        monospace_font.size = Pt(10)

    parse_markdown(document, content)
    return document
def _output_text(output):
    """Return the text carried by a tab's output value.

    A dict is treated as a serialized component and its
    ``['props']['children']`` entry is returned; anything else is converted
    with ``str``.
    """
    if isinstance(output, dict):
        return output['props']['children']
    return str(output)


def _docx_download(output, filename):
    """Render *output* to a .docx and return it as a download named *filename*.

    Returns ``dash.no_update`` when there is no output to export.
    """
    if output is None:
        return dash.no_update
    buffer = BytesIO()
    create_docx(_output_text(output)).save(buffer)
    # getvalue() returns the whole buffer regardless of the stream position.
    return dcc.send_bytes(buffer.getvalue(), filename)


def download_shred(n_clicks, shred_output):
    """Export the Shred tab outline as ``shred_outline.docx``."""
    return _docx_download(shred_output, "shred_outline.docx")


def download_pink(n_clicks, pink_output):
    """Export the Pink Team document as ``pink_team_document.docx``."""
    return _docx_download(pink_output, "pink_team_document.docx")


def download_p_review(n_clicks, p_review_output):
    """Export the P.Review report as ``p_review_report.docx``."""
    return _docx_download(p_review_output, "p_review_report.docx")


def download_red(n_clicks, red_output):
    """Export the Red Team document as ``red_team_document.docx``."""
    return _docx_download(red_output, "red_team_document.docx")


def download_r_review(n_clicks, r_review_output):
    """Export the R.Review report as ``r_review_report.docx``."""
    return _docx_download(r_review_output, "r_review_report.docx")


def download_g_review(n_clicks, g_review_output):
    """Export the G.Review report as ``g_review_report.docx``."""
    return _docx_download(g_review_output, "g_review_report.docx")


def download_loe(n_clicks, loe_output):
    """Export the LOE narrative as ``loe_report.docx``.

    When the output is a non-empty list, only its first item is exported;
    any further items (such as a table component) are not included.
    """
    if isinstance(loe_output, list) and len(loe_output) > 0:
        loe_output = loe_output[0]
    return _docx_download(loe_output, "loe_report.docx")
if __name__ == '__main__':
    # Debug mode enables an interactive in-browser debugger that can execute
    # code. The server binds to all interfaces (0.0.0.0), so debug stays off
    # unless explicitly requested through the DASH_DEBUG environment variable.
    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in {"1", "true", "yes"}
    print("Starting the Dash application...")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")