# Source: Hugging Face Spaces - MicroHealth / proposal-writer (status: Paused)
# File size: 22,913 Bytes

import base64
import io
import os
import threading
import time
from typing import List, Tuple
import re
import pandas as pd
from docx import Document
from io import BytesIO
import dash
import dash_bootstrap_components as dbc
from dash import html, dcc, Input, Output, State, ctx, dash_table
import google.generativeai as genai
from docx import Document
from docx.shared import Pt
from docx.enum.style import WD_STYLE_TYPE
from PyPDF2 import PdfReader

# Initialize Dash app
# Single Dash application instance using the Bootstrap theme from
# dash-bootstrap-components; every @app.callback in this file registers on it.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Configure Gemini AI
# The API key is read from the GEMINI_API_KEY environment variable. The
# subscript lookup raises KeyError at import time when the variable is unset.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
# Module-level model handle used by the generate_* / evaluate_* helpers below.
model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')

def process_document(contents: str, filename: str) -> str:
    """Extract plain text from an uploaded PDF or DOCX file.

    Args:
        contents: Upload payload, normally a data URL of the form
            ``data:<mime>;base64,<payload>``. A bare base64 string (no
            prefix) is accepted as well.
        filename: Original file name; only its extension is inspected, and
            the comparison is case-insensitive.

    Returns:
        The extracted text, or the message
        "Unsupported file format. Please upload a PDF or DOCX file."
        when the extension is neither ``.pdf`` nor ``.docx``.
    """
    # Base64 never contains a comma, so everything after the first comma (or
    # the whole string when there is no prefix) is the payload.
    content_string = contents.split(',', 1)[-1]
    decoded = base64.b64decode(content_string)

    name = (filename or "").lower()

    if name.endswith('.pdf'):
        pdf = PdfReader(io.BytesIO(decoded))
        # A page may yield no text (e.g. scanned images); treat that as an
        # empty string and keep pages separated so words do not run together.
        return "\n".join(page.extract_text() or "" for page in pdf.pages)

    if name.endswith('.docx'):
        doc = Document(io.BytesIO(decoded))
        return "\n".join(para.text for para in doc.paragraphs)

    return "Unsupported file format. Please upload a PDF or DOCX file."

def generate_outline(text: str, instructions: str) -> str:
    """Ask the Gemini model for a requirements outline of a PWS.

    Args:
        text: Full text of the Project Work Statement.
        instructions: Extra free-form guidance inserted into the prompt.

    Returns:
        The ``text`` attribute of the model's response.
    """
    request = f"""
    Analyze the following Project Work Statement (PWS) and create an outline 
    focusing on sections the indicate specific tasks and L&M (for compliance and writing guide). Extract the main headers, subheaders, and specific 
    requirements in each section. Pay special attention to requirements indicated 
    by words like "shall", "will", "must", and similar imperative language.

    Additional instructions: {instructions}

    Document text:
    {text}

    Provide the outline in a structured format, clearly highlighting the specific 
    requirements and their associated sections.
    """
    return model.generate_content(request).text

def generate_pink_team_document(outline: str, instructions: str) -> str:
    """Ask the Gemini model to draft the Pink Team response to a PWS outline.

    Args:
        outline: Outline of the PWS that the response must address.
        instructions: Extra free-form guidance inserted into the prompt.

    Returns:
        The ``text`` attribute of the model's response.
    """
    request = f"""
    Based on the following outline of a Project Work Statement (PWS):

    {outline}

    Additional instructions: {instructions}

    Create a detailed response document as if MicroHealth is responding to this PWS. 
    Follow these guidelines:
    1. Use Wikipedia style writing with active voice.  Be firm with the approach, no soft words like could be, may be, should, might.  Use definitve language.
    2. For each requirement, describe in detail how MicroHealth will innovate to address it.
    3. Explain the industry best practices that will be applied and the workflow to accomplish the steps in the best practice to address the requirement.
    4. Provide measurable outcomes for the customer.
    5. Limit the use of bullet points and write predominantly in paragraph format.
    6. Ensure a logical flow of steps taken by MicroHealth for each requirement.

    Generate a comprehensive response that showcases MicroHealth's expertise and approach.
    """
    return model.generate_content(request).text

def evaluate_compliance(document: str, requirements: str) -> str:
    """Ask the Gemini model for a section-by-section compliance report.

    Args:
        document: Response document to be evaluated.
        requirements: Requirements text the document is checked against.

    Returns:
        The ``text`` attribute of the model's response.
    """
    request = f"""
    Evaluate the following document against the requirements from sections L&M of the PWS:

    Document:
    {document}

    Requirements:
    {requirements}

    Provide a compliance report by section number, highlighting:
    1. Areas that need improvement
    2. Suggestions on how MicroHealth can better respond to the requirements
    3. Best industry practices that should be applied
    4. Measurable outcomes that should be included
    5. Organize by document section headers and numbers

    Format the report clearly by section number.
    """
    return model.generate_content(request).text

def generate_red_document(document: str, compliance_report: str) -> str:
    """Ask the Gemini model to revise a document using its compliance report.

    Args:
        document: The document being revised.
        compliance_report: Findings the revision has to address.

    Returns:
        The ``text`` attribute of the model's response.
    """
    request = f"""
    Based on the following document and compliance report:

    Original Document:
    {document}

    Compliance Report:
    {compliance_report}

    Generate a revised "Red Team" document that addresses all issues found in the compliance report. 
    Follow these guidelines:
    1. Use Wikipedia style writing with active voice.  Be firm with the approach, no soft words like could be, may be, should, might.  Use definitve language.
    2. For each requirement, describe in detail how MicroHealth will innovate to address it.
    3. Explain the industry best practices that will be applied and the workflow to accomplish the steps in the best practice to address the requirement.
    4. Provide measurable outcomes for the customer.
    5. Limit the use of bullet points and write predominantly in paragraph format.
    6. Ensure a logical flow of steps taken by MicroHealth for each requirement.

    Provide the revised document in a clear, structured format using paragraphs.
    Limit the use of bullet points and write predominantly in paragraph format.
    Ensure a logical flow of steps taken by MicroHealth for each requirement.
    """
    return model.generate_content(request).text

# Columns used when the model's answer contains no recognisable table.
_LOE_COLUMNS = ['Task Summary', 'Labor Categories', 'Hours per Labor Category', 'Total Hours']


def _split_table_row(row: str) -> List[str]:
    """Split one markdown table row into stripped cell strings."""
    # Drop exactly one leading and one trailing pipe so empty edge cells
    # inside the table are not swallowed.
    if row.startswith('|'):
        row = row[1:]
    if row.endswith('|'):
        row = row[:-1]
    return [cell.strip() for cell in row.split('|')]


def _parse_loe_table(markdown: str) -> pd.DataFrame:
    """Extract the pipe-delimited summary table from the model's answer."""
    lines = [line.strip() for line in markdown.splitlines()]
    start = next(
        (
            index
            for index, line in enumerate(lines)
            if line.startswith('|') and 'task summary' in line.lower()
        ),
        None,
    )
    if start is None:
        return pd.DataFrame(columns=_LOE_COLUMNS)

    # Header cells are sometimes emphasised (**Task Summary**); strip that.
    header = [cell.strip('* ') for cell in _split_table_row(lines[start])]
    records = []
    for line in lines[start + 1:]:
        if not line.startswith('|'):
            break  # first non-table line ends the table
        cells = _split_table_row(line)
        # Skip the markdown alignment row (|---|:---:|...).
        if all(cell and set(cell) <= {'-', ':'} for cell in cells):
            continue
        # Pad or truncate ragged rows so they fit the header.
        records.append((cells + [''] * len(header))[:len(header)])
    return pd.DataFrame(records, columns=header)


def generate_loe(document: str) -> Tuple[str, pd.DataFrame]:
    """Ask the Gemini model for a Level of Effort breakdown of a document.

    Args:
        document: Text whose sections are analysed for tasks, labor
            categories and hours.

    Returns:
        A tuple ``(text, table)``: the model's full answer and the summary
        table parsed from it. When no table rows can be parsed, ``table`` is
        empty and a note saying so is appended to ``text``.
    """
    prompt = f"""
    Analyze the following document and provide a Level of Effort (LOE) breakdown:

    Document:
    {document}

    For each section header in the document:
    1. Identify the tasks to be completed
    2. Determine the appropriate labor categories for each task
    3. Estimate the number of hours required for each labor category to complete the task

    Provide a detailed breakdown and then summarize the information in a tabular format with the following columns:
    - Task Summary
    - Labor Categories
    - Hours per Labor Category
    - Total Hours

    Present the detailed breakdown first, followed by the summary table.
    Ensure the table is properly formatted with | as column separators and a header row.
    """
    response = model.generate_content(prompt)

    # Work on a local copy of the answer; the response object is not mutated.
    loe_text = response.text
    df = _parse_loe_table(loe_text)
    if df.empty:
        loe_text += "\n\nNote: No detailed LOE table could be generated from the AI response."

    return loe_text, df

# Layout
# Shared appearance of every drag-and-drop upload area.
_UPLOAD_STYLE = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px'
}
# Shared appearance of the free-form instruction text areas.
_INSTRUCTIONS_STYLE = {'height': '100px', 'marginBottom': '10px'}


def _upload_area(component_id):
    """Build the single-file drag-and-drop upload widget with the given id."""
    return dcc.Upload(
        id=component_id,
        children=html.Div(['Drag and Drop or ', html.A('Select Files')]),
        style=dict(_UPLOAD_STYLE),
        multiple=False
    )


def _instructions_box(component_id, placeholder):
    """Build the text area used for additional prompt instructions."""
    return dbc.Textarea(
        id=component_id,
        placeholder=placeholder,
        style=dict(_INSTRUCTIONS_STYLE)
    )


def _result_controls(output_id, download_label, download_id):
    """Build the spinner-wrapped output, download button and download target."""
    return [
        dbc.Spinner(html.Div(id=output_id)),
        dbc.Button(download_label, id=download_id, className="mt-3"),
        dcc.Download(id=f"{download_id}-doc"),
    ]


def _review_tab(label, slug):
    """Build a compliance-review tab; all component ids derive from ``slug``."""
    return dbc.Tab(label=label, tab_id=slug, children=[
        _upload_area(f"upload-{slug}"),
        dbc.Button("Evaluate Compliance", id=f"evaluate-{slug}", className="mt-3"),
        *_result_controls(f"{slug}-output", f"Download {label} Report", f"download-{slug}"),
    ])


app.layout = dbc.Container([
    html.H1("MicroHealth PWS Analysis and Response Generator", className="my-4"),
    dbc.Tabs([
        dbc.Tab(label="Shred", tab_id="shred", children=[
            _instructions_box(
                'shred-instructions',
                "Enter any additional instructions for shredding the document..."
            ),
            _upload_area('upload-document'),
            *_result_controls('shred-output', "Download Outline", "download-shred"),
        ]),
        dbc.Tab(label="Pink", tab_id="pink", children=[
            _instructions_box(
                'pink-instructions',
                "Enter any additional instructions for generating the Pink Team document..."
            ),
            dbc.Button("Generate Pink Team Document", id="generate-pink", className="mt-3"),
            *_result_controls('pink-output', "Download Pink Team Document", "download-pink"),
        ]),
        _review_tab("P.Review", "p-review"),
        dbc.Tab(label="Red", tab_id="red", children=[
            _upload_area('upload-red'),
            dbc.Button("Generate Red Team Document", id="generate-red", className="mt-3"),
            *_result_controls('red-output', "Download Red Team Document", "download-red"),
        ]),
        _review_tab("R.Review", "r-review"),
        _review_tab("G.Review", "g-review"),
        dbc.Tab(label="LOE", tab_id="loe", children=[
            dbc.Button("Generate LOE", id="generate-loe", className="mt-3"),
            *_result_controls('loe-output', "Download LOE Report", "download-loe"),
        ]),
    ], id="tabs", active_tab="shred"),
])

@app.callback(
    Output('shred-output', 'children'),
    Input('upload-document', 'contents'),
    State('upload-document', 'filename'),
    State('shred-instructions', 'value')
)
def update_shred_output(contents, filename, instructions):
    """Render the requirements outline for a freshly uploaded PWS.

    Args:
        contents: Upload payload of 'upload-document', or None before any
            upload.
        filename: Name of the uploaded file.
        instructions: Optional extra instructions typed on the Shred tab.

    Returns:
        A ``dcc.Markdown`` with the outline, or a plain message string when
        there is nothing to analyse.
    """
    if contents is None:
        return "Upload a document to begin."

    text = process_document(contents, filename)

    # process_document signals an unsupported extension by returning this
    # message instead of raising. Show it to the user rather than sending it
    # to the model as if it were the document.
    if text == "Unsupported file format. Please upload a PDF or DOCX file.":
        return text
    if not text.strip():
        return "No text could be extracted from the uploaded document."

    outline = generate_outline(text, instructions or "")
    return dcc.Markdown(outline)

@app.callback(
    Output('pink-output', 'children'),
    Input('generate-pink', 'n_clicks'),
    State('shred-output', 'children'),
    State('pink-instructions', 'value')
)
def update_pink_output(n_clicks, shred_output, instructions):
    """Generate the Pink Team document from the Shred-tab outline.

    Args:
        n_clicks: Click count of 'generate-pink' (None until first click).
        shred_output: Current children of 'shred-output'.
        instructions: Optional extra instructions typed on the Pink tab.

    Returns:
        A ``dcc.Markdown`` with the document, or a message string when no
        outline is available yet.
    """
    # A rendered outline arrives as the serialized dcc.Markdown component (a
    # dict whose text sits under props/children), as the download callbacks
    # in this file already assume. Placeholder messages arrive as plain
    # strings and must not be treated as an outline.
    outline = shred_output.get('props', {}).get('children') if isinstance(shred_output, dict) else None

    if n_clicks is None or not outline:
        return "Generate an outline in the Shred tab first."

    pink_doc = generate_pink_team_document(outline, instructions or "")
    return dcc.Markdown(pink_doc)

@app.callback(
    Output('p-review-output', 'children'),
    Input('evaluate-p-review', 'n_clicks'),
    State('upload-p-review', 'contents'),
    State('upload-p-review', 'filename'),
    State('pink-output', 'children'),
    State('shred-output', 'children')
)
def update_p_review_output(n_clicks, contents, filename, pink_doc, requirements):
    """Evaluate the Pink Team document (or an upload) against the outline.

    Args:
        n_clicks: Click count of 'evaluate-p-review' (None until first click).
        contents: Upload payload of 'upload-p-review', if any.
        filename: Name of the uploaded file.
        pink_doc: Current children of 'pink-output'.
        requirements: Current children of 'shred-output'.

    Returns:
        A ``dcc.Markdown`` with the compliance report, or a message string
        when an input is missing.
    """
    if n_clicks is None:
        return "Click 'Evaluate Compliance' to begin."

    # Generated results arrive as serialized dcc.Markdown dicts (text under
    # props/children); placeholder messages are plain strings and count as
    # "not generated yet".
    if contents:
        document = process_document(contents, filename)
    else:
        document = pink_doc.get('props', {}).get('children') if isinstance(pink_doc, dict) else None
    if not document:
        return "Please upload a document or generate a Pink Team document first."

    outline = requirements.get('props', {}).get('children') if isinstance(requirements, dict) else None
    if not outline:
        return "Generate an outline in the Shred tab first."

    compliance_report = evaluate_compliance(document, outline)
    return dcc.Markdown(compliance_report)

@app.callback(
    Output('g-review-output', 'children'),
    Input('evaluate-g-review', 'n_clicks'),
    State('upload-g-review', 'contents'),
    State('upload-g-review', 'filename'),
    State('shred-output', 'children')
)
def update_g_review_output(n_clicks, contents, filename, requirements):
    """Evaluate an uploaded document against the Shred-tab outline.

    Args:
        n_clicks: Click count of 'evaluate-g-review' (None until first click).
        contents: Upload payload of 'upload-g-review', if any.
        filename: Name of the uploaded file.
        requirements: Current children of 'shred-output'.

    Returns:
        A ``dcc.Markdown`` with the compliance report, or a message string
        when an input is missing.
    """
    if n_clicks is None:
        return "Click 'Evaluate Compliance' to begin."

    if contents is None:
        return "Please upload a document first."

    # The outline arrives as a serialized dcc.Markdown dict (text under
    # props/children); a plain string is only a placeholder message.
    outline = requirements.get('props', {}).get('children') if isinstance(requirements, dict) else None
    if not outline:
        return "Generate an outline in the Shred tab first."

    document = process_document(contents, filename)
    compliance_report = evaluate_compliance(document, outline)
    return dcc.Markdown(compliance_report)

@app.callback(
    Output('loe-output', 'children'),
    Input('generate-loe', 'n_clicks'),
    State('shred-output', 'children')
)
def update_loe_output(n_clicks, shred_output):
    """Generate the Level of Effort narrative and table from the outline.

    Args:
        n_clicks: Click count of 'generate-loe' (None until first click).
        shred_output: Current children of 'shred-output'.

    Returns:
        A list holding a ``dcc.Markdown`` and a ``DataTable``, or a message
        string when the outline is missing.
    """
    if n_clicks is None:
        return "Click 'Generate LOE' to begin."

    # The outline arrives as a serialized dcc.Markdown dict (text under
    # props/children); a plain string is only a placeholder message.
    outline = shred_output.get('props', {}).get('children') if isinstance(shred_output, dict) else None
    if not outline:
        return "Please complete the Shred tab first."

    loe_text, loe_df = generate_loe(outline)

    return [
        dcc.Markdown(loe_text),
        dash_table.DataTable(
            data=loe_df.to_dict('records'),
            columns=[{'name': i, 'id': i} for i in loe_df.columns],
            style_table={'overflowX': 'auto'},
            style_cell={'textAlign': 'left', 'padding': '5px'},
            style_header={'backgroundColor': 'rgb(230, 230, 230)', 'fontWeight': 'bold'}
        )
    ]

@app.callback(
    Output('red-output', 'children'),
    Input('generate-red', 'n_clicks'),
    State('upload-red', 'contents'),
    State('upload-red', 'filename'),
    State('p-review-output', 'children'),
    State('pink-output', 'children')
)
def update_red_output(n_clicks, contents, filename, p_review_output, pink_output=None):
    """Generate the Red Team revision from a document and its P.Review report.

    The document is the upload on the Red tab when present, otherwise the
    Pink Team document. Previously the compliance report itself was used as
    the document when nothing was uploaded, so the model received the report
    twice and never saw the text it was supposed to revise.

    Args:
        n_clicks: Click count of 'generate-red' (None until first click).
        contents: Upload payload of 'upload-red', if any.
        filename: Name of the uploaded file.
        p_review_output: Current children of 'p-review-output'.
        pink_output: Current children of 'pink-output'.

    Returns:
        A ``dcc.Markdown`` with the revised document, or a message string
        when an input is missing.
    """
    if n_clicks is None:
        return "Click 'Generate Red Team Document' to begin."

    # Generated results arrive as serialized dcc.Markdown dicts (text under
    # props/children); placeholder messages are plain strings and count as
    # "not generated yet".
    compliance_report = (
        p_review_output.get('props', {}).get('children')
        if isinstance(p_review_output, dict) else None
    )

    if contents:
        document = process_document(contents, filename)
    else:
        document = pink_output.get('props', {}).get('children') if isinstance(pink_output, dict) else None

    if not document or not compliance_report:
        return "Please upload a document or complete the P.Review first."

    red_doc = generate_red_document(document, compliance_report)
    return dcc.Markdown(red_doc)

@app.callback(
    Output('r-review-output', 'children'),
    Input('evaluate-r-review', 'n_clicks'),
    State('upload-r-review', 'contents'),
    State('upload-r-review', 'filename'),
    State('red-output', 'children'),
    State('shred-output', 'children')
)
def update_r_review_output(n_clicks, contents, filename, red_doc, requirements):
    """Evaluate the Red Team document (or an upload) against the outline.

    Args:
        n_clicks: Click count of 'evaluate-r-review' (None until first click).
        contents: Upload payload of 'upload-r-review', if any.
        filename: Name of the uploaded file.
        red_doc: Current children of 'red-output'.
        requirements: Current children of 'shred-output'.

    Returns:
        A ``dcc.Markdown`` with the compliance report, or a message string
        when an input is missing.
    """
    if n_clicks is None:
        return "Click 'Evaluate Compliance' to begin."

    # Generated results arrive as serialized dcc.Markdown dicts (text under
    # props/children); placeholder messages are plain strings and count as
    # "not generated yet".
    if contents:
        document = process_document(contents, filename)
    else:
        document = red_doc.get('props', {}).get('children') if isinstance(red_doc, dict) else None
    if not document:
        return "Please upload a document or generate a Red Team document first."

    outline = requirements.get('props', {}).get('children') if isinstance(requirements, dict) else None
    if not outline:
        return "Generate an outline in the Shred tab first."

    compliance_report = evaluate_compliance(document, outline)
    return dcc.Markdown(compliance_report)

def _add_markdown_runs(paragraph, text):
    """Append ``text`` to ``paragraph`` as runs, applying inline emphasis."""
    bold = italic = strike = False
    for piece in re.split(r'(\*\*|\*|__|~~)', text):
        if piece in ('**', '__'):
            bold = not bold
        elif piece == '*':
            italic = not italic
        elif piece == '~~':
            strike = not strike
        elif piece:  # re.split yields '' around adjacent markers; skip those
            run = paragraph.add_run(piece)
            run.bold = bold
            run.italic = italic
            run.font.strike = strike


def parse_markdown(doc, content):
    """Append a light-weight rendering of markdown ``content`` to ``doc``.

    Handled constructs: ATX headers (``#`` to ``######``), bullet items
    starting with ``*``, ``-`` or ``+``, and the inline markers ``**``/``__``
    (bold), ``*`` (italic) and ``~~`` (strikethrough). Anything else is
    emitted as plain paragraph text.

    The text is scanned line by line, so a header directly followed by body
    text on the next line is still recognised, and a leading ``* `` bullet
    marker is not mistaken for an italic toggle.

    Args:
        doc: Target document; must provide ``add_heading(text, level=...)``
            and ``add_paragraph(style=...)`` as python-docx documents do.
        content: Markdown text.
    """
    header_re = re.compile(r'^(#{1,6})\s+(.+)$')
    bullet_re = re.compile(r'^[*+-]\s+(.+)$')

    pending = []  # consecutive plain lines forming the current paragraph

    def flush_paragraph():
        if pending:
            # Keep the original line breaks inside a paragraph.
            _add_markdown_runs(doc.add_paragraph(), '\n'.join(pending))
            pending.clear()

    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line:
            flush_paragraph()  # a blank line ends the paragraph
            continue

        header = header_re.match(line)
        if header:
            flush_paragraph()
            doc.add_heading(header.group(2).strip(), level=len(header.group(1)))
            continue

        bullet = bullet_re.match(line)
        if bullet:
            flush_paragraph()
            _add_markdown_runs(doc.add_paragraph(style='List Bullet'), bullet.group(1))
            continue

        pending.append(raw_line.rstrip())

    flush_paragraph()

def create_docx(content):
    """Build a new Word document from markdown ``content``.

    A 'Code' paragraph style (Courier New, 10 pt) is registered on the
    document when it does not already exist, then the content is rendered
    through ``parse_markdown``.

    Returns:
        The populated document object.
    """
    document = Document()

    # Register the monospace style only once per document.
    known_styles = {style.name for style in document.styles}
    if 'Code' not in known_styles:
        mono_font = document.styles.add_style('Code', WD_STYLE_TYPE.PARAGRAPH).font
        mono_font.name = 'Courier New'
        mono_font.size = Pt(10)

    parse_markdown(document, content)
    return document

@app.callback(
    Output("download-shred-doc", "data"),
    Input("download-shred", "n_clicks"),
    State('shred-output', 'children'),
    prevent_initial_call=True,
)
def download_shred(n_clicks, shred_output):
    """Send the Shred-tab output to the browser as shred_outline.docx."""
    if shred_output is None:
        return dash.no_update

    # A serialized component keeps its text under props/children; anything
    # else is stringified as-is.
    if isinstance(shred_output, dict):
        markdown_text = shred_output['props']['children']
    else:
        markdown_text = str(shred_output)

    stream = BytesIO()
    create_docx(markdown_text).save(stream)
    return dcc.send_bytes(stream.getvalue(), "shred_outline.docx")

@app.callback(
    Output("download-pink-doc", "data"),
    Input("download-pink", "n_clicks"),
    State('pink-output', 'children'),
    prevent_initial_call=True,
)
def download_pink(n_clicks, pink_output):
    """Send the Pink-tab output to the browser as pink_team_document.docx."""
    if pink_output is None:
        return dash.no_update

    # A serialized component keeps its text under props/children; anything
    # else is stringified as-is.
    if isinstance(pink_output, dict):
        markdown_text = pink_output['props']['children']
    else:
        markdown_text = str(pink_output)

    stream = BytesIO()
    create_docx(markdown_text).save(stream)
    return dcc.send_bytes(stream.getvalue(), "pink_team_document.docx")

@app.callback(
    Output("download-p-review-doc", "data"),
    Input("download-p-review", "n_clicks"),
    State('p-review-output', 'children'),
    prevent_initial_call=True,
)
def download_p_review(n_clicks, p_review_output):
    """Send the P.Review output to the browser as p_review_report.docx."""
    if p_review_output is None:
        return dash.no_update

    # A serialized component keeps its text under props/children; anything
    # else is stringified as-is.
    if isinstance(p_review_output, dict):
        markdown_text = p_review_output['props']['children']
    else:
        markdown_text = str(p_review_output)

    stream = BytesIO()
    create_docx(markdown_text).save(stream)
    return dcc.send_bytes(stream.getvalue(), "p_review_report.docx")

@app.callback(
    Output("download-red-doc", "data"),
    Input("download-red", "n_clicks"),
    State('red-output', 'children'),
    prevent_initial_call=True,
)
def download_red(n_clicks, red_output):
    """Send the Red-tab output to the browser as red_team_document.docx."""
    if red_output is None:
        return dash.no_update

    # A serialized component keeps its text under props/children; anything
    # else is stringified as-is.
    if isinstance(red_output, dict):
        markdown_text = red_output['props']['children']
    else:
        markdown_text = str(red_output)

    stream = BytesIO()
    create_docx(markdown_text).save(stream)
    return dcc.send_bytes(stream.getvalue(), "red_team_document.docx")

@app.callback(
    Output("download-r-review-doc", "data"),
    Input("download-r-review", "n_clicks"),
    State('r-review-output', 'children'),
    prevent_initial_call=True,
)
def download_r_review(n_clicks, r_review_output):
    """Send the R.Review output to the browser as r_review_report.docx."""
    if r_review_output is None:
        return dash.no_update

    # A serialized component keeps its text under props/children; anything
    # else is stringified as-is.
    if isinstance(r_review_output, dict):
        markdown_text = r_review_output['props']['children']
    else:
        markdown_text = str(r_review_output)

    stream = BytesIO()
    create_docx(markdown_text).save(stream)
    return dcc.send_bytes(stream.getvalue(), "r_review_report.docx")

@app.callback(
    Output("download-g-review-doc", "data"),
    Input("download-g-review", "n_clicks"),
    State('g-review-output', 'children'),
    prevent_initial_call=True,
)
def download_g_review(n_clicks, g_review_output):
    """Send the G.Review output to the browser as g_review_report.docx."""
    if g_review_output is None:
        return dash.no_update

    # A serialized component keeps its text under props/children; anything
    # else is stringified as-is.
    if isinstance(g_review_output, dict):
        markdown_text = g_review_output['props']['children']
    else:
        markdown_text = str(g_review_output)

    stream = BytesIO()
    create_docx(markdown_text).save(stream)
    return dcc.send_bytes(stream.getvalue(), "g_review_report.docx")

@app.callback(
    Output("download-loe-doc", "data"),
    Input("download-loe", "n_clicks"),
    State('loe-output', 'children'),
    prevent_initial_call=True,
)
def download_loe(n_clicks, loe_output):
    """Send the LOE-tab narrative to the browser as loe_report.docx.

    Only the first child of the output is exported when the output is a
    non-empty list; otherwise the whole value is stringified.
    """
    if loe_output is None:
        return dash.no_update

    has_children = isinstance(loe_output, list) and len(loe_output) > 0
    if not has_children:
        report_text = str(loe_output)
    else:
        first_child = loe_output[0]
        # A serialized component keeps its text under props/children.
        if isinstance(first_child, dict):
            report_text = first_child['props']['children']
        else:
            report_text = str(first_child)

    stream = BytesIO()
    create_docx(report_text).save(stream)
    return dcc.send_bytes(stream.getvalue(), "loe_report.docx")

if __name__ == '__main__':
    print("Starting the Dash application...")
    # The server listens on all interfaces, so debug mode (developer tooling
    # and detailed error output) is opt-in instead of always on. Set
    # DASH_DEBUG=1 (or true/yes) in the environment to enable it.
    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")