Spaces:

MicroHealth
/

proposal-writer

Paused

App Files Files Community

proposal-writer / app.py

bluenevus

Update app.py

46e47d9 verified 4 months ago

raw

history blame

25.8 kB

	import base64
	import io
	import os
	import threading
	import time
	from typing import List, Tuple
	import re
	import pandas as pd
	from docx import Document
	from io import BytesIO
	import dash
	import dash_bootstrap_components as dbc
	from dash import html, dcc, Input, Output, State, ctx, dash_table, callback_context
	import google.generativeai as genai
	from docx import Document
	from docx.shared import Pt
	from docx.enum.style import WD_STYLE_TYPE
	from PyPDF2 import PdfReader
	from io import StringIO

	# Initialize Dash app
	# The Bootstrap theme stylesheet is passed so the dbc.* components used in the layout pick it up.
	app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

	# Configure Gemini AI
	# The key is read with os.environ[...], so a missing GEMINI_API_KEY raises KeyError at import time.
	genai.configure(api_key=os.environ["GEMINI_API_KEY"])
	# Module-level model handle; the generate_* / evaluate_* functions below call model.generate_content().
	model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')

	def process_document(contents: str, filename: str) -> str:
	    """Decode an uploaded file and return its plain text.

	    Args:
	        contents: Upload payload in ``"<header>,<base64 data>"`` form.
	        filename: Name of the uploaded file; its extension selects the parser
	            (``.pdf`` or ``.docx``).

	    Returns:
	        The extracted text. When extraction is not possible a message is
	        returned instead, starting with "Unsupported file format",
	        "The document appears to be empty" or "Error processing document".
	        No exception escapes this function.
	    """
	    try:
	        # Splitting and decoding happen inside the try so that a malformed
	        # payload is reported as a message instead of crashing the caller.
	        _header, separator, content_string = contents.partition(',')
	        if not separator:
	            raise ValueError("upload payload is not in '<header>,<base64 data>' form")
	        decoded = base64.b64decode(content_string)

	        lowered_name = filename.lower()
	        if lowered_name.endswith('.pdf'):
	            pdf = PdfReader(io.BytesIO(decoded))
	            # A page without extractable text may yield None; treat it as empty.
	            text = "".join(page.extract_text() or "" for page in pdf.pages)
	        elif lowered_name.endswith('.docx'):
	            doc = Document(io.BytesIO(decoded))
	            text = "\n".join(para.text for para in doc.paragraphs)
	        else:
	            return f"Unsupported file format: {filename}. Please upload a PDF or DOCX file."

	        if not text.strip():
	            return "The document appears to be empty. Please check the file and try again."

	        return text
	    except Exception as e:
	        return f"Error processing document: {str(e)}"

	def generate_loe(document: str, is_file: bool = False, filename: str = "") -> Tuple[str, pd.DataFrame]:
	    """Resolve the text an LOE request should analyse (incomplete early definition).

	    When ``is_file`` is true the upload is run through ``process_document``;
	    an "Unsupported file format" or "Error processing document" result is
	    returned together with an empty DataFrame. In every other case the body
	    ends after resolving the text, so the call returns None.

	    NOTE(review): the name ``generate_loe`` is bound again by a later
	    definition in this file, so callers reach that one, not this stub.
	    """
	    if not is_file:
	        document_text = document
	    else:
	        # Process the uploaded document
	        document_text = process_document(document, filename)
	        failure_prefixes = ("Unsupported file format", "Error processing document")
	        if document_text.startswith(failure_prefixes):
	            return document_text, pd.DataFrame()

	def generate_outline(text: str, instructions: str) -> str:
	    """Ask the Gemini model for a requirements outline of a PWS.

	    Args:
	        text: Text of the Project Work Statement.
	        instructions: Extra instructions inserted into the prompt.

	    Returns:
	        The ``text`` attribute of the model response.
	    """
	    outline_request = f"""
	Analyze the following Project Work Statement (PWS) and create an outline
	focusing on sections the indicate specific tasks and L&M (for compliance and writing guide). Extract the main headers, subheaders, and specific
	requirements in each section. Pay special attention to requirements indicated
	by words like "shall", "will", "must", and similar imperative language.

	Additional instructions: {instructions}

	Document text:
	{text}

	Provide the outline in a structured format, clearly highlighting the specific
	requirements and their associated sections.
	"""
	    return model.generate_content(outline_request).text

	def generate_pink_team_document(outline: str, instructions: str) -> str:
	    """Ask the Gemini model to draft a Pink Team response to a PWS outline.

	    Args:
	        outline: Outline of the PWS requirements.
	        instructions: Extra instructions inserted into the prompt.

	    Returns:
	        The ``text`` attribute of the model response.
	    """
	    pink_request = f"""
	Based on the following outline of a Project Work Statement (PWS):

	{outline}

	Additional instructions: {instructions}

	Create a detailed response document as if MicroHealth is responding to this PWS.
	Follow these guidelines:
	1. Use Wikipedia style writing with active voice. Be firm with the approach, no soft words like could be, may be, should, might. Use definitve language.
	2. For each requirement, describe in detail how MicroHealth will innovate to address it.
	3. Explain the industry best practices that will be applied and the workflow to accomplish the steps in the best practice to address the requirement.
	4. Provide measurable outcomes for the customer.
	5. Limit the use of bullet points and write predominantly in paragraph format.
	6. Ensure a logical flow of steps taken by MicroHealth for each requirement.
	7. Where applicable, describe the labor category or labor categories that perform the task as part of the process

	Generate a comprehensive response that showcases MicroHealth's expertise and approach.
	"""
	    return model.generate_content(pink_request).text

	def evaluate_compliance(document: str, requirements: str) -> str:
	    """Ask the Gemini model for a section-by-section compliance report.

	    Args:
	        document: The response document being evaluated.
	        requirements: The requirements the document is checked against.

	    Returns:
	        The ``text`` attribute of the model response.
	    """
	    compliance_request = f"""
	Evaluate the following document against the requirements from sections L&M of the PWS:

	Document:
	{document}

	Requirements:
	{requirements}

	Provide a compliance report by section number, highlighting:
	1. Areas that need improvement
	2. Suggestions on how MicroHealth can better respond to the requirements
	3. Best industry practices that should be applied
	4. Measurable outcomes that should be included
	5. Organize by document section headers and numbers

	Format the report clearly by section number.
	"""
	    return model.generate_content(compliance_request).text

	def generate_red_document(document: str, compliance_report: str) -> str:
	    """Ask the Gemini model for a revised "Red Team" document.

	    Args:
	        document: The document to revise.
	        compliance_report: Compliance findings the revision has to address.

	    Returns:
	        The ``text`` attribute of the model response.
	    """
	    red_request = f"""
	Based on the following document and compliance report:

	Original Document:
	{document}

	Compliance Report:
	{compliance_report}

	Generate a revised "Red Team" document that addresses all issues found in the compliance report.
	Follow these guidelines:
	1. Use Wikipedia style writing with active voice. Be firm with the approach, no soft words like could be, may be, should, might. Use definitve language.
	2. For each requirement, describe in detail how MicroHealth will innovate to address it.
	3. Explain the industry best practices that will be applied and the workflow to accomplish the steps in the best practice to address the requirement.
	4. Provide measurable outcomes for the customer.
	5. Limit the use of bullet points and write predominantly in paragraph format.
	6. Ensure a logical flow of steps taken by MicroHealth for each requirement.
	7. Where applicable, describe the labor category or labor categories that perform the task as part of the process
	"""
	    return model.generate_content(red_request).text

	def generate_loe(document: str, is_file: bool = False, filename: str = "") -> Tuple[str, pd.DataFrame]:
	    """Generate a Level of Effort (LOE) breakdown for a document.

	    Args:
	        document: The document text, or an upload payload in
	            ``"<header>,<base64 data>"`` form when ``is_file`` is true.
	        is_file: Whether ``document`` is an upload payload to be decoded.
	        filename: Name of the uploaded file. When omitted, the extension is
	            inferred from the payload header.

	    Returns:
	        ``(text, table)``. ``text`` is the model response (with a note
	        appended when no table could be extracted) and ``table`` holds the
	        summary table. When the upload cannot be read, ``text`` is the
	        message from ``process_document`` and ``table`` is empty.
	    """
	    default_columns = ['Task Summary', 'Labor Categories', 'Hours per Labor Category', 'Total Hours']

	    if is_file:
	        if not filename:
	            # The payload header is not a file name; map its media type to an
	            # extension so process_document can choose a parser.
	            header = document.split(',', 1)[0].lower()
	            if 'pdf' in header:
	                filename = 'upload.pdf'
	            elif 'wordprocessingml' in header:
	                filename = 'upload.docx'
	        document_text = process_document(document, filename)
	        # Extraction failures come back as messages; never send them to the model.
	        if document_text.startswith(("Unsupported file format", "Error processing document", "The document appears to be empty")):
	            return document_text, pd.DataFrame()
	    else:
	        document_text = document

	    prompt = f"""
	Analyze the following document and provide a Level of Effort (LOE) breakdown:

	Document:
	{document_text}

	For each section header in the document:
	1. Identify the tasks to be completed
	2. Determine the appropriate labor categories for each task
	3. Estimate the number of hours required for each labor category to complete the task

	Provide a detailed breakdown and then summarize the information in a tabular format with the following columns:
	- Task Summary
	- Labor Categories
	- Hours per Labor Category
	- Total Hours

	Present the detailed breakdown first, followed by the summary table.
	Ensure the table is properly formatted with | as column separators and a header row.
	"""
	    response = model.generate_content(prompt)
	    response_text = response.text

	    def split_row(line):
	        """Split one pipe-delimited table line into stripped cell strings."""
	        inner = line[1:-1] if len(line) > 1 and line.endswith('|') else line[1:]
	        return [cell.strip() for cell in inner.split('|')]

	    # Extract the table: the header row is the first pipe-delimited line that
	    # mentions "Task Summary"; the table ends at the first non-table line.
	    header_cells = None
	    body_rows = []
	    for raw_line in response_text.splitlines():
	        # Tolerate responses that escape the separator as "\|".
	        line = raw_line.strip().replace('\\|', '|')
	        if header_cells is None:
	            if line.startswith('|') and 'Task Summary' in line:
	                header_cells = [cell.strip('* ') for cell in split_row(line)]
	            continue
	        if not line.startswith('|'):
	            break
	        cells = split_row(line)
	        # Skip the markdown alignment row (cells made only of '-' and ':').
	        if all(cell and set(cell) <= set('-:') for cell in cells):
	            continue
	        body_rows.append(cells)

	    if header_cells and body_rows:
	        width = len(header_cells)
	        # Pad or trim ragged rows so every row matches the header width.
	        normalized = [(row + [''] * width)[:width] for row in body_rows]
	        df = pd.DataFrame(normalized, columns=header_cells)
	    else:
	        # If no table is found or it's empty, create a default DataFrame
	        df = pd.DataFrame(columns=default_columns)
	        response_text += "\n\nNote: No detailed LOE table could be generated from the AI response."

	    return response_text, df

	# Layout
	def _upload_box(component_id):
	    """Return the dashed drag-and-drop upload area with the given component id."""
	    return dcc.Upload(
	        id=component_id,
	        children=html.Div(['Drag and Drop or ', html.A('Select Files')]),
	        style={
	            'width': '100%',
	            'height': '60px',
	            'lineHeight': '60px',
	            'borderWidth': '1px',
	            'borderStyle': 'dashed',
	            'borderRadius': '5px',
	            'textAlign': 'center',
	            'margin': '10px',
	        },
	        multiple=False,
	    )


	def _upload_tab(label, tab_id, action_label, action_id, download_label):
	    """Return a tab holding an upload area, an action button, an output pane and a download button.

	    Component ids are derived from ``tab_id``: ``upload-<tab_id>``,
	    ``<tab_id>-output``, ``download-<tab_id>`` and ``download-<tab_id>-doc``.
	    """
	    return dbc.Tab(label=label, tab_id=tab_id, children=[
	        _upload_box(f'upload-{tab_id}'),
	        dbc.Button(action_label, id=action_id, className="mt-3"),
	        dbc.Spinner(html.Div(id=f'{tab_id}-output')),
	        dbc.Button(download_label, id=f"download-{tab_id}", className="mt-3"),
	        dcc.Download(id=f"download-{tab_id}-doc"),
	    ])


	app.layout = dbc.Container([
	    html.H1("MicroHealth PWS Analysis and Response Generator", className="my-4"),
	    dbc.Tabs([
	        dbc.Tab(label="Shred", tab_id="shred", children=[
	            dbc.Textarea(
	                id='shred-instructions',
	                placeholder="Enter any additional instructions for shredding the document...",
	                style={'height': '100px', 'marginBottom': '10px'},
	            ),
	            _upload_box('upload-document'),
	            dbc.Spinner(html.Div(id='shred-output')),
	            dbc.Button("Download Outline", id="download-shred", className="mt-3"),
	            dcc.Download(id="download-shred-doc"),
	        ]),
	        dbc.Tab(label="Pink", tab_id="pink", children=[
	            dbc.Textarea(
	                id='pink-instructions',
	                placeholder="Enter any additional instructions for generating the Pink Team document...",
	                style={'height': '100px', 'marginBottom': '10px'},
	            ),
	            dbc.Button("Generate Pink Team Document", id="generate-pink", className="mt-3"),
	            dbc.Spinner(html.Div(id='pink-output')),
	            dbc.Button("Download Pink Team Document", id="download-pink", className="mt-3"),
	            dcc.Download(id="download-pink-doc"),
	        ]),
	        _upload_tab("P.Review", "p-review", "Evaluate Compliance", "evaluate-p-review", "Download P.Review Report"),
	        _upload_tab("Red", "red", "Generate Red Team Document", "generate-red", "Download Red Team Document"),
	        _upload_tab("R.Review", "r-review", "Evaluate Compliance", "evaluate-r-review", "Download R.Review Report"),
	        _upload_tab("G.Review", "g-review", "Evaluate Compliance", "evaluate-g-review", "Download G.Review Report"),
	        _upload_tab("LOE", "loe", "Generate LOE", "generate-loe", "Download LOE Report"),
	    ], id="tabs", active_tab="shred"),
	])

	@app.callback(
	    Output('shred-output', 'children'),
	    Input('upload-document', 'contents'),
	    State('upload-document', 'filename'),
	    State('shred-instructions', 'value')
	)
	def update_shred_output(contents, filename, instructions):
	    """Generate the requirements outline when a document is uploaded on the Shred tab.

	    Returns a prompt to upload when nothing has been uploaded, the
	    extraction message when the upload cannot be read, and otherwise the
	    outline wrapped in ``dcc.Markdown``.
	    """
	    if contents is None:
	        return "Upload a document to begin."

	    text = process_document(contents, filename)
	    # process_document reports failures as message strings; show them to the
	    # user instead of asking the model to outline an error message.
	    if text.startswith(("Unsupported file format", "Error processing document", "The document appears to be empty")):
	        return text

	    outline = generate_outline(text, instructions or "")
	    return dcc.Markdown(outline)

	@app.callback(
	    Output('pink-output', 'children'),
	    Input('generate-pink', 'n_clicks'),
	    State('shred-output', 'children'),
	    State('pink-instructions', 'value')
	)
	def update_pink_output(n_clicks, shred_output, instructions):
	    """Generate the Pink Team document from the outline shown on the Shred tab.

	    ``shred_output`` is the stored children of ``shred-output``: a plain
	    string while only a status message is shown, or a serialized component
	    whose ``props.children`` holds the outline markdown.
	    """
	    # Only a stored Markdown component carries an outline; plain strings are
	    # status messages and must not be sent to the model as the outline.
	    outline = None
	    if isinstance(shred_output, dict):
	        outline = shred_output.get('props', {}).get('children')

	    if n_clicks is None or not isinstance(outline, str) or not outline.strip():
	        return "Generate an outline in the Shred tab first."

	    pink_doc = generate_pink_team_document(outline, instructions or "")
	    return dcc.Markdown(pink_doc)

	@app.callback(
	    Output('p-review-output', 'children'),
	    Input('evaluate-p-review', 'n_clicks'),
	    State('upload-p-review', 'contents'),
	    State('upload-p-review', 'filename'),
	    State('pink-output', 'children'),
	    State('shred-output', 'children')
	)
	def update_p_review_output(n_clicks, contents, filename, pink_doc, requirements):
	    """Evaluate an uploaded document, or the generated Pink Team document, against the outline.

	    ``pink_doc`` and ``requirements`` are stored children of other outputs:
	    plain strings while only a status message is shown, or serialized
	    components whose ``props.children`` holds the generated markdown.
	    """
	    if n_clicks is None:
	        return "Click 'Evaluate Compliance' to begin."

	    def markdown_source(children):
	        """Return the markdown held by a stored component, or None for anything else."""
	        if isinstance(children, dict):
	            source = children.get('props', {}).get('children')
	            if isinstance(source, str) and source.strip():
	                return source
	        return None

	    if contents:
	        document = process_document(contents, filename)
	        # Extraction failures are messages for the user, not documents to evaluate.
	        if document.startswith(("Unsupported file format", "Error processing document", "The document appears to be empty")):
	            return document
	    else:
	        document = markdown_source(pink_doc)
	        if document is None:
	            return "Please upload a document or generate a Pink Team document first."

	    outline = markdown_source(requirements)
	    if outline is None:
	        return "Generate an outline in the Shred tab first."

	    compliance_report = evaluate_compliance(document, outline)
	    return dcc.Markdown(compliance_report)

	@app.callback(
	    Output('g-review-output', 'children'),
	    Input('evaluate-g-review', 'n_clicks'),
	    State('upload-g-review', 'contents'),
	    State('upload-g-review', 'filename'),
	    State('shred-output', 'children')
	)
	def update_g_review_output(n_clicks, contents, filename, requirements):
	    """Evaluate an uploaded document against the outline shown on the Shred tab.

	    ``requirements`` is the stored children of ``shred-output``: a plain
	    string while only a status message is shown, or a serialized component
	    whose ``props.children`` holds the outline markdown.
	    """
	    if n_clicks is None:
	        return "Click 'Evaluate Compliance' to begin."

	    if contents is None:
	        return "Please upload a document first."

	    # Only a stored Markdown component carries an outline.
	    outline = None
	    if isinstance(requirements, dict):
	        outline = requirements.get('props', {}).get('children')
	    if not isinstance(outline, str) or not outline.strip():
	        return "Generate an outline in the Shred tab first."

	    document = process_document(contents, filename)
	    # Extraction failures are messages for the user, not documents to evaluate.
	    if document.startswith(("Unsupported file format", "Error processing document", "The document appears to be empty")):
	        return document

	    compliance_report = evaluate_compliance(document, outline)
	    return dcc.Markdown(compliance_report)

	@app.callback(
	    Output('loe-output', 'children'),
	    Input('generate-loe', 'n_clicks'),
	    State('upload-loe', 'contents'),
	    State('shred-output', 'children'),
	    State('upload-loe', 'filename')
	)
	def update_loe_output(n_clicks, upload_contents, shred_output, upload_filename=None):
	    """Generate the LOE report when 'Generate LOE' is clicked.

	    The uploaded file is used when present, otherwise the outline stored in
	    ``shred-output``. The upload is decoded here, with its real file name,
	    and ``generate_loe`` is always handed plain text.

	    NOTE(review): a second callback with this function name is registered
	    for the same output further down the file (with ``allow_duplicate``);
	    both react to the 'Generate LOE' button.
	    """
	    if n_clicks is None:
	        return "Click 'Generate LOE' to begin."

	    try:
	        if upload_contents:
	            document_text = process_document(upload_contents, upload_filename)
	            # Extraction failures are messages for the user, not documents.
	            if document_text.startswith(("Unsupported file format", "Error processing document", "The document appears to be empty")):
	                return document_text
	        else:
	            # Only a stored Markdown component carries an outline; plain
	            # strings are status messages.
	            document_text = None
	            if isinstance(shred_output, dict):
	                document_text = shred_output.get('props', {}).get('children')
	            if not isinstance(document_text, str) or not document_text.strip():
	                return "Please upload a document or complete the Shred tab first."

	        loe_text, loe_df = generate_loe(document_text)

	        return [
	            dcc.Markdown(loe_text),
	            dash_table.DataTable(
	                data=loe_df.to_dict('records'),
	                columns=[{'name': i, 'id': i} for i in loe_df.columns],
	                style_table={'overflowX': 'auto'},
	                style_cell={'textAlign': 'left', 'padding': '5px'},
	                style_header={'backgroundColor': 'rgb(230, 230, 230)', 'fontWeight': 'bold'}
	            )
	        ]
	    except Exception as e:
	        return f"An error occurred: {str(e)}"

	@app.callback(
	    Output('red-output', 'children'),
	    Input('generate-red', 'n_clicks'),
	    State('upload-red', 'contents'),
	    State('upload-red', 'filename'),
	    State('p-review-output', 'children')
	)
	def update_red_output(n_clicks, contents, filename, p_review_output):
	    """Generate the Red Team document from a document and the P.Review report.

	    ``p_review_output`` is the stored children of ``p-review-output``: a
	    plain string while only a status message is shown, or a serialized
	    component whose ``props.children`` holds the report markdown.
	    """
	    if n_clicks is None:
	        return "Click 'Generate Red Team Document' to begin."

	    # Only a stored Markdown component carries a report; plain strings are
	    # status messages and must not be sent to the model.
	    report = None
	    if isinstance(p_review_output, dict):
	        report = p_review_output.get('props', {}).get('children')
	    if not isinstance(report, str) or not report.strip():
	        return "Please upload a document or complete the P.Review first."

	    if contents:
	        document = process_document(contents, filename)
	        # Extraction failures are messages for the user, not documents.
	        if document.startswith(("Unsupported file format", "Error processing document", "The document appears to be empty")):
	            return document
	    else:
	        # NOTE(review): without an upload the compliance report doubles as the
	        # "original document", as before; presumably the Pink Team document
	        # was intended here - confirm before rewiring the State list.
	        document = report

	    red_doc = generate_red_document(document, report)
	    return dcc.Markdown(red_doc)

	@app.callback(
	    Output('r-review-output', 'children'),
	    Input('evaluate-r-review', 'n_clicks'),
	    State('upload-r-review', 'contents'),
	    State('upload-r-review', 'filename'),
	    State('red-output', 'children'),
	    State('shred-output', 'children')
	)
	def update_r_review_output(n_clicks, contents, filename, red_doc, requirements):
	    """Evaluate an uploaded document, or the generated Red Team document, against the outline.

	    ``red_doc`` and ``requirements`` are stored children of other outputs:
	    plain strings while only a status message is shown, or serialized
	    components whose ``props.children`` holds the generated markdown.
	    """
	    if n_clicks is None:
	        return "Click 'Evaluate Compliance' to begin."

	    def markdown_source(children):
	        """Return the markdown held by a stored component, or None for anything else."""
	        if isinstance(children, dict):
	            source = children.get('props', {}).get('children')
	            if isinstance(source, str) and source.strip():
	                return source
	        return None

	    if contents:
	        document = process_document(contents, filename)
	        # Extraction failures are messages for the user, not documents to evaluate.
	        if document.startswith(("Unsupported file format", "Error processing document", "The document appears to be empty")):
	            return document
	    else:
	        document = markdown_source(red_doc)
	        if document is None:
	            return "Please upload a document or generate a Red Team document first."

	    outline = markdown_source(requirements)
	    if outline is None:
	        return "Generate an outline in the Shred tab first."

	    compliance_report = evaluate_compliance(document, outline)
	    return dcc.Markdown(compliance_report)

	def parse_markdown(doc, content):
	    """Render markdown text into ``doc`` as headings, bullets and formatted paragraphs.

	    Supported syntax: ``#``-style headings (levels 1-6), single-level bullet
	    lines starting with ``*``, ``+`` or ``-``, and the inline markers ``**``
	    / ``__`` (bold), ``*`` (italic) and ``~~`` (strikethrough). Everything
	    else is written as plain paragraph text.

	    Args:
	        doc: Document object providing ``add_heading`` and ``add_paragraph``.
	        content: Markdown source; blank lines separate paragraphs.
	    """
	    heading_re = re.compile(r'^(#{1,6})\s+(.+)$')
	    bullet_re = re.compile(r'^\s*[*+-]\s+(.+)$')
	    # Two-character markers come first so '**' is not read as two '*'.
	    marker_re = re.compile(r'(\*\*|__|~~|\*)')

	    def add_runs(paragraph, text):
	        """Append ``text`` to ``paragraph``, toggling formatting at each inline marker."""
	        bold = italic = strike = False
	        for token in marker_re.split(text):
	            if token in ('**', '__'):
	                bold = not bold
	            elif token == '*':
	                italic = not italic
	            elif token == '~~':
	                strike = not strike
	            elif token:  # re.split yields '' between adjacent markers
	                run = paragraph.add_run(token)
	                run.bold = bold
	                run.italic = italic
	                run.underline = False
	                run.font.strike = strike

	    def flush(lines):
	        """Write the collected plain lines as one paragraph and reset the list."""
	        if lines:
	            add_runs(doc.add_paragraph(), '\n'.join(lines))
	            del lines[:]

	    # Split content into paragraphs
	    for block in content.split('\n\n'):
	        pending = []
	        # Headings and bullets are recognised per line, so a heading directly
	        # followed by body text is still rendered as a heading.
	        for line in block.split('\n'):
	            heading = heading_re.match(line.strip())
	            bullet = bullet_re.match(line)
	            if heading:
	                flush(pending)
	                doc.add_heading(heading.group(2).strip(), level=len(heading.group(1)))
	            elif bullet:
	                flush(pending)
	                # Strip the bullet marker so it is not read as an italic toggle.
	                add_runs(doc.add_paragraph(style='List Bullet'), bullet.group(1))
	            elif line.strip():
	                pending.append(line)
	        flush(pending)

	def create_docx(content):
	    """Build a Word document from markdown content.

	    Args:
	        content: Markdown text, or the stored children of a Dash output: a
	            serialized component dict whose ``props.children`` holds the
	            text, or a list of such items. Callers in this file pass both
	            forms, so both are accepted.

	    Returns:
	        The populated ``Document``.
	    """
	    def to_text(node):
	        """Collect the text held by a string, a component dict or a list of them."""
	        if node is None:
	            return ""
	        if isinstance(node, str):
	            return node
	        if isinstance(node, dict):
	            return to_text(node.get('props', {}).get('children'))
	        if isinstance(node, (list, tuple)):
	            return "\n\n".join(part for part in map(to_text, node) if part)
	        return str(node)

	    doc = Document()

	    # Add styles
	    styles = doc.styles
	    style_names = [style.name for style in styles]
	    if 'Code' not in style_names:
	        code_style = styles.add_style('Code', WD_STYLE_TYPE.PARAGRAPH)
	        code_font = code_style.font
	        code_font.name = 'Courier New'
	        code_font.size = Pt(10)

	    parse_markdown(doc, to_text(content))
	    return doc

	@app.callback(
	    Output("download-shred-doc", "data"),
	    Input("download-shred", "n_clicks"),
	    State('shred-output', 'children'),
	    prevent_initial_call=True,
	)
	def download_shred(n_clicks, shred_output):
	    """Send the generated outline to the browser as ``shred_outline.docx``.

	    ``shred_output`` is the stored children of ``shred-output``: a plain
	    string while only a status message is shown, or a serialized component
	    whose ``props.children`` holds the markdown once an outline exists.
	    """
	    # Status messages are plain strings; skip them, as download_loe does.
	    if not isinstance(shred_output, dict):
	        return dash.no_update
	    markdown_text = shred_output.get('props', {}).get('children')
	    if not isinstance(markdown_text, str):
	        return dash.no_update
	    doc = create_docx(markdown_text)
	    buffer = BytesIO()
	    doc.save(buffer)
	    return dcc.send_bytes(buffer.getvalue(), "shred_outline.docx")

	@app.callback(
	    Output("download-pink-doc", "data"),
	    Input("download-pink", "n_clicks"),
	    State('pink-output', 'children'),
	    prevent_initial_call=True,
	)
	def download_pink(n_clicks, pink_output):
	    """Send the generated Pink Team document to the browser as ``pink_team_document.docx``.

	    ``pink_output`` is the stored children of ``pink-output``: a plain
	    string while only a status message is shown, or a serialized component
	    whose ``props.children`` holds the markdown once a document exists.
	    """
	    # Status messages are plain strings; skip them, as download_loe does.
	    if not isinstance(pink_output, dict):
	        return dash.no_update
	    markdown_text = pink_output.get('props', {}).get('children')
	    if not isinstance(markdown_text, str):
	        return dash.no_update
	    doc = create_docx(markdown_text)
	    buffer = BytesIO()
	    doc.save(buffer)
	    return dcc.send_bytes(buffer.getvalue(), "pink_team_document.docx")

	@app.callback(
	    Output("download-p-review-doc", "data"),
	    Input("download-p-review", "n_clicks"),
	    State('p-review-output', 'children'),
	    prevent_initial_call=True,
	)
	def download_p_review(n_clicks, p_review_output):
	    """Send the P.Review compliance report to the browser as ``p_review_report.docx``.

	    ``p_review_output`` is the stored children of ``p-review-output``: a
	    plain string while only a status message is shown, or a serialized
	    component whose ``props.children`` holds the markdown once a report exists.
	    """
	    # Status messages are plain strings; skip them, as download_loe does.
	    if not isinstance(p_review_output, dict):
	        return dash.no_update
	    markdown_text = p_review_output.get('props', {}).get('children')
	    if not isinstance(markdown_text, str):
	        return dash.no_update
	    doc = create_docx(markdown_text)
	    buffer = BytesIO()
	    doc.save(buffer)
	    return dcc.send_bytes(buffer.getvalue(), "p_review_report.docx")

	@app.callback(
	    Output("download-red-doc", "data"),
	    Input("download-red", "n_clicks"),
	    State('red-output', 'children'),
	    prevent_initial_call=True,
	)
	def download_red(n_clicks, red_output):
	    """Send the generated Red Team document to the browser as ``red_team_document.docx``.

	    ``red_output`` is the stored children of ``red-output``: a plain string
	    while only a status message is shown, or a serialized component whose
	    ``props.children`` holds the markdown once a document exists.
	    """
	    # Status messages are plain strings; skip them, as download_loe does.
	    if not isinstance(red_output, dict):
	        return dash.no_update
	    markdown_text = red_output.get('props', {}).get('children')
	    if not isinstance(markdown_text, str):
	        return dash.no_update
	    doc = create_docx(markdown_text)
	    buffer = BytesIO()
	    doc.save(buffer)
	    return dcc.send_bytes(buffer.getvalue(), "red_team_document.docx")

	@app.callback(
	    Output("download-r-review-doc", "data"),
	    Input("download-r-review", "n_clicks"),
	    State('r-review-output', 'children'),
	    prevent_initial_call=True,
	)
	def download_r_review(n_clicks, r_review_output):
	    """Send the R.Review compliance report to the browser as ``r_review_report.docx``.

	    ``r_review_output`` is the stored children of ``r-review-output``: a
	    plain string while only a status message is shown, or a serialized
	    component whose ``props.children`` holds the markdown once a report exists.
	    """
	    # Status messages are plain strings; skip them, as download_loe does.
	    if not isinstance(r_review_output, dict):
	        return dash.no_update
	    markdown_text = r_review_output.get('props', {}).get('children')
	    if not isinstance(markdown_text, str):
	        return dash.no_update
	    doc = create_docx(markdown_text)
	    buffer = BytesIO()
	    doc.save(buffer)
	    return dcc.send_bytes(buffer.getvalue(), "r_review_report.docx")

	@app.callback(
	    Output("download-g-review-doc", "data"),
	    Input("download-g-review", "n_clicks"),
	    State('g-review-output', 'children'),
	    prevent_initial_call=True,
	)
	def download_g_review(n_clicks, g_review_output):
	    """Send the G.Review compliance report to the browser as ``g_review_report.docx``.

	    ``g_review_output`` is the stored children of ``g-review-output``: a
	    plain string while only a status message is shown, or a serialized
	    component whose ``props.children`` holds the markdown once a report exists.
	    """
	    # Status messages are plain strings; skip them, as download_loe does.
	    if not isinstance(g_review_output, dict):
	        return dash.no_update
	    markdown_text = g_review_output.get('props', {}).get('children')
	    if not isinstance(markdown_text, str):
	        return dash.no_update
	    doc = create_docx(markdown_text)
	    buffer = BytesIO()
	    doc.save(buffer)
	    return dcc.send_bytes(buffer.getvalue(), "g_review_report.docx")

	@app.callback(
	    Output("download-loe-doc", "data"),
	    Input("download-loe", "n_clicks"),
	    State('loe-output', 'children'),
	    prevent_initial_call=True,
	)
	def download_loe(n_clicks, loe_output):
	    """Send the LOE narrative to the browser as ``loe_report.docx``.

	    Nothing is sent while ``loe_output`` is missing or a plain string.
	    Otherwise the text is read from ``props.children`` of the first stored
	    child.
	    """
	    nothing_generated = isinstance(loe_output, str) or loe_output is None
	    if nothing_generated:
	        return dash.no_update

	    first_child = loe_output[0]
	    report = create_docx(first_child['props']['children'])
	    stream = BytesIO()
	    report.save(stream)
	    return dcc.send_bytes(stream.getvalue(), "loe_report.docx")

	from dash import callback_context

	@app.callback(
	    Output('loe-output', 'children', allow_duplicate=True),
	    Input('generate-loe', 'n_clicks'),
	    Input('upload-loe', 'contents'),
	    State('upload-loe', 'filename'),
	    State('shred-output', 'children'),
	    prevent_initial_call=True
	)
	def update_loe_output(n_clicks, upload_contents, upload_filename, shred_output):
	    """Generate the LOE report when the button is clicked or a file is uploaded.

	    The uploaded file is used when present, otherwise the outline stored in
	    ``shred-output``. The upload is decoded here and ``generate_loe`` is
	    handed plain text, so the call does not depend on ``generate_loe``
	    accepting a ``filename`` keyword.

	    NOTE(review): an earlier callback with this function name is registered
	    for the same output; both react to the 'Generate LOE' button.
	    """
	    # Check that something triggered the callback before indexing into the list.
	    triggered = callback_context.triggered
	    if not triggered:
	        return dash.no_update
	    triggered_id = triggered[0]['prop_id'].split('.')[0]
	    if triggered_id not in ('generate-loe', 'upload-loe'):
	        return dash.no_update

	    try:
	        if upload_contents:
	            document_text = process_document(upload_contents, upload_filename)
	            # Extraction failures are messages for the user, not documents.
	            if document_text.startswith(("Unsupported file format", "Error processing document", "The document appears to be empty")):
	                return document_text
	        else:
	            # Only a stored Markdown component carries an outline; plain
	            # strings are status messages.
	            document_text = None
	            if isinstance(shred_output, dict):
	                document_text = shred_output.get('props', {}).get('children')
	            if not isinstance(document_text, str) or not document_text.strip():
	                return "Please upload a document or complete the Shred tab first."

	        loe_text, loe_df = generate_loe(document_text)

	        return [
	            dcc.Markdown(loe_text),
	            dash_table.DataTable(
	                data=loe_df.to_dict('records'),
	                columns=[{'name': i, 'id': i} for i in loe_df.columns],
	                style_table={'overflowX': 'auto'},
	                style_cell={'textAlign': 'left', 'padding': '5px'},
	                style_header={'backgroundColor': 'rgb(230, 230, 230)', 'fontWeight': 'bold'}
	            )
	        ]
	    except Exception as e:
	        return f"An error occurred: {str(e)}"

	if __name__ == '__main__':
	    # The server listens on every interface (0.0.0.0), so the interactive
	    # debugger must not be on by default: it allows code execution from the
	    # browser. Set DASH_DEBUG=1 to opt in during local development.
	    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in ("1", "true", "yes")
	    print("Starting the Dash application...")
	    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
	    print("Dash application has finished running.")