import hmac

import streamlit as st
from streamlit_tags import st_tags
from streamlit_vertical_slider import vertical_slider

import pdf_generator
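# pdf_generator is a local module (not shown here). From its usage below it is
# assumed to expose:
#   parse_quantitative_criteria(criterion) -> (name, min_val, max_val,
#       is_percentage, is_integer) on success, or a falsy value on bad input
#   generate_pdf(session_state) -> PDF bytes suitable for st.download_button
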
# Set page config
st.set_page_config(
    page_title="Experimental Validation Method Planner",
    page_icon="🧪",
    layout="wide",
    initial_sidebar_state="collapsed",
)
# Password protection
def check_password():
    """Gate the app behind the password stored in st.secrets["app_password"]."""

    def password_entered():
        # Constant-time comparison to avoid leaking information via timing.
        if hmac.compare_digest(st.session_state["password"], st.secrets["app_password"]):
            st.session_state["password_correct"] = True
            del st.session_state["password"]  # don't keep the raw password in session state
        else:
            st.session_state["password_correct"] = False

    if st.session_state.get("password_correct", False):
        return True

    st.text_input("Enter the password", type="password", on_change=password_entered, key="password")
    st.divider()
    st.info("Developed by Milan Mrdenovic © IBM Norway 2024")
    if "password_correct" in st.session_state:
        st.error("😕 Password incorrect")
    return False
if not check_password():
    st.stop()
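
# The answers dict mirrors the pages defined below: each top-level key matches a
# page's 'input_key', so every page's widgets read and write the same structure.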
# Initialize session state
if 'current_page' not in st.session_state:
    st.session_state.current_page = 0
if 'answers' not in st.session_state:
    st.session_state.answers = {
        'idea_testing': {
            'rapid_prototyping': {'input': '', 'process': '', 'output': ''},
            'framework': '',
            'tools': ''
        },
        'capability_testing': {
            'capability': '',
            'assessment_method': '',
            'success_definition': '',
            'validation_criteria': {'qualitative': [], 'quantitative': []}
        },
        'approach_comparison': {'standardization': '', 'experiment_overview': ''},
        'mock_application': {'user_testing': '', 'insights': '', 'presentation': ''}
    }
# Define the content for each page
pages = [
    {
        'title': "Idea Testing",
        'content': """
Can we create a framework to validate the idea/capability without building a pilot version? Think of it as a sketch: how do we choose the right tooling/LLM and test it on a mock application or GUI?

For example, if we use an LLM to summarize a type of information, can we validate that THIS data provided in THIS form behaves as we expect or desire? If we provide documentation, does it transform it into the desired form? Is it robust and repeatable? Is it susceptible to meddling or interference?

Can we create a good overview based on performance? Do we know any libraries, tools, or assets that can help us expedite this process?

Consider elements such as how to validate capability robustness and repeatability, or how to evaluate the solution's susceptibility to meddling.
""",
        'input_key': 'idea_testing',
        'input_type': 'custom'
    },
    {
        'title': "Capability Testing",
        'content': """
What is the root of the capability we are looking for, and what do we need to validate? (e.g., retrieval of relevant documentation, data transformation, performance/precision)

How will we assess it? (e.g., batches of prompts, mock data, human evaluation, metrics such as F1 scores) How do we define success?

Define the qualitative and quantitative validation criteria. For quantitative criteria, use the format CriteriaName[min - max], for example F1_Score[0.0 - 1.0] or Accuracy[0% - 100%].
""",
        'input_key': 'capability_testing',
        'input_type': 'custom'
    },
    {
        'title': "Approach Comparison",
        'content': """
How do we compare different approaches to identify the best option? Can we build our methodology around creating reusable or ready-to-go assets in standardized formats?

How can we maintain an overview of our different experiments in one place? Do we want to capture high-fidelity data (e.g., costs, durations)?
""",
        'input_key': 'approach_comparison',
        'input_type': 'custom'
    },
    {
        'title': "Mock Application",
        'content': """
How do we want to perform user testing or utilize the results of our experiment? What insights do we want to capture, and from whom?

How polished should the mock application be? How do we prepare it for a showcase? What tools can we use to create it efficiently? (e.g., Streamlit, Gradio, Hugging Face Spaces)
""",
        'input_key': 'mock_application',
        'input_type': 'custom'
    },
    {
        'title': "Generate Evaluation Report",
        'content': "You have completed the Experimental Validation Method Planner.\n\nClick the button below to generate and download your PDF report.",
        'input_key': None
    }
]
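# Page definitions are kept in session state so pdf_generator can pull titles
# and ordering when building the report (an assumption based on generate_pdf
# receiving the whole session state below).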
st.session_state.pages = pages

# Main Streamlit app
st.title("Experimental Validation Method Planner")
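
# Back/Next update st.session_state.current_page and force an immediate rerun
# so the buttons, content, and progress bar all reflect the new page.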
# Navigation buttons
col1, col2, col3 = st.columns([1, 2, 1])
with col1:
    if st.session_state.current_page > 0:
        if st.button("Back"):
            st.session_state.current_page -= 1
            st.rerun()
with col3:
    if st.session_state.current_page < len(pages) - 1:
        if st.button("Next", use_container_width=True):
            st.session_state.current_page += 1
            st.rerun()
# Display current page
current_page = pages[st.session_state.current_page]
st.header(current_page['title'])
with st.expander("Description", expanded=False):
    st.markdown(current_page['content'])
# Input fields
if 'input_key' in current_page and current_page['input_key'] is not None:
    if current_page['input_key'] == 'idea_testing':
        st.subheader("Idea Testing")
        col1, col2, col3 = st.columns(3)
        with col1:
            st.session_state.answers['idea_testing']['rapid_prototyping']['input'] = st.text_area(
                "Input:",
                value=st.session_state.answers['idea_testing']['rapid_prototyping'].get('input', ""),
                key="rapid_prototyping_input",
                height=150
            )
        with col2:
            st.session_state.answers['idea_testing']['rapid_prototyping']['process'] = st.text_area(
                "Process:",
                value=st.session_state.answers['idea_testing']['rapid_prototyping'].get('process', ""),
                key="rapid_prototyping_process",
                height=150
            )
        with col3:
            st.session_state.answers['idea_testing']['rapid_prototyping']['output'] = st.text_area(
                "Output:",
                value=st.session_state.answers['idea_testing']['rapid_prototyping'].get('output', ""),
                key="rapid_prototyping_output",
                height=150
            )
        st.subheader("How to Approach Validation")
        col1, col2 = st.columns(2)
        with col1:
            st.session_state.answers['idea_testing']['framework'] = st.text_area(
                "Framework for validating the idea:",
                value=st.session_state.answers['idea_testing'].get('framework', ""),
                height=225
            )
        with col2:
            st.session_state.answers['idea_testing']['tools'] = st.text_area(
                "Useful libraries, tools, or assets:",
                value=st.session_state.answers['idea_testing'].get('tools', ""),
                height=225
            )
    elif current_page['input_key'] == 'capability_testing':
        st.subheader("Capability Testing")
        col1, col2, col3 = st.columns(3)
        with col1:
            st.session_state.answers['capability_testing']['capability'] = st.text_area(
                "Base capability and validation focus:",
                value=st.session_state.answers['capability_testing'].get('capability', ""),
                height=150
            )
        with col2:
            st.session_state.answers['capability_testing']['assessment_method'] = st.text_area(
                "Assessment method:",
                value=st.session_state.answers['capability_testing'].get('assessment_method', ""),
                height=150
            )
        with col3:
            st.session_state.answers['capability_testing']['success_definition'] = st.text_area(
                "Success definition:",
                value=st.session_state.answers['capability_testing'].get('success_definition', ""),
                height=150
            )
        st.subheader("Validation Criteria")
        col1, col2 = st.columns(2)
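        # st_tags returns the widget's current tag list on every rerun, so the
        # answers dict stays in sync without an explicit callback.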
        with col1:
            qualitative_criteria = st_tags(
                label='Enter Qualitative Criteria:',
                text='Press enter to add more',
                value=st.session_state.answers['capability_testing']['validation_criteria'].get('qualitative', []),
                suggestions=[],
                maxtags=5,
                key='qualitative_criteria'
            )
            st.session_state.answers['capability_testing']['validation_criteria']['qualitative'] = qualitative_criteria
            # Add a description field for each qualitative criterion
            for i, criterion in enumerate(qualitative_criteria):
                description_key = f'qual_desc_{i}'
                description = st.text_area(
                    f"Description for {criterion}:",
                    value=st.session_state.answers['capability_testing']['validation_criteria'].get(description_key, ""),
                    key=description_key,
                    height=100
                )
                st.session_state.answers['capability_testing']['validation_criteria'][description_key] = description
        with col2:
            quantitative_criteria = st_tags(
                label='Enter Quantitative Criteria:',
                text='Use format: CriteriaName[min - max] (can use %, floats, or integers)',
                value=st.session_state.answers['capability_testing']['validation_criteria'].get('quantitative', []),
                suggestions=[],
                maxtags=5,
                key='quantitative_criteria'
            )
            st.session_state.answers['capability_testing']['validation_criteria']['quantitative'] = quantitative_criteria
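            # Each criterion is expected to parse into (name, min_val, max_val,
            # is_percentage, is_integer); a falsy result marks an invalid format.
            # Percentage ranges are assumed to be normalized to 0-1 by the
            # parser, since the display below multiplies the value by 100.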
            if quantitative_criteria:
                slider_cols = st.columns(len(quantitative_criteria))
                for i, (criterion, slider_col) in enumerate(zip(quantitative_criteria, slider_cols)):
                    parsed = pdf_generator.parse_quantitative_criteria(criterion)
                    if parsed:
                        name, min_val, max_val, is_percentage, is_integer = parsed
                        current_value = st.session_state.answers['capability_testing']['validation_criteria'].get(f'quant_value_{i}', min_val)
                        with slider_col:
                            value = vertical_slider(
                                label=name,
                                key=f"quant_slider_{i}",
                                min_value=min_val,
                                max_value=max_val,
                                step=(max_val - min_val) / 100 or 0.01,  # avoid a zero step when min == max
                                default_value=current_value,
                                height=200,
                                thumb_shape="circle",
                                thumb_color="#9999FF",
                                slider_color=('green', 'orange'),
                                value_always_visible=True
                            )
                            st.session_state.answers['capability_testing']['validation_criteria'][f'quant_value_{i}'] = value
                            if is_percentage:
                                st.markdown(f"**{name}: {value*100:.1f}%**")
                            elif is_integer:
                                st.markdown(f"**{name}: {int(value)}**")
                            else:
                                st.markdown(f"**{name}: {value:.2f}**")
                    else:
                        st.warning(f"Invalid format for quantitative criterion: {criterion}")
    elif current_page['input_key'] == 'approach_comparison':
        cols = st.columns(2)
        with cols[0]:
            st.session_state.answers[current_page['input_key']]['standardization'] = st.text_area(
                "Standardization of assets and formats:",
                value=st.session_state.answers[current_page['input_key']].get('standardization', ""),
                height=300
            )
        with cols[1]:
            st.session_state.answers[current_page['input_key']]['experiment_overview'] = st.text_area(
                "Experiment overview and data capture:",
                value=st.session_state.answers[current_page['input_key']].get('experiment_overview', ""),
                height=300
            )
    elif current_page['input_key'] == 'mock_application':
        cols = st.columns(3)
        with cols[0]:
            st.session_state.answers[current_page['input_key']]['user_testing'] = st.text_area(
                "User testing approach:",
                value=st.session_state.answers[current_page['input_key']].get('user_testing', ""),
                height=300
            )
        with cols[1]:
            st.session_state.answers[current_page['input_key']]['insights'] = st.text_area(
                "Desired insights and target audience:",
                value=st.session_state.answers[current_page['input_key']].get('insights', ""),
                height=300
            )
        with cols[2]:
            st.session_state.answers[current_page['input_key']]['presentation'] = st.text_area(
                "Presentation and tools for efficient creation:",
                value=st.session_state.answers[current_page['input_key']].get('presentation', ""),
                height=300
            )
# Generate PDF button (only on the last page)
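# The PDF bytes are cached in session state so the download button survives the
# rerun triggered by clicking it; rendering it only inside the generate-button
# branch would make it disappear after one click.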
if st.session_state.current_page == len(pages) - 1:
    if st.button("Generate and Download PDF", use_container_width=True):
        st.session_state.generated_pdf = pdf_generator.generate_pdf(st.session_state)
    if st.session_state.get('generated_pdf'):
        st.download_button(
            label="Download PDF",
            data=st.session_state.generated_pdf,
            file_name="Experimental_Validation_Method_Plan.pdf",
            mime="application/pdf",
            use_container_width=True
        )
# Display progress
st.progress((st.session_state.current_page + 1) / len(pages))
st.divider()