Spaces:

NCEE-Build-Lab-Archive
/

EXVAL

Sleeping

App Files Files Community

MilanM committed on Aug 26, 2024

Commit

7fcc82d

verified ·

1 Parent(s): 48ddb73

Create app.py

Browse files

Files changed (1) hide show

app.py +328 -0

app.py ADDED Viewed

	@@ -0,0 +1,328 @@

+import streamlit as st
+import pandas as pd
+import matplotlib.pyplot as plt
+from io import BytesIO
+import base64
+import random
+import io
+import re
+from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
+from streamlit_tags import st_tags
+from streamlit_vertical_slider import vertical_slider
+import pdf_generator
+# Set page config
+st.set_page_config(
+    page_title="Experimental Validation Method Planner",
+    page_icon="🧪",
+    layout="wide",
+    initial_sidebar_state="collapsed"
+)
+# Password protection
+def check_password():
+    def password_entered():
+        if st.session_state["password"] == st.secrets["app_password"]:
+            st.session_state["password_correct"] = True
+            del st.session_state["password"]
+        else:
+            st.session_state["password_correct"] = False
+    if "password_correct" not in st.session_state:
+        st.markdown("\n\n")
+        st.text_input("Enter the password", type="password", on_change=password_entered, key="password")
+        st.divider()
+        st.info("Developed by Milan Mrdenovic © IBM Norway 2024")
+        return False
+    elif not st.session_state["password_correct"]:
+        st.markdown("\n\n")
+        st.text_input("Enter the password", type="password", on_change=password_entered, key="password")
+        st.divider()
+        st.info("Developed by Milan Mrdenovic © IBM Norway 2024")
+        st.error("😕 Password incorrect")
+        return False
+    else:
+        return True
+if not check_password():
+    st.stop()
+# Initialize session state
+if 'current_page' not in st.session_state:
+    st.session_state.current_page = 0
+if 'answers' not in st.session_state:
+    st.session_state.answers = {
+        'idea_testing': {
+            'rapid_prototyping': {'input': '', 'process': '', 'output': ''},
+            'framework': '',
+            'tools': ''
+        },
+        'capability_testing': {
+            'capability': '',
+            'assessment_method': '',
+            'success_definition': '',
+            'validation_criteria': {'qualitative': [], 'quantitative': []}
+        },
+        'approach_comparison': {'standardization': '', 'experiment_overview': ''},
+        'mock_application': {'user_testing': '', 'insights': '', 'presentation': ''}
+    }
+# Define the content for each page
+# Each entry describes one step of the planner:
+#   'title'      - heading passed to st.header for the step
+#   'content'    - markdown shown inside the "Description" expander
+#   'input_key'  - key into st.session_state.answers that selects which form
+#                  is rendered; None means the step has no form
+#   'input_type' - not read by the code visible in this file
+pages = [
+    {
+        'title': "Idea Testing",
+        'content': """
+        Can we create a framework to validate the idea/capability without building a pilot version? Think of it as a sketch, e.g. - how do we choose the right tooling/LLM and test them out on a mock application or GUI.
+        E.g. If we use an LLM to summarize a type of information, can we validate that THIS data provided in THIS form reacts as we expect or desire? If we give documentation -> does it transform it in a desired form -> Is it robust and repeatable? Is it susceptible to meddling or interference?
+        \nCan we create a good overview based on performance? \nDo we know any libraries, tools, assets that can help us expedite this process?
+        Consider elements such as how to validate capability robustness, repeatability or how we can evaluate the solution's susceptibility to meddling.
+        """,
+        'input_key': 'idea_testing',
+        'input_type': 'custom'
+    },
+    {
+        'title': "Capability Testing",
+        'content': """
+        What is the root of the capability we are looking for and what do we need to validate? (e.g., retrieval of relevant documentation, data transformation, performance/precision)
+        How will we assess it? (e.g., batches of prompts, mock data, human evaluation, metrics like f1 scores) How do we define success?
+        \nDefine the qualitative and quantitative validation criteria. For quantitative criteria, use the format: CriteriaName[min - max]  Example: F1_Score[0.0 - 1.0] or Accuracy[0% - 100%]
+        """,
+        'input_key': 'capability_testing',
+        'input_type': 'custom'
+    },
+    {
+        'title': "Approach Comparison",
+        'content': """
+        How do we compare different approaches to define the best option? Can we formulate our methodology around creating reusable or ready-to-go assets in standardized formats?
+        How can we maintain an overview of our different experiments in one place? Do we want to capture high fidelity data (e.g., costs, durations)?
+        """,
+        'input_key': 'approach_comparison',
+        'input_type': 'custom'
+    },
+    {
+        'title': "Mock Application",
+        'content': """
+        How do we want to perform user testing or utilize the results of our experiment? What insights do we want to capture and from whom?
+        How polished should the mock application be? How do we prepare it for showcase? What tools can we use to create it efficiently? (e.g., Streamlit, Gradio, Hugging Face Spaces)
+        """,
+        'input_key': 'mock_application',
+        'input_type': 'custom'
+    },
+    # Final step: no form, only the PDF export button.
+    {
+        'title': "Generate Evaluation Report",
+        'content': "You have completed the Experimental Validation Method Planner. \nClick the button below to generate and download your PDF report.",
+        'input_key': None
+    }
+]
+# Store the page definitions in session state; the whole session state is
+# later handed to pdf_generator.generate_pdf (presumably it reads them there;
+# confirm in pdf_generator).
+st.session_state.pages = pages
+# Main Streamlit app
+st.title("Experimental Validation Method Planner")
+# Navigation buttons
+col1, col2, col3 = st.columns([1, 2, 1])
+with col1:
+    if st.session_state.current_page > 0:
+        if st.button("Back"):
+            st.session_state.current_page -= 1
+            st.rerun()
+with col3:
+    if st.session_state.current_page < len(pages) - 1:
+        if st.button("Next", use_container_width=True):
+            st.session_state.current_page += 1
+            st.rerun()
+# Display current page
+current_page = pages[st.session_state.current_page]
+st.header(current_page['title'])
+with st.expander("Description", expanded=False):
+    st.markdown(current_page['content'])
+# Input fields
+if 'input_key' in current_page and current_page['input_key'] is not None:
+    if current_page['input_key'] == 'idea_testing':
+        st.subheader("Idea Testing")
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            st.session_state.answers['idea_testing']['rapid_prototyping']['input'] = st.text_area(
+                "Input:",
+                value=st.session_state.answers['idea_testing']['rapid_prototyping'].get('input', ""),
+                key="rapid_prototyping_input",
+                height=150
+            )
+        with col2:
+            st.session_state.answers['idea_testing']['rapid_prototyping']['process'] = st.text_area(
+                "Process:",
+                value=st.session_state.answers['idea_testing']['rapid_prototyping'].get('process', ""),
+                key="rapid_prototyping_process",
+                height=150
+            )
+        with col3:
+            st.session_state.answers['idea_testing']['rapid_prototyping']['output'] = st.text_area(
+                "Output:",
+                value=st.session_state.answers['idea_testing']['rapid_prototyping'].get('output', ""),
+                key="rapid_prototyping_output",
+                height=150
+            )
+        st.subheader("How to Approach Validation")
+        col1, col2 = st.columns(2)
+        with col1:
+            st.session_state.answers['idea_testing']['framework'] = st.text_area(
+                "Framework for validating the idea:",
+                value=st.session_state.answers['idea_testing'].get('framework', ""),
+                height=225
+            )
+        with col2:
+            st.session_state.answers['idea_testing']['tools'] = st.text_area(
+                "Useful libraries, tools, or assets:",
+                value=st.session_state.answers['idea_testing'].get('tools', ""),
+                height=225
+            )
+    elif current_page['input_key'] == 'capability_testing':
+        st.subheader("Capability Testing")
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            st.session_state.answers['capability_testing']['capability'] = st.text_area(
+                "Base capability and validation focus:",
+                value=st.session_state.answers['capability_testing'].get('capability', ""),
+                height=150
+            )
+        with col2:
+            st.session_state.answers['capability_testing']['assessment_method'] = st.text_area(
+                "Assessment method:",
+                value=st.session_state.answers['capability_testing'].get('assessment_method', ""),
+                height=150
+            )
+        with col3:
+            st.session_state.answers['capability_testing']['success_definition'] = st.text_area(
+                "Success definition:",
+                value=st.session_state.answers['capability_testing'].get('success_definition', ""),
+                height=150
+            )
+        st.subheader("Validation Criteria")
+        col1, col2 = st.columns(2)
+        with col1:
+            qualitative_criteria = st_tags(
+                label='Enter Qualitative Criteria:',
+                text='Press enter to add more',
+                value=st.session_state.answers['capability_testing']['validation_criteria'].get('qualitative', []),
+                suggestions=[],
+                maxtags=5,
+                key='qualitative_criteria'
+            )
+            st.session_state.answers['capability_testing']['validation_criteria']['qualitative'] = qualitative_criteria
+            # Add description fields for each qualitative criterion
+            for i, criterion in enumerate(qualitative_criteria):
+                description_key = f'qual_desc_{i}'
+                description = st.text_area(
+                    f"Description for {criterion}:",
+                    value=st.session_state.answers['capability_testing']['validation_criteria'].get(description_key, ""),
+                    key=description_key,
+                    height=100
+                )
+                st.session_state.answers['capability_testing']['validation_criteria'][description_key] = description
+        with col2:
+            quantitative_criteria = st_tags(
+                label='Enter Quantitative Criteria:',
+                text='Use format: CriteriaName[min - max] (can use %, floats, or integers)',
+                value=st.session_state.answers['capability_testing']['validation_criteria'].get('quantitative', []),
+                suggestions=[],
+                maxtags=5,
+                key='quantitative_criteria'
+            )
+            st.session_state.answers['capability_testing']['validation_criteria']['quantitative'] = quantitative_criteria
+            if quantitative_criteria:
+                slider_cols = st.columns(len(quantitative_criteria))
+                for i, (criterion, slider_col) in enumerate(zip(quantitative_criteria, slider_cols)):
+                    parsed = pdf_generator.parse_quantitative_criteria(criterion)
+                    if parsed:
+                        name, min_val, max_val, is_percentage, is_integer = parsed
+                        current_value = st.session_state.answers['capability_testing']['validation_criteria'].get(f'quant_value_{i}', min_val)
+                        with slider_col:
+                            value = vertical_slider(
+                                label=name,
+                                key=f"quant_slider_{i}",
+                                min_value=min_val,
+                                max_value=max_val,
+                                step=(max_val - min_val) / 100,
+                                default_value=current_value,
+                                height=200,
+                                thumb_shape="circle",
+                                thumb_color="#9999FF",
+                                slider_color=('green', 'orange'),
+                                value_always_visible=True
+                            )
+                            st.session_state.answers['capability_testing']['validation_criteria'][f'quant_value_{i}'] = value
+                            if is_percentage:
+                                st.markdown(f"**{name}: {value*100:.1f}%**")
+                            elif is_integer:
+                                st.markdown(f"**{name}: {int(value)}**")
+                            else:
+                                st.markdown(f"**{name}: {value:.2f}**")
+                    else:
+                        st.warning(f"Invalid format for quantitative criterion: {criterion}")
+    elif current_page['input_key'] == 'approach_comparison':
+        cols = st.columns(2)
+        with cols[0]:
+            st.session_state.answers[current_page['input_key']]['standardization'] = st.text_area(
+                "Standardization of assets and formats:",
+                value=st.session_state.answers[current_page['input_key']].get('standardization', ""),
+                height=300
+            )
+        with cols[1]:
+            st.session_state.answers[current_page['input_key']]['experiment_overview'] = st.text_area(
+                "Experiment overview and data capture:",
+                value=st.session_state.answers[current_page['input_key']].get('experiment_overview', ""),
+                height=300
+            )
+    elif current_page['input_key'] == 'mock_application':
+        cols = st.columns(3)
+        with cols[0]:
+            st.session_state.answers[current_page['input_key']]['user_testing'] = st.text_area(
+                "User testing approach:",
+                value=st.session_state.answers[current_page['input_key']].get('user_testing', ""),
+                height=300
+            )
+        with cols[1]:
+            st.session_state.answers[current_page['input_key']]['insights'] = st.text_area(
+                "Desired insights and target audience:",
+                value=st.session_state.answers[current_page['input_key']].get('insights', ""),
+                height=300
+            )
+        with cols[2]:
+            st.session_state.answers[current_page['input_key']]['presentation'] = st.text_area(
+                "Presentation and tools for efficient creation:",
+                value=st.session_state.answers[current_page['input_key']].get('presentation', ""),
+                height=300
+            )
+# Generate PDF button (only on the last page)
+if st.session_state.current_page == len(pages) - 1:
+    if st.button("Generate and Download PDF", use_container_width=True):
+        pdf = pdf_generator.generate_pdf(st.session_state)
+        st.download_button(
+            label="Download PDF",
+            data=pdf,
+            file_name="Experimental_Validation_Method_Plan.pdf",
+            mime="application/pdf",
+            use_container_width=True
+        )
+# Display progress
+st.progress((st.session_state.current_page + 1) / len(pages))
+st.divider()