MilanM committed on
Commit
7fcc82d
·
verified ·
1 Parent(s): 48ddb73

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +328 -0
app.py ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ from io import BytesIO
5
+ import base64
6
+ import random
7
+ import io
8
+ import re
9
+ from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
10
+ from streamlit_tags import st_tags
11
+ from streamlit_vertical_slider import vertical_slider
12
+ import pdf_generator
13
+
14
# Page-level settings: browser-tab title and icon, wide layout, and a
# sidebar that starts collapsed.
_PAGE_SETTINGS = {
    "page_title": "Experimental Validation Method Planner",
    "page_icon": "🧪",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_PAGE_SETTINGS)
21
+
22
# Password protection
def check_password():
    """Gate the app behind the shared password in ``st.secrets["app_password"]``.

    While the visitor is not authenticated, a password prompt (plus the
    credits banner) is rendered. After a failed attempt an error message is
    shown below the prompt as well.

    Returns:
        bool: True once the correct password has been entered during this
        session; False otherwise, in which case the prompt was rendered.
    """
    # Local import so this block does not depend on the file's import header.
    import hmac

    def password_entered():
        """Callback for the prompt: record whether the submitted value matches."""
        submitted = st.session_state["password"].encode("utf-8")
        # str(): a secret parsed from TOML is not guaranteed to be a string.
        expected = str(st.secrets["app_password"]).encode("utf-8")
        # compare_digest is constant-time; comparing bytes (not str) keeps it
        # working for non-ASCII passwords.
        if hmac.compare_digest(submitted, expected):
            st.session_state["password_correct"] = True
            # Do not keep the plaintext password in session state.
            del st.session_state["password"]
        else:
            st.session_state["password_correct"] = False

    if st.session_state.get("password_correct", False):
        return True

    # Not authenticated (first visit or failed attempt): render the prompt once.
    st.markdown("\n\n")
    st.text_input("Enter the password", type="password", on_change=password_entered, key="password")
    st.divider()
    st.info("Developed by Milan Mrdenovic © IBM Norway 2024")
    # The flag only exists after at least one attempt, so its presence here
    # means the last attempt failed.
    if "password_correct" in st.session_state:
        st.error("😕 Password incorrect")
    return False
46
+
47
# Halt this script run here unless the visitor has passed the password check.
_is_authenticated = check_password()
if not _is_authenticated:
    st.stop()
49
+
50
# Initialize session state
# `current_page` is the index into `pages`; `answers` holds one nested dict
# per questionnaire page. Both are created only when absent, so existing
# values in the session are left untouched.
if 'current_page' not in st.session_state:
    st.session_state['current_page'] = 0

if 'answers' not in st.session_state:
    _idea_testing_defaults = {
        'rapid_prototyping': {'input': '', 'process': '', 'output': ''},
        'framework': '',
        'tools': '',
    }
    _capability_testing_defaults = {
        'capability': '',
        'assessment_method': '',
        'success_definition': '',
        'validation_criteria': {'qualitative': [], 'quantitative': []},
    }
    _approach_comparison_defaults = {'standardization': '', 'experiment_overview': ''}
    _mock_application_defaults = {'user_testing': '', 'insights': '', 'presentation': ''}

    st.session_state['answers'] = {
        'idea_testing': _idea_testing_defaults,
        'capability_testing': _capability_testing_defaults,
        'approach_comparison': _approach_comparison_defaults,
        'mock_application': _mock_application_defaults,
    }
70
+
71
# Define the content for each page
# One entry per wizard step, in display order. The first four steps collect
# answers under `input_key`; the final step has no inputs (`input_key` is
# None and it carries no `input_type`).
_IDEA_TESTING_TEXT = """
Can we create a framework to validate the idea/capability without building a pilot version? Think of it as a sketch, e.g. - how do we choose the right tooling/LLM and test them out on a mock application or GUI.
E.g. If we use an LLM to summarize a type of information, can we validate that THIS data provided in THIS form reacts as we expect or desire? If we give documentation -> does it transform it in a desired form -> Is it robust and repeatable? Is it susceptible to meddling or interference?
\nCan we create a good overview based on performance? \nDo we know any libraries, tools, assets that can help us expedite this process?
Consider elements such as how to validate capability robustness, repeatability or how we can evaluate the solution's susceptibility to meddling.
"""

_CAPABILITY_TESTING_TEXT = """
What is the root of the capability we are looking for and what do we need to validate? (e.g., retrieval of relevant documentation, data transformation, performance/precision)
How will we assess it? (e.g., batches of prompts, mock data, human evaluation, metrics like f1 scores) How do we define success?
\nDefine the qualitative and quantitative validation criteria. For quantitative criteria, use the format: CriteriaName[min - max] Example: F1_Score[0.0 - 1.0] or Accuracy[0% - 100%]
"""

_APPROACH_COMPARISON_TEXT = """
How do we compare different approaches to define the best option? Can we formulate our methodology around creating reusable or ready-to-go assets in standardized formats?
How can we maintain an overview of our different experiments in one place? Do we want to capture high fidelity data (e.g., costs, durations)?
"""

_MOCK_APPLICATION_TEXT = """
How do we want to perform user testing or utilize the results of our experiment? What insights do we want to capture and from whom?
How polished should the mock application be? How do we prepare it for showcase? What tools can we use to create it efficiently? (e.g., Streamlit, Gradio, Hugging Face Spaces)
"""

_REPORT_TEXT = "You have completed the Experimental Validation Method Planner. \nClick the button below to generate and download your PDF report."

pages = [
    dict(title="Idea Testing", content=_IDEA_TESTING_TEXT,
         input_key='idea_testing', input_type='custom'),
    dict(title="Capability Testing", content=_CAPABILITY_TESTING_TEXT,
         input_key='capability_testing', input_type='custom'),
    dict(title="Approach Comparison", content=_APPROACH_COMPARISON_TEXT,
         input_key='approach_comparison', input_type='custom'),
    dict(title="Mock Application", content=_MOCK_APPLICATION_TEXT,
         input_key='mock_application', input_type='custom'),
    dict(title="Generate Evaluation Report", content=_REPORT_TEXT,
         input_key=None),
]
118
+
119
# Expose the page definitions through session state (the whole session
# state is later handed to the PDF generator).
st.session_state.pages = pages

# Main Streamlit app
st.title("Experimental Validation Method Planner")

# Navigation buttons
# Three columns: Back on the left, a spacer in the middle, Next on the right.
_back_slot, _spacer, _next_slot = st.columns([1, 2, 1])
_page_index = st.session_state.current_page
_last_index = len(pages) - 1

with _back_slot:
    # Short-circuit keeps the button from being rendered on the first page.
    if _page_index > 0 and st.button("Back"):
        st.session_state.current_page -= 1
        st.rerun()

with _next_slot:
    # Likewise, no Next button on the final page.
    if _page_index < _last_index and st.button("Next", use_container_width=True):
        st.session_state.current_page += 1
        st.rerun()
136
+
137
# Display current page
# `current_page` is the page definition for the active step; later sections
# of the script read it to decide which inputs to render.
current_page = pages[st.session_state.current_page]
st.header(current_page['title'])

# The guidance text is tucked into a collapsed expander.
_description_box = st.expander("Description", expanded=False)
_description_box.markdown(current_page['content'])
143
+
144
# Input fields
def _text_answer(section, field, label, height, key=None):
    """Render a text area bound to ``section[field]`` and store its value.

    Args:
        section: Mutable dict inside ``st.session_state.answers``.
        field: Key in ``section`` used for the initial value and the result.
        label: Label passed to ``st.text_area``.
        height: Height passed to ``st.text_area``.
        key: Optional explicit widget key; ``None`` lets Streamlit derive one.
    """
    section[field] = st.text_area(label, value=section.get(field, ""), key=key, height=height)


def _render_idea_testing():
    """Render the inputs of the "Idea Testing" page."""
    idea = st.session_state.answers['idea_testing']
    prototype = idea['rapid_prototyping']

    st.subheader("Idea Testing")
    stages = (('input', "Input:"), ('process', "Process:"), ('output', "Output:"))
    for column, (field, label) in zip(st.columns(3), stages):
        with column:
            _text_answer(prototype, field, label, 150, key=f"rapid_prototyping_{field}")

    st.subheader("How to Approach Validation")
    left, right = st.columns(2)
    with left:
        _text_answer(idea, 'framework', "Framework for validating the idea:", 225)
    with right:
        _text_answer(idea, 'tools', "Useful libraries, tools, or assets:", 225)


def _render_quantitative_slider(criteria, index, criterion, slider_col):
    """Render the slider for one quantitative criterion and store its value.

    Args:
        criteria: The ``validation_criteria`` dict; the slider value is stored
            under ``quant_value_<index>``.
        index: Position of the criterion in the tag list.
        criterion: Raw tag text, parsed by
            ``pdf_generator.parse_quantitative_criteria``.
        slider_col: Streamlit column in which the slider is drawn.
    """
    parsed = pdf_generator.parse_quantitative_criteria(criterion)
    if parsed:
        name, min_val, max_val, is_percentage, is_integer = parsed
    # An empty or inverted range cannot drive a slider (the step would be 0
    # or negative), so it is reported like any other malformed criterion.
    if not parsed or max_val <= min_val:
        st.warning(f"Invalid format for quantitative criterion: {criterion}")
        return

    value_key = f'quant_value_{index}'
    # Stored values are keyed by position, so after tags are edited or
    # reordered the stored number may belong to a different range: clamp it.
    current_value = min(max(criteria.get(value_key, min_val), min_val), max_val)

    span = max_val - min_val
    # Integer criteria move in whole units; everything else in 1% of the range.
    step = 1 if is_integer and span >= 1 else span / 100

    with slider_col:
        value = vertical_slider(
            label=name,
            key=f"quant_slider_{index}",
            min_value=min_val,
            max_value=max_val,
            step=step,
            default_value=current_value,
            height=200,
            thumb_shape="circle",
            thumb_color="#9999FF",
            slider_color=('green', 'orange'),
            value_always_visible=True
        )

    criteria[value_key] = value

    # NOTE(review): the percentage branch multiplies by 100, so it presumably
    # expects the parser to return percentage bounds as fractions — confirm
    # against pdf_generator.
    if is_percentage:
        st.markdown(f"**{name}: {value*100:.1f}%**")
    elif is_integer:
        st.markdown(f"**{name}: {int(value)}**")
    else:
        st.markdown(f"**{name}: {value:.2f}**")


def _render_validation_criteria(criteria):
    """Render the qualitative and quantitative criteria editors side by side."""
    st.subheader("Validation Criteria")
    qual_col, quant_col = st.columns(2)

    with qual_col:
        qualitative_criteria = st_tags(
            label='Enter Qualitative Criteria:',
            text='Press enter to add more',
            value=criteria.get('qualitative', []),
            suggestions=[],
            maxtags=5,
            key='qualitative_criteria'
        )
        criteria['qualitative'] = qualitative_criteria

        # Add description fields for each qualitative criterion
        for i, criterion in enumerate(qualitative_criteria):
            description_key = f'qual_desc_{i}'
            _text_answer(criteria, description_key, f"Description for {criterion}:", 100,
                         key=description_key)

    with quant_col:
        quantitative_criteria = st_tags(
            label='Enter Quantitative Criteria:',
            text='Use format: CriteriaName[min - max] (can use %, floats, or integers)',
            value=criteria.get('quantitative', []),
            suggestions=[],
            maxtags=5,
            key='quantitative_criteria'
        )
        criteria['quantitative'] = quantitative_criteria

        if quantitative_criteria:
            slider_cols = st.columns(len(quantitative_criteria))
            for i, (criterion, slider_col) in enumerate(zip(quantitative_criteria, slider_cols)):
                _render_quantitative_slider(criteria, i, criterion, slider_col)


def _render_capability_testing():
    """Render the inputs of the "Capability Testing" page."""
    capability = st.session_state.answers['capability_testing']

    st.subheader("Capability Testing")
    fields = (
        ('capability', "Base capability and validation focus:"),
        ('assessment_method', "Assessment method:"),
        ('success_definition', "Success definition:"),
    )
    for column, (field, label) in zip(st.columns(3), fields):
        with column:
            _text_answer(capability, field, label, 150)

    _render_validation_criteria(capability['validation_criteria'])


def _render_approach_comparison():
    """Render the inputs of the "Approach Comparison" page."""
    comparison = st.session_state.answers['approach_comparison']
    fields = (
        ('standardization', "Standardization of assets and formats:"),
        ('experiment_overview', "Experiment overview and data capture:"),
    )
    for column, (field, label) in zip(st.columns(2), fields):
        with column:
            _text_answer(comparison, field, label, 300)


def _render_mock_application():
    """Render the inputs of the "Mock Application" page."""
    mock_app = st.session_state.answers['mock_application']
    fields = (
        ('user_testing', "User testing approach:"),
        ('insights', "Desired insights and target audience:"),
        ('presentation', "Presentation and tools for efficient creation:"),
    )
    for column, (field, label) in zip(st.columns(3), fields):
        with column:
            _text_answer(mock_app, field, label, 300)


# Map each page's `input_key` to the function that draws its inputs. Pages
# without an `input_key` (or with None / an unknown key) render no inputs.
_INPUT_RENDERERS = {
    'idea_testing': _render_idea_testing,
    'capability_testing': _render_capability_testing,
    'approach_comparison': _render_approach_comparison,
    'mock_application': _render_mock_application,
}
_render_inputs = _INPUT_RENDERERS.get(current_page.get('input_key'))
if _render_inputs is not None:
    _render_inputs()
312
+
313
# Generate PDF button (only on the last page)
# The report is built from the whole session state when the first button is
# pressed; the download button is only rendered during that same run.
_on_final_page = st.session_state.current_page == len(pages) - 1
if _on_final_page and st.button("Generate and Download PDF", use_container_width=True):
    st.download_button(
        label="Download PDF",
        data=pdf_generator.generate_pdf(st.session_state),
        file_name="Experimental_Validation_Method_Plan.pdf",
        mime="application/pdf",
        use_container_width=True,
    )
324
+
325
# Display progress
# Fraction of steps reached so far, counting the current one (1-based).
_steps_reached = st.session_state.current_page + 1
st.progress(_steps_reached / len(pages))

st.divider()