MilanM committed
Commit 6b23862 · verified · 1 Parent(s): eb3431d

Update app.py

Files changed (1)
  1. app.py +139 -197
app.py CHANGED
@@ -1,8 +1,10 @@
1
  import streamlit as st
 
2
  from io import BytesIO
3
  import time
4
  import re
5
  from report_gen import generate_pdf
 
6
 
7
  # Set page config
8
  st.set_page_config(
@@ -11,6 +13,9 @@ st.set_page_config(
11
  initial_sidebar_state="collapsed"
12
  )
13
 
14
  # Password protection
15
  def check_password():
16
  def password_entered():
@@ -44,195 +49,106 @@ if 'current_page' not in st.session_state:
44
  st.session_state.current_page = 0
45
 
46
  if 'answers' not in st.session_state:
47
- st.session_state.answers = {
48
- 'what_evaluating': '',
49
- 'stakeholder_groups': '',
50
- 'intentional_concealment': '',
51
- 'technological_literacy': '',
52
- 'cognitive_mismatch': '',
53
- 'trust_focus': '',
54
- 'trust_source': '',
55
- 'trust_warranted': '',
56
- 'trust_conclusion': ''
57
- }
58
-
59
- def format_text(text):
60
- # Make text before colon bold
61
- text = re.sub(r'(^|[\n\r])([^:\n\r]+):', r'\1##\2**:', text)
62
-
63
- # Make text in parentheses italic
64
- text = re.sub(r'\(([^)]+)\)', r'*(\1)*', text)
65
-
66
- return text
67
 
68
  # Define the content for each page
69
  pages = [
70
  {
71
- 'title': "What Are We Evaluating?",
72
- 'content': """
73
- Are we evaluating the overall solution, or the role that the LLM itself plays within it?
74
- (If you want to do both, it is best to start with the solution and then do another evaluation for the model as part of it.)
75
- """,
76
- 'input_key': 'what_evaluating',
77
- 'input_type': 'radio',
78
- 'options': ['Overall Solution', 'Foundation Model'],
79
- 'example': """
80
- Solution: Evaluate a gen.ai chatbot for extracting precedent from legal documents for a law firm.
81
- Foundation Model: Evaluate what part Llama2, as the model inside the chatbot, may play in the risks.
82
- """
83
  },
84
  {
85
- 'title': "Identifying Main Stakeholder Groups",
86
- 'content': """
87
- We need to discover the most pressing governance barriers for our client.
88
- Specify up to three main stakeholder groups whose trust in the solution must be established for long-term success.
89
- """,
90
- 'input_key': 'stakeholder_groups',
91
- 'input_type': 'text_area',
92
- 'example': """
93
- Gen.ai chatbot solution for law firm example:
94
- Group 1 - End-users (attorneys/legal counsellors)
95
- Group 2 - IT Department (providing the tooling)
96
- Group 3 - End-clients (the plaintiffs or defendants)
97
- """
98
  },
99
  {
100
- 'title': "What is Opacity?",
101
- 'content': """
102
- Through this process we will look out for factors known as "Opacity". These are limiters that prevent us from making informed decisions, knowing why things happen, or taking accountability for issues that come up.
103
- They are not just technological problems, but societal ones as well.
104
-
105
- There are three main types we will focus on (most common and among the most dangerous):
106
- Intentional Concealment / Technological Literacy / Cognitive Mismatch
107
- """
108
  },
109
  {
110
- 'title': "Intentional Concealment",
111
- 'content': """
112
- Intentional Concealment is a form of opacity that occurs when organizations prevent or reduce access to information
113
- so that they can maintain their trade secrets or competitive advantages. It is always present in some shape or form with
114
- proprietary technologies: perhaps the vendor won't declare where the training data comes from, or the techniques
115
- they used to train their models, and so on.
116
- """,
117
- 'input_key': 'intentional_concealment',
118
- 'input_type': 'text_area',
119
- 'example': """
120
- Gen.ai chatbot solution for law firm example:
121
- Concealment 1: They don't let people audit the training data.
122
- Concealment 2: We can't deploy the model ourselves, only consume it from them via an endpoint.
123
- Transparency 1: They published an academic paper on how the model was developed.
124
- """
125
  },
126
  {
127
- 'title': "Technological Literacy",
128
- 'content': """
129
- Technological Literacy (often termed technological illiteracy in the literature) is a form
130
- of opacity that most impacts the general population, because understanding these technologies is not a common or easily accessible
131
- skill. Often what a user experiences is very different from what the technology actually does. It's similar
132
- to how a person may know how to use a smartphone, but not how to code an app for one or engineer a phone themselves.
133
- """,
134
- 'input_key': 'technological_literacy',
135
- 'input_type': 'text_area',
136
- 'example': """
137
- Gen.ai chatbot solution for law firm example:
138
- Group 1 (Attorneys) - Yes, Yes : They should be able to evaluate the output if it seems abnormal due to their domain expertise, but may not understand how the solution works in the background.
139
- Group 2 (IT Department) - Yes, Yes : They have used foundation models for other tasks before to create other solutions.
140
- Group 3 (Plaintiffs/Defendants) - Yes, No : They aren't familiar with legal precedents; that's why they hired our client to help them. They need to trust their expertise, but less so the solution.
141
- """
142
  },
143
  {
144
- 'title': "Cognitive Mismatch",
145
- 'content': """
146
- Cognitive Mismatch is the most difficult form of opacity: it's something we can only moderately protect ourselves from
147
- but never get rid of. It happens when we are dealing with systems, technologies, or scales far beyond our
148
- human point of reference. In AI systems it most often arises when we need to know exactly how a system came up
149
- with a decision. This problem is inherent to both Predictive and Generative AI, and it is the cause of the "AI black box" that you may have heard of before.
150
- """,
151
- 'input_key': 'cognitive_mismatch',
152
- 'input_type': 'text_area',
153
- 'example': """
154
- Gen.ai chatbot solution for law firm example:
155
- Group 1 - Yes: The chatbot invents a new type of precedent by hallucinating, but it turns out that it is a real thing that we never expected or noticed!?
156
- Group 2 - Yes: The government wants us to provide an explanation of why a model said what it said to a client and ruined their life, how do I do that when it has 70 billion parameters!?
157
- Group 3 - Yes: In the meeting the chatbot starts to tell the client that they will suffer 40 years of jail time for causing the death of the Dalai Lama because they didn't pay the taxes.
158
- """
159
  },
160
  {
161
- 'title': "Intrinsic and Extrinsic Trust",
162
- 'content': """
163
- Intrinsic trust comes from us: we believe we understand something well enough, or have enough
164
- experience with it. So we trust ourselves and the way we engage with the technology.
165
- Extrinsic trust comes from outside, when we rely on someone's or something's reputation, the
166
- recommendations of experts, reports about their credibility, and so forth. But it is also much more
167
- fragile than intrinsic trust.
168
-
169
- Which form of trust do we want to focus on, and what is the source of that trust? Can it be further strengthened?
170
- """,
171
- 'input_key': 'trust_focus',
172
- 'input_type': 'combined',
173
- 'options': ['Intrinsic trust', 'Extrinsic trust'],
174
- 'example': """
175
- Gen.ai chatbot solution for law firm example:
176
- We choose to rely on Intrinsic Trust in this case, because our client's customers already trust them, the IT department trusts that it can handle this solution, and we trust the expertise of our lawyers to handle any anomalies. We can also provide the IT department with tools to help them maintain their intrinsic trust in their work by monitoring it.
177
-
178
- Source of Trust: The intrinsic trust comes from the lawyers' expertise in their field and the IT department's familiarity with similar systems. We can further strengthen this by providing regular training sessions on the AI system and its limitations, as well as implementing a robust feedback mechanism for continuous improvement.
179
- """
180
  },
181
  {
182
- 'title': "Warranted or Unwarranted Trust",
183
- 'content': """
184
- Warranted trust means that we have enough arguments to say that we firmly believe
185
- we can trust ourselves to handle the use case alongside our client, their customers, and the
186
- solution itself.
187
-
188
- Unwarranted trust means that we still choose to trust our approach and case even though we
189
- don't have enough arguments and may be doing it for irrational reasons such as "it feels like it can do
190
- it well" or "the results are the only thing that matters, even if we can't make sure the tech is
191
- sound."
192
-
193
- Based on your answers and thought process above, determine whether our trust in this case is warranted or unwarranted.
194
- Then, write a short conclusion on why the trust is or isn't warranted, based on your answers from the previous sections.
195
  """,
196
- 'input_key': 'trust_warranted',
197
- 'input_type': 'combined',
198
- 'options': ['Trust is warranted', 'Trust is unwarranted'],
199
- 'example': """
200
- Gen.ai chatbot solution for law firm example:
201
- Conclusion: We believe that trust in our solution is Warranted when we consider the subject matter expertise of our clients internal legal and IT experts, as well as our recommendation of using some watsonx.governance tooling to help bolster the IT department's intrinsic trust in the solution and reduce cognitive mismatch opacity.
202
-
203
- This trust is warranted because:
204
- 1. The legal experts (end-users) have the domain knowledge to critically evaluate the chatbot's outputs.
205
- 2. The IT department has experience with similar AI systems and will be equipped with additional monitoring tools.
206
- 3. We've identified potential opacity issues and have plans to address them, such as using explainable AI techniques.
207
- 4. The solution will be continuously monitored and improved based on feedback from all stakeholder groups.
208
- """
209
  },
210
  {
211
- 'title': "Improving Trust",
212
- 'content': """
213
- Based on your evaluation, how can we improve upon this trust?
214
- Consider the different types of opacity discussed earlier and the specific needs of each stakeholder group.
215
- Provide concrete suggestions for strengthening trust in the AI solution.
216
  """,
217
- 'input_key': 'trust_improvement',
218
- 'input_type': 'text_area',
219
- 'example': """
220
- To improve trust in our example gen.ai chatbot for the law firm:
221
-
222
- 1. Transparency: Develop a clear, non-technical explanation of how the AI works for all stakeholders.
223
- 2. Training: Provide regular training sessions for attorneys on how to effectively use and critically evaluate the chatbot's outputs.
224
- 3. Feedback Loop: Implement a robust feedback mechanism where users can flag incorrect or concerning outputs, feeding into continuous improvement.
225
- 4. Explainability: Integrate explainable AI techniques to provide rationale for the chatbot's recommendations, addressing cognitive mismatch.
226
- 5. Auditing: Establish regular audits of the system's performance and decision-making processes, sharing results with stakeholders.
227
- 6. Ethical Guidelines: Develop and prominently display clear ethical guidelines for the AI's use in legal contexts.
228
- 7. Collaboration: Foster ongoing collaboration between legal experts and IT teams to bridge the technological literacy gap.
229
- 8. Gradual Rollout: Implement the solution in phases, allowing time for trust to build and for refinements based on real-world use.
230
- """
231
  },
232
  {
233
  'title': "Generate Evaluation Report",
234
- 'content': "You have completed the AI Trust and Opacity Evaluation. Click the button below to generate and download your PDF report.",
235
- 'input_key': None
236
  }
237
  ]
238
 
@@ -247,7 +163,7 @@ with col1:
247
  st.session_state.current_page -= 1
248
  st.rerun()
249
  with col3:
250
- if st.session_state.current_page < len(pages) - 1: # Changed condition to allow "Next" on the second-to-last page
251
  if st.button("Next", use_container_width=True):
252
  st.session_state.current_page += 1
253
  st.rerun()
@@ -255,43 +171,69 @@ with col3:
255
  # Display current page
256
  current_page = pages[st.session_state.current_page]
257
  st.header(current_page['title'])
258
- st.markdown(current_page['content'])
259
 
260
- # Input fields (only for pages that need input)
261
- if 'input_key' in current_page and current_page['input_key'] is not None:
262
- if current_page['input_type'] == 'radio':
263
- st.session_state.answers[current_page['input_key']] = st.radio(
264
- "Select an option:",
265
- current_page['options'],
266
- key=current_page['input_key']
267
- )
268
- elif current_page['input_type'] == 'text_area':
269
  st.session_state.answers[current_page['input_key']] = st.text_area(
270
- "Your answer:",
271
- value=st.session_state.answers.get(current_page['input_key'], ""),
272
  key=current_page['input_key'],
273
- height=300
274
  )
275
- elif current_page['input_type'] == 'combined':
276
- st.session_state.answers[current_page['input_key']] = st.radio(
277
- "Select an option:",
278
- current_page['options'],
279
- key=f"{current_page['input_key']}_radio"
280
  )
281
- st.session_state.answers[f"{current_page['input_key']}_conclusion"] = st.text_area(
282
- "Provide your conclusion:",
283
- value=st.session_state.answers.get(f"{current_page['input_key']}_conclusion", ""),
284
- key=f"{current_page['input_key']}_text_area",
285
- height=200
286
  )
287
 
288
- # Add example in an expander
289
- if 'example' in current_page:
290
- with st.expander("Reveal Example"):
291
- st.markdown(current_page['example'])
292
-
293
- # Generate PDF button (only on the last page)
294
- if st.session_state.current_page == len(pages) - 1:
295
  if st.button("Generate and Download PDF", use_container_width=True):
296
  pdf = generate_pdf(pages, st.session_state.answers)
297
  st.download_button(
@@ -305,5 +247,5 @@ if st.session_state.current_page == len(pages) - 1:
305
  # Display progress
306
  st.progress((st.session_state.current_page + 1) / len(pages))
307
 
308
- # st.divider()
309
- # st.info("Developed by Milan Mrdenovic © IBM Norway 2024")
 
1
  import streamlit as st
2
+ import streamlit_mermaid as stmd
3
  from io import BytesIO
4
  import time
5
  import re
6
  from report_gen import generate_pdf
7
+ from sol_inf import generate_inference # Assuming this function exists in sol_inf.py
8
 
9
  # Set page config
10
  st.set_page_config(
 
13
  initial_sidebar_state="collapsed"
14
  )
15
 
16
+ # Global variables
17
+ ARCHITECTURE_VIEW_TYPE = 0 # 0 for SVG, 1 for Mermaid
18
+
19
  # Password protection
20
  def check_password():
21
  def password_entered():
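The hunk above shows only the nested signatures of check_password and password_entered; the bodies fall outside the diff context. A minimal sketch of the usual Streamlit password-gate pattern this appears to follow (assuming the password is kept in st.secrets under a hypothetical "password" key; illustrative only, not the repository's actual code):

def check_password():
    # Returns True once the correct password has been entered in this session.
    def password_entered():
        # Compare the typed value against the configured secret, then drop the raw value from state.
        if st.session_state.get("password") == st.secrets["password"]:
            st.session_state["password_correct"] = True
            del st.session_state["password"]
        else:
            st.session_state["password_correct"] = False

    if st.session_state.get("password_correct", False):
        return True
    st.text_input("Password", type="password", on_change=password_entered, key="password")
    if st.session_state.get("password_correct") is False:
        st.error("Incorrect password")
    return False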
 
49
  st.session_state.current_page = 0
50
 
51
  if 'answers' not in st.session_state:
52
+ st.session_state.answers = {}
53
 
54
  # Define the content for each page
55
  pages = [
56
  {
57
+ 'title': "Beyond Basic T's & C's",
58
+ 'content': "This section evaluates the Acceptable Use Policy of AI solutions.",
59
+ 'type': 'description',
60
+ 'image': "path_to_image.jpg",
61
+ 'expander_content': "Additional information about Acceptable Use Policy"
62
  },
63
  {
64
+ 'title': "Case Study 1",
65
+ 'type': 'quiz',
66
+ 'image': "path_to_case1_image.jpg",
67
+ 'description': "Description of Case Study 1",
68
+ 'input_key': 'case1_answer',
69
+ 'slider_key': 'case1_risk',
70
+ 'expander_image': "path_to_tc_snippet1.jpg"
71
  },
72
  {
73
+ 'title': "Case Study 2",
74
+ 'type': 'quiz',
75
+ 'image': "path_to_case2_image.jpg",
76
+ 'description': "Description of Case Study 2",
77
+ 'input_key': 'case2_answer',
78
+ 'slider_key': 'case2_risk',
79
+ 'expander_image': "path_to_tc_snippet2.jpg"
 
80
  },
81
  {
82
+ 'title': "Case Study 3",
83
+ 'type': 'quiz',
84
+ 'image': "path_to_case3_image.jpg",
85
+ 'description': "Description of Case Study 3",
86
+ 'input_key': 'case3_answer',
87
+ 'slider_key': 'case3_risk',
88
+ 'expander_image': "path_to_tc_snippet3.jpg"
89
  },
90
  {
91
+ 'title': "Solution Robustness",
92
+ 'content': "This section evaluates the robustness of AI solutions.",
93
+ 'type': 'description',
94
+ 'image': "path_to_robustness_image.jpg",
95
+ 'expander_content': "Additional information about Solution Robustness"
96
  },
97
  {
98
+ 'title': "Robustness Test 1",
99
+ 'type': 'inference',
100
+ 'prompt_description': "Description of Robustness Test 1",
101
+ 'input_key': 'robustness1_input',
102
+ 'output_key': 'robustness1_output',
103
+ 'placeholder': "Enter your test input here..."
104
  },
105
  {
106
+ 'title': "Robustness Test 2",
107
+ 'type': 'inference',
108
+ 'prompt_description': "Description of Robustness Test 2",
109
+ 'input_key': 'robustness2_input',
110
+ 'output_key': 'robustness2_output',
111
+ 'placeholder': "Enter your test input here..."
112
  },
113
  {
114
+ 'title': "Data Security",
115
+ 'content': "This section evaluates the data security aspects of AI solutions.",
116
+ 'type': 'description',
117
+ 'image': "path_to_security_image.jpg",
118
+ 'expander_content': "Additional information about Data Security"
119
+ },
120
+ {
121
+ 'title': "Security Analysis 1",
122
+ 'type': 'architecture',
123
+ 'svg_path': "path_to_architecture_svg1.svg",
124
+ 'mermaid_code': """
125
+ graph TD
126
+ A[Client] --> B[Load Balancer]
127
+ B --> C[Web Server]
128
+ C --> D[Application Server]
129
+ D --> E[Database]
130
  """,
131
+ 'expander_content': "Explanation of the solution architecture",
132
+ 'input_key': 'security1_analysis'
133
  },
134
  {
135
+ 'title': "Security Analysis 2",
136
+ 'type': 'architecture',
137
+ 'svg_path': "path_to_architecture_svg2.svg",
138
+ 'mermaid_code': """
139
+ graph TD
140
+ A[User] --> B[API Gateway]
141
+ B --> C[Authentication Service]
142
+ B --> D[Data Processing Service]
143
+ D --> E[Storage Service]
144
  """,
145
+ 'expander_content': "Explanation of the solution architecture",
146
+ 'input_key': 'security2_analysis'
147
  },
148
  {
149
  'title': "Generate Evaluation Report",
150
+ 'content': "You have completed the Critical Infrastructure - LLM Considerations evaluation. Click the button below to generate and download your PDF report.",
151
+ 'type': 'report'
152
  }
153
  ]
154
 
 
163
  st.session_state.current_page -= 1
164
  st.rerun()
165
  with col3:
166
+ if st.session_state.current_page < len(pages) - 1:
167
  if st.button("Next", use_container_width=True):
168
  st.session_state.current_page += 1
169
  st.rerun()
 
171
  # Display current page
172
  current_page = pages[st.session_state.current_page]
173
  st.header(current_page['title'])
 
174
 
175
+ if current_page['type'] == 'description':
176
+ st.write(current_page['content'])
177
+ st.image(current_page['image'])
178
+ with st.expander("Learn More"):
179
+ st.write(current_page['expander_content'])
180
+
181
+ elif current_page['type'] == 'quiz':
182
+ col1, col2 = st.columns(2)
183
+ with col1:
184
+ st.image(current_page['image'])
185
+ st.write(current_page['description'])
186
+ with col2:
187
  st.session_state.answers[current_page['input_key']] = st.text_area(
188
+ "Your analysis:",
 
189
  key=current_page['input_key'],
190
+ height=200
191
  )
192
+ st.session_state.answers[current_page['slider_key']] = st.slider(
193
+ "Risk Level",
194
+ 1, 10,
195
+ key=current_page['slider_key']
 
196
  )
197
+ with st.expander("View T&C Snippet"):
198
+ st.image(current_page['expander_image'])
199
+
200
+ elif current_page['type'] == 'inference':
201
+ col1, col2 = st.columns(2)
202
+ with col1:
203
+ with st.expander("Prompt Description"):
204
+ st.write(current_page['prompt_description'])
205
+ user_input = st.text_area(
206
+ "Enter your test input:",
207
+ key=current_page['input_key'],
208
+ height=200,
209
+ placeholder=current_page['placeholder']
210
+ )
211
+ if st.button("Infer"):
212
+ st.session_state.answers[current_page['input_key']] = user_input
213
+ st.session_state.answers[current_page['output_key']] = generate_inference(user_input)
214
+ with col2:
215
+ if current_page['output_key'] in st.session_state.answers:
216
+ st.write("Inference Result:")
217
+ st.write(st.session_state.answers[current_page['output_key']])
218
+
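The inference branch above calls generate_inference, imported from sol_inf.py, which the diff itself flags as an assumption ("Assuming this function exists in sol_inf.py"). All the call site requires is a function that takes the user's test input and returns something st.write can display. A placeholder stub satisfying that contract (purely illustrative; the real module presumably calls an LLM endpoint):

# sol_inf.py -- illustrative stub only
def generate_inference(user_input: str) -> str:
    # Echo-style placeholder so the Streamlit flow can be exercised end to end
    # before the real model call is wired in.
    if not user_input or not user_input.strip():
        return "No input provided."
    return f"[stub] Model output for: {user_input}"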
219
+ elif current_page['type'] == 'architecture':
220
+ col1, col2 = st.columns(2)
221
+ with col1:
222
+ if ARCHITECTURE_VIEW_TYPE == 0:
223
+ st.image(current_page['svg_path'])
224
+ else:
225
+ stmd.st_mermaid(current_page['mermaid_code'])
226
+ with st.expander("Solution Explanation"):
227
+ st.write(current_page['expander_content'])
228
+ with col2:
229
+ st.session_state.answers[current_page['input_key']] = st.text_area(
230
+ "Your security analysis:",
231
+ key=current_page['input_key'],
232
+ height=300
233
  )
234
 
235
+ elif current_page['type'] == 'report':
236
+ st.write(current_page['content'])
237
  if st.button("Generate and Download PDF", use_container_width=True):
238
  pdf = generate_pdf(pages, st.session_state.answers)
239
  st.download_button(
 
247
  # Display progress
248
  st.progress((st.session_state.current_page + 1) / len(pages))
249
 
250
+ st.divider()
251
+ st.info("Developed by Milan Mrdenovic © IBM Norway 2024")
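generate_pdf is imported from report_gen.py, which is not part of this diff; the call site only shows that it takes the pages list and the answers dict and returns data that st.download_button can serve. A rough sketch of such a helper, assuming the fpdf2 library and in-memory output (the module contents and the download_button arguments below are illustrative, not the repository's actual code):

# report_gen.py -- illustrative sketch assuming fpdf2 (pip install fpdf2)
from fpdf import FPDF

def generate_pdf(pages, answers):
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    for page in pages:
        pdf.add_page()
        pdf.set_font("Helvetica", "B", 14)
        pdf.multi_cell(0, 10, page["title"])
        pdf.set_font("Helvetica", size=11)
        # Write whichever answers this page collected, if any were filled in.
        for key_field in ("input_key", "slider_key", "output_key"):
            answer = answers.get(page.get(key_field, ""), "")
            if answer:
                pdf.multi_cell(0, 8, str(answer))
    return bytes(pdf.output())

At the call site the result would then be served along the lines of:

st.download_button("Download report", data=pdf, file_name="evaluation_report.pdf", mime="application/pdf")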