Update app.py
app.py CHANGED
Before (lines removed in this commit are marked with "-"):

@@ -1,8 +1,10 @@
 import streamlit as st
 from io import BytesIO
 import time
 import re
 from report_gen import generate_pdf

 # Set page config
 st.set_page_config(

@@ -11,6 +13,9 @@ st.set_page_config(
     initial_sidebar_state="collapsed"
 )

 # Password protection
 def check_password():
     def password_entered():
@@ -44,195 +49,106 @@ if 'current_page' not in st.session_state:
     st.session_state.current_page = 0

 if 'answers' not in st.session_state:
-    st.session_state.answers = {
-        'what_evaluating': '',
-        'stakeholder_groups': '',
-        'intentional_concealment': '',
-        'technological_literacy': '',
-        'cognitive_mismatch': '',
-        'trust_focus': '',
-        'trust_source': '',
-        'trust_warranted': '',
-        'trust_conclusion': ''
-    }
-
-def format_text(text):
-    # Make text before colon bold
-    text = re.sub(r'(^|[\n\r])([^:\n\r]+):', r'\1##\2**:', text)
-
-    # Make text in parentheses italic
-    text = re.sub(r'\(([^)]+)\)', r'*(\1)*', text)
-
-    return text

 # Define the content for each page
 pages = [
     {
-        'title': "
-        'content': ""
-
-
-        ""
-        'input_key': 'what_evaluating',
-        'input_type': 'radio',
-        'options': ['Overall Solution', 'Foundation Model'],
-        'example': """
-        Solution: Evaluate a gen.ai chatbot for extracting precedent from legal documents for a law firm.
-        Foundation Model: Evaluate what part Llama2 as a model in the chatbot may play in the risks.
-        """
     },
     {
-        'title': "
-        '
-
-
-
-        '
-        '
-        'example': """
-        Gen.ai chatbot solution for law firm example:
-        Group 1 - End-users (attorneys/legal counsellors)
-        Group 2 - IT Department (providing the tooling)
-        Group 3 - End-clients (the plaintiffs or defendants)
-        """
     },
     {
-        'title': "
-        '
-
-
-
-
-
-        """
     },
     {
-        'title': "
-        '
-
-
-
-
-        ""
-        'input_key': 'intentional_concealment',
-        'input_type': 'text_area',
-        'example': """
-        Gen.ai chatbot solution for law firm example:
-        Concealment 1: They don't let people audit the training data.
-        Concealment 2: We can't deploy the model ourselves, only consume it from them via an endpoint.
-        Transparency 1: They published an academic paper on how the model was developed.
-        """
     },
     {
-        'title': "
-        'content': ""
-
-
-
-        to how a person may know how to use a smartphone, but not to code an app for one, or to engineer a phone themselves.
-        """,
-        'input_key': 'technological_literacy',
-        'input_type': 'text_area',
-        'example': """
-        Gen.ai chatbot solution for law firm example:
-        Group 1 (Attorneys) - Yes, Yes : They should be able to evaluate the output if it seems abnormal due to their domain expertise, but may not understand how the solution works in the background.
-        Group 2 (IT Department) - Yes, Yes : They have used foundation models for other tasks before to create other solutions.
-        Group 3 (Plaintiffs/Defendants) - Yes, No : They aren't familiar with legal precedents, that's why they hired our client to help them. They need to trust their expertise but less so the solution.
-        """
     },
     {
-        'title': "
-        '
-
-
-
-
-        """,
-        'input_key': 'cognitive_mismatch',
-        'input_type': 'text_area',
-        'example': """
-        Gen.ai chatbot solution for law firm example:
-        Group 1 - Yes: The chatbot invents a new type of precedent by hallucinating, but it turns out that it is a real thing that we never expected or noticed!?
-        Group 2 - Yes: The government wants us to provide an explanation of why a model said what it said to a client and ruined their life, how do I do that when it has 70 billion parameters!?
-        Group 3 - Yes: In the meeting the chatbot starts to tell the client that they will suffer 40 years of jail time for causing the death of the Dalai Lama because they didn't pay the taxes.
-        """
     },
     {
-        'title': "
-        '
-
-
-
-
-        frail than intrinsic trust.
-
-        Which form of trust do we want to focus on, and what is the source of that trust? Can it be further strengthened?
-        """,
-        'input_key': 'trust_focus',
-        'input_type': 'combined',
-        'options': ['Intrinsic trust', 'Extrinsic trust'],
-        'example': """
-        Gen.ai chatbot solution for law firm example:
-        We choose to rely on Intrinsic Trust in this case. Because our client's customers come to them because they trust them, while the IT department trusts that they can handle this solution and we trust the expertise of our lawyers to handle any anomalies. But we can also provide the IT department with tools to help them maintain their intrinsic trust in their work by monitoring it.
-
-        Source of Trust: The intrinsic trust comes from the lawyers' expertise in their field and the IT department's familiarity with similar systems. We can further strengthen this by providing regular training sessions on the AI system and its limitations, as well as implementing a robust feedback mechanism for continuous improvement.
-        """
     },
     {
-        'title': "
-        'content': ""
-
-
-
-
-
-
-
-
-
-
-
         """,
-        '
-        '
-        'options': ['Trust is warranted', 'Trust is unwarranted'],
-        'example': """
-        Gen.ai chatbot solution for law firm example:
-        Conclusion: We believe that trust in our solution is Warranted when we consider the subject matter expertise of our clients internal legal and IT experts, as well as our recommendation of using some watsonx.governance tooling to help bolster the IT department's intrinsic trust in the solution and reduce cognitive mismatch opacity.
-
-        This trust is warranted because:
-        1. The legal experts (end-users) have the domain knowledge to critically evaluate the chatbot's outputs.
-        2. The IT department has experience with similar AI systems and will be equipped with additional monitoring tools.
-        3. We've identified potential opacity issues and have plans to address them, such as using explainable AI techniques.
-        4. The solution will be continuously monitored and improved based on feedback from all stakeholder groups.
-        """
     },
     {
-        'title': "
-        '
-
-
-
         """,
-        '
-        '
-        'example': """
-        To improve trust in our example gen.ai chatbot for the law firm:
-
-        1. Transparency: Develop a clear, non-technical explanation of how the AI works for all stakeholders.
-        2. Training: Provide regular training sessions for attorneys on how to effectively use and critically evaluate the chatbot's outputs.
-        3. Feedback Loop: Implement a robust feedback mechanism where users can flag incorrect or concerning outputs, feeding into continuous improvement.
-        4. Explainability: Integrate explainable AI techniques to provide rationale for the chatbot's recommendations, addressing cognitive mismatch.
-        5. Auditing: Establish regular audits of the system's performance and decision-making processes, sharing results with stakeholders.
-        6. Ethical Guidelines: Develop and prominently display clear ethical guidelines for the AI's use in legal contexts.
-        7. Collaboration: Foster ongoing collaboration between legal experts and IT teams to bridge the technological literacy gap.
-        8. Gradual Rollout: Implement the solution in phases, allowing time for trust to build and for refinements based on real-world use.
-        """
     },
     {
         'title': "Generate Evaluation Report",
-        'content': "You have completed the
-        '
     }
 ]

@@ -247,7 +163,7 @@ with col1:
         st.session_state.current_page -= 1
         st.rerun()
 with col3:
-    if st.session_state.current_page < len(pages) - 1:
        if st.button("Next", use_container_width=True):
            st.session_state.current_page += 1
            st.rerun()
@@ -255,43 +171,69 @@ with col3:
 # Display current page
 current_page = pages[st.session_state.current_page]
 st.header(current_page['title'])
-st.markdown(current_page['content'])

-
-
-
-
-
-
-
-
-
         st.session_state.answers[current_page['input_key']] = st.text_area(
-            "Your
-            value=st.session_state.answers.get(current_page['input_key'], ""),
             key=current_page['input_key'],
-            height=
         )
-
-
-
-            current_page['
-            key=f"{current_page['input_key']}_radio"
         )
-
-
-
-
-
         )

-
-
-with st.expander("Reveal Example"):
-    st.markdown(current_page['example'])
-
-# Generate PDF button (only on the last page)
-if st.session_state.current_page == len(pages) - 1:
     if st.button("Generate and Download PDF", use_container_width=True):
         pdf = generate_pdf(pages, st.session_state.answers)
         st.download_button(
@@ -305,5 +247,5 @@ if st.session_state.current_page == len(pages) - 1:
 # Display progress
 st.progress((st.session_state.current_page + 1) / len(pages))

-
-
After (lines added in this commit are marked with "+"):

@@ -1,8 +1,10 @@
 import streamlit as st
+import streamlit_mermaid as stmd
 from io import BytesIO
 import time
 import re
 from report_gen import generate_pdf
+from sol_inf import generate_inference  # Assuming this function exists in sol_inf.py

 # Set page config
 st.set_page_config(
@@ -11,6 +13,9 @@ st.set_page_config(
     initial_sidebar_state="collapsed"
 )

+# Global variables
+ARCHITECTURE_VIEW_TYPE = 0  # 0 for SVG, 1 for Mermaid
+
 # Password protection
 def check_password():
     def password_entered():
@@ -44,195 +49,106 @@ if 'current_page' not in st.session_state:
     st.session_state.current_page = 0

 if 'answers' not in st.session_state:
+    st.session_state.answers = {}

 # Define the content for each page
 pages = [
     {
+        'title': "Beyond Basic T's & C's",
+        'content': "This section evaluates the Acceptable Use Policy of AI solutions.",
+        'type': 'description',
+        'image': "path_to_image.jpg",
+        'expander_content': "Additional information about Acceptable Use Policy"
     },
     {
+        'title': "Case Study 1",
+        'type': 'quiz',
+        'image': "path_to_case1_image.jpg",
+        'description': "Description of Case Study 1",
+        'input_key': 'case1_answer',
+        'slider_key': 'case1_risk',
+        'expander_image': "path_to_tc_snippet1.jpg"
     },
     {
+        'title': "Case Study 2",
+        'type': 'quiz',
+        'image': "path_to_case2_image.jpg",
+        'description': "Description of Case Study 2",
+        'input_key': 'case2_answer',
+        'slider_key': 'case2_risk',
+        'expander_image': "path_to_tc_snippet2.jpg"
     },
     {
+        'title': "Case Study 3",
+        'type': 'quiz',
+        'image': "path_to_case3_image.jpg",
+        'description': "Description of Case Study 3",
+        'input_key': 'case3_answer',
+        'slider_key': 'case3_risk',
+        'expander_image': "path_to_tc_snippet3.jpg"
     },
     {
+        'title': "Solution Robustness",
+        'content': "This section evaluates the robustness of AI solutions.",
+        'type': 'description',
+        'image': "path_to_robustness_image.jpg",
+        'expander_content': "Additional information about Solution Robustness"
     },
     {
+        'title': "Robustness Test 1",
+        'type': 'inference',
+        'prompt_description': "Description of Robustness Test 1",
+        'input_key': 'robustness1_input',
+        'output_key': 'robustness1_output',
+        'placeholder': "Enter your test input here..."
     },
     {
+        'title': "Robustness Test 2",
+        'type': 'inference',
+        'prompt_description': "Description of Robustness Test 2",
+        'input_key': 'robustness2_input',
+        'output_key': 'robustness2_output',
+        'placeholder': "Enter your test input here..."
     },
     {
+        'title': "Data Security",
+        'content': "This section evaluates the data security aspects of AI solutions.",
+        'type': 'description',
+        'image': "path_to_security_image.jpg",
+        'expander_content': "Additional information about Data Security"
+    },
+    {
+        'title': "Security Analysis 1",
+        'type': 'architecture',
+        'svg_path': "path_to_architecture_svg1.svg",
+        'mermaid_code': """
+        graph TD
+            A[Client] --> B[Load Balancer]
+            B --> C[Web Server]
+            C --> D[Application Server]
+            D --> E[Database]
         """,
+        'expander_content': "Explanation of the solution architecture",
+        'input_key': 'security1_analysis'
     },
     {
+        'title': "Security Analysis 2",
+        'type': 'architecture',
+        'svg_path': "path_to_architecture_svg2.svg",
+        'mermaid_code': """
+        graph TD
+            A[User] --> B[API Gateway]
+            B --> C[Authentication Service]
+            B --> D[Data Processing Service]
+            D --> E[Storage Service]
         """,
+        'expander_content': "Explanation of the solution architecture",
+        'input_key': 'security2_analysis'
     },
     {
         'title': "Generate Evaluation Report",
+        'content': "You have completed the Critical Infrastructure - LLM Considerations evaluation. Click the button below to generate and download your PDF report.",
+        'type': 'report'
     }
 ]

@@ -247,7 +163,7 @@ with col1:
         st.session_state.current_page -= 1
         st.rerun()
 with col3:
+    if st.session_state.current_page < len(pages) - 1:
        if st.button("Next", use_container_width=True):
            st.session_state.current_page += 1
            st.rerun()
@@ -255,43 +171,69 @@ with col3:
 # Display current page
 current_page = pages[st.session_state.current_page]
 st.header(current_page['title'])

+if current_page['type'] == 'description':
+    st.write(current_page['content'])
+    st.image(current_page['image'])
+    with st.expander("Learn More"):
+        st.write(current_page['expander_content'])
+
+elif current_page['type'] == 'quiz':
+    col1, col2 = st.columns(2)
+    with col1:
+        st.image(current_page['image'])
+        st.write(current_page['description'])
+    with col2:
         st.session_state.answers[current_page['input_key']] = st.text_area(
+            "Your analysis:",
             key=current_page['input_key'],
+            height=200
         )
+        st.session_state.answers[current_page['slider_key']] = st.slider(
+            "Risk Level",
+            1, 10,
+            key=current_page['slider_key']
         )
+    with st.expander("View T&C Snippet"):
+        st.image(current_page['expander_image'])
+
+elif current_page['type'] == 'inference':
+    col1, col2 = st.columns(2)
+    with col1:
+        with st.expander("Prompt Description"):
+            st.write(current_page['prompt_description'])
+        user_input = st.text_area(
+            "Enter your test input:",
+            key=current_page['input_key'],
+            height=200,
+            placeholder=current_page['placeholder']
+        )
+        if st.button("Infer"):
+            st.session_state.answers[current_page['input_key']] = user_input
+            st.session_state.answers[current_page['output_key']] = generate_inference(user_input)
+    with col2:
+        if current_page['output_key'] in st.session_state.answers:
+            st.write("Inference Result:")
+            st.write(st.session_state.answers[current_page['output_key']])
+
+elif current_page['type'] == 'architecture':
+    col1, col2 = st.columns(2)
+    with col1:
+        if ARCHITECTURE_VIEW_TYPE == 0:
+            st.image(current_page['svg_path'])
+        else:
+            stmd.st_mermaid(current_page['mermaid_code'])
+        with st.expander("Solution Explanation"):
+            st.write(current_page['expander_content'])
+    with col2:
+        st.session_state.answers[current_page['input_key']] = st.text_area(
+            "Your security analysis:",
+            key=current_page['input_key'],
+            height=300
         )

+elif current_page['type'] == 'report':
+    st.write(current_page['content'])
     if st.button("Generate and Download PDF", use_container_width=True):
         pdf = generate_pdf(pages, st.session_state.answers)
         st.download_button(
@@ -305,5 +247,5 @@ if st.session_state.current_page == len(pages) - 1:
 # Display progress
 st.progress((st.session_state.current_page + 1) / len(pages))

+st.divider()
+st.info("Developed by Milan Mrdenovic © IBM Norway 2024")
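The updated file imports generate_inference from sol_inf, which is not included in this commit; the in-diff comment itself flags it as assumed. A minimal sketch of what such a helper might look like, assuming a plain HTTP text-generation endpoint (the URL, payload shape, response field, and environment variable names below are hypothetical, not taken from the repository):

```python
# sol_inf.py -- hypothetical sketch, not the module shipped with this Space
import os
import requests

def generate_inference(user_input: str) -> str:
    """Send the robustness-test input to a hosted LLM endpoint and return its completion."""
    url = os.environ.get("INFERENCE_ENDPOINT", "https://example.com/v1/generate")  # assumed endpoint
    headers = {"Authorization": f"Bearer {os.environ.get('INFERENCE_API_KEY', '')}"}
    payload = {"prompt": user_input, "max_new_tokens": 300}  # assumed request schema
    resp = requests.post(url, json=payload, headers=headers, timeout=60)
    resp.raise_for_status()
    return resp.json().get("generated_text", "")  # assumed response field
```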
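Likewise, report_gen.generate_pdf sits outside this diff; the app only shows that it takes the pages list plus the collected answers and returns data handed to st.download_button. A rough sketch under those assumptions, using fpdf2 (the actual report layout is not shown in the commit):

```python
# report_gen.py -- hypothetical sketch of the PDF builder
from fpdf import FPDF

def generate_pdf(pages, answers):
    pdf = FPDF()
    pdf.add_page()
    for page in pages:
        pdf.set_font("Helvetica", style="B", size=14)
        pdf.multi_cell(0, 8, page.get("title", ""))
        pdf.set_font("Helvetica", size=11)
        # Write whatever the user entered for this page, if anything was captured
        for key_name in ("input_key", "slider_key", "output_key"):
            if key_name in page and page[key_name] in answers:
                pdf.multi_cell(0, 6, str(answers[page[key_name]]))
        pdf.ln(4)
    return bytes(pdf.output())  # st.download_button accepts raw bytes
```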