arthrod committed on
Commit
8b4fb42
·
verified ·
1 Parent(s): 0402432

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +744 -344
app.py CHANGED
@@ -1,408 +1,808 @@
1
  import gradio as gr
2
  import json
3
- import pandas as pd
4
  from datetime import datetime
5
  import os
 
6
 
7
- class PreferenceCollector:
8
- def __init__(self, jsonl_file):
9
- self.jsonl_file = jsonl_file
10
  self.data = []
11
  self.current_index = 0
12
- self.preferences = {}
13
- self.load_data()
 
 
14
 
15
- def load_data(self):
 
 
 
 
 
 
 
16
  """Load data from JSONL file"""
 
17
  try:
18
- with open(self.jsonl_file, 'r') as f:
19
  for line in f:
20
  self.data.append(json.loads(line))
21
- except FileNotFoundError:
22
- print(f"File {self.jsonl_file} not found!")
23
- self.data = []
24
-
25
- def get_current_question(self):
26
- """Get the current question data"""
27
- if 0 <= self.current_index < len(self.data):
28
- return self.data[self.current_index]
29
- return None
 
 
 
 
 
 
 
 
 
30
 
31
- def record_preference(self, sample_choice):
32
- """Record user's preference for current question"""
33
- if sample_choice in ["sample_zero", "sample_one", "sample_two"]:
34
- self.preferences[self.current_index] = {
35
  "question_index": self.current_index,
36
- "preferred_sample": sample_choice,
37
  "timestamp": datetime.now().isoformat()
38
  }
39
  return True
40
  return False
41
 
42
- def save_results(self, filename="preferences_results.json"):
43
- """Save preference results to file"""
44
- with open(filename, 'w') as f:
45
- json.dump(self.preferences, f, indent=2)
 
 
 
 
 
46
 
47
- # Also create a summary
48
- summary = {
49
- "total_questions": len(self.data),
50
- "answered_questions": len(self.preferences),
51
- "sample_zero_count": sum(1 for p in self.preferences.values() if p["preferred_sample"] == "sample_zero"),
52
- "sample_one_count": sum(1 for p in self.preferences.values() if p["preferred_sample"] == "sample_one"),
53
- "sample_two_count": sum(1 for p in self.preferences.values() if p["preferred_sample"] == "sample_two")
54
- }
55
 
56
- with open("preferences_summary.json", 'w') as f:
57
- json.dump(summary, f, indent=2)
 
 
 
58
 
59
- return summary
60
-
61
- # Initialize the preference collector
62
- collector = None
63
- selected_sample = None
64
-
65
- def initialize_collector(file_path):
66
- """Initialize the collector with a file path"""
67
- global collector
68
- if file_path:
69
- collector = PreferenceCollector(file_path.name)
70
- if collector.data:
71
- return update_display()
72
- else:
73
- return (
74
- "No data loaded. Please check your file.",
75
- "", "", "", "",
76
- gr.update(interactive=False),
77
- gr.update(interactive=False),
78
- gr.update(interactive=False),
79
- gr.update(interactive=False),
80
- gr.update(interactive=False),
81
- "0 / 0"
82
- )
83
- return (
84
- "Please upload a JSONL file to begin.",
85
- "", "", "", "",
86
- gr.update(interactive=False),
87
- gr.update(interactive=False),
88
- gr.update(interactive=False),
89
- gr.update(interactive=False),
90
- gr.update(interactive=False),
91
- "0 / 0"
92
- )
93
-
94
- def update_display():
95
- """Update the display with current question data"""
96
- global collector, selected_sample
97
-
98
- if not collector or not collector.data:
99
- return (
100
- "No data loaded",
101
- "", "", "", "",
102
- gr.update(interactive=False),
103
- gr.update(interactive=False),
104
- gr.update(interactive=False),
105
- gr.update(interactive=False),
106
- gr.update(interactive=False),
107
- "0 / 0"
108
- )
109
-
110
- question = collector.get_current_question()
111
- if not question:
112
- return (
113
- "No more questions",
114
- "", "", "", "",
115
- gr.update(interactive=False),
116
- gr.update(interactive=False),
117
- gr.update(interactive=False),
118
- gr.update(interactive=False),
119
- gr.update(interactive=False),
120
- f"{collector.current_index + 1} / {len(collector.data)}"
121
- )
122
-
123
- # Reset selected sample
124
- selected_sample = None
125
-
126
- # Check if this question has been answered before
127
- is_answered = collector.current_index in collector.preferences
128
 
129
- return (
130
- question.get("introductory_example", "No introductory example"),
131
- question.get("sample_zero", "No sample zero"),
132
- question.get("sample_one", "No sample one"),
133
- question.get("sample_two", "No sample two"),
134
- f"{'✓ Answered' if is_answered else 'Not answered yet'}",
135
- gr.update(interactive=True, variant="primary" if selected_sample == "sample_zero" else "secondary"),
136
- gr.update(interactive=True, variant="primary" if selected_sample == "sample_one" else "secondary"),
137
- gr.update(interactive=True, variant="primary" if selected_sample == "sample_two" else "secondary"),
138
- gr.update(interactive=collector.current_index > 0),
139
- gr.update(interactive=collector.current_index < len(collector.data) - 1),
140
- f"{collector.current_index + 1} / {len(collector.data)}"
141
- )
 
 
 
 
 
 
 
 
142
 
143
- def select_sample(sample_name):
144
- """Handle sample selection"""
145
- global selected_sample
146
- selected_sample = sample_name
147
-
148
- # Update button variants to show selection
149
- return (
150
- gr.update(variant="primary" if sample_name == "sample_zero" else "secondary"),
151
- gr.update(variant="primary" if sample_name == "sample_one" else "secondary"),
152
- gr.update(variant="primary" if sample_name == "sample_two" else "secondary"),
153
- gr.update(interactive=True) # Enable confirm button
154
- )
155
 
156
- def confirm_selection():
157
- """Confirm the current selection"""
158
- global collector, selected_sample
159
-
160
- if not collector or not selected_sample:
161
- return (
162
- gr.update(value="Please select a sample first"),
163
- gr.update(interactive=False)
164
- )
165
-
166
- if collector.record_preference(selected_sample):
167
- # Auto-advance to next question if not the last one
168
- if collector.current_index < len(collector.data) - 1:
169
- collector.current_index += 1
170
- return (
171
- gr.update(value="✓ Selection recorded! Moving to next question..."),
172
- gr.update(interactive=False),
173
- *update_display()
174
- )
175
- else:
176
- return (
177
- gr.update(value="✓ Selection recorded! You've completed all questions."),
178
- gr.update(interactive=False),
179
- *update_display()
180
- )
181
-
182
- return (
183
- gr.update(value="Error recording selection"),
184
- gr.update(interactive=False)
185
- )
186
 
187
- def previous_question():
188
- """Go to previous question"""
189
- global collector
190
- if collector and collector.current_index > 0:
191
- collector.current_index -= 1
192
- return update_display()
193
- return update_display()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
- def next_question():
196
- """Go to next question"""
197
- global collector
198
- if collector and collector.current_index < len(collector.data) - 1:
199
- collector.current_index += 1
200
- return update_display()
201
- return update_display()
202
-
203
- def export_results():
204
- """Export the results"""
205
- global collector
206
- if collector:
207
- summary = collector.save_results()
208
- return f"""Results exported successfully!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
- Total questions: {summary['total_questions']}
211
- Answered: {summary['answered_questions']}
212
-
213
- Preferences:
214
- - Sample 0: {summary['sample_zero_count']} votes
215
- - Sample 1: {summary['sample_one_count']} votes
216
- - Sample 2: {summary['sample_two_count']} votes
217
-
218
- Files saved: preferences_results.json, preferences_summary.json"""
219
- return "No data to export"
220
-
221
- # Create the Gradio interface
222
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
223
- gr.Markdown(
224
- """
225
- # 📊 Preference Collection Interface
226
 
227
- Upload a JSONL file and select your preferred sample for each question.
228
- """
229
- )
230
-
231
- with gr.Row():
232
- file_input = gr.File(
233
- label="Upload JSONL File",
234
- file_types=[".jsonl"],
235
- type="filepath"
236
- )
237
- load_btn = gr.Button("Load File", variant="primary")
238
-
239
- gr.Markdown("---")
240
-
241
- # Question display
242
- with gr.Column():
243
- intro_display = gr.Textbox(
244
- label="📝 Question / Introductory Example",
245
- lines=3,
246
- interactive=False
247
- )
248
 
249
- progress_label = gr.Textbox(
250
- label="Progress",
251
- value="0 / 0",
252
- interactive=False,
253
- max_lines=1
254
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
255
 
256
- status_label = gr.Textbox(
257
- label="Status",
258
- value="Not answered yet",
259
- interactive=False,
260
- max_lines=1
261
- )
262
-
263
- gr.Markdown("### 🤔 Choose your preferred sample:")
264
-
265
- # Sample displays
266
- with gr.Row():
267
- with gr.Column():
268
- sample_0_display = gr.Textbox(
269
- label="Sample 0",
270
- lines=6,
271
- interactive=False
272
- )
273
- select_0_btn = gr.Button(
274
- "Select Sample 0",
275
- variant="secondary",
276
- interactive=False
277
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
 
279
- with gr.Column():
280
- sample_1_display = gr.Textbox(
281
- label="Sample 1",
282
- lines=6,
283
- interactive=False
284
- )
285
- select_1_btn = gr.Button(
286
- "Select Sample 1",
287
- variant="secondary",
288
- interactive=False
289
- )
290
 
291
- with gr.Column():
292
- sample_2_display = gr.Textbox(
293
- label="Sample 2",
294
- lines=6,
295
- interactive=False
296
- )
297
- select_2_btn = gr.Button(
298
- "Select Sample 2",
299
- variant="secondary",
300
- interactive=False
301
- )
302
-
303
- # Action buttons
304
- with gr.Row():
305
- prev_btn = gr.Button("← Previous", interactive=False)
306
- confirm_btn = gr.Button("✓ Confirm Selection", variant="primary", interactive=False)
307
- next_btn = gr.Button("Next →", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
 
309
- confirm_status = gr.Textbox(label="Action Status", interactive=False)
 
310
 
311
- gr.Markdown("---")
 
 
 
 
 
312
 
313
- # Export section
314
- with gr.Row():
315
- export_btn = gr.Button("📥 Export Results", variant="primary")
316
- export_status = gr.Textbox(label="Export Status", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
 
318
  # Event handlers
319
- load_btn.click(
320
- initialize_collector,
321
- inputs=[file_input],
322
- outputs=[
323
- intro_display, sample_0_display, sample_1_display, sample_2_display,
324
- status_label, select_0_btn, select_1_btn, select_2_btn,
325
- prev_btn, next_btn, progress_label
326
- ]
327
- )
328
-
329
- select_0_btn.click(
330
- lambda: select_sample("sample_zero"),
331
- outputs=[select_0_btn, select_1_btn, select_2_btn, confirm_btn]
332
- )
333
-
334
- select_1_btn.click(
335
- lambda: select_sample("sample_one"),
336
- outputs=[select_0_btn, select_1_btn, select_2_btn, confirm_btn]
337
- )
338
-
339
- select_2_btn.click(
340
- lambda: select_sample("sample_two"),
341
- outputs=[select_0_btn, select_1_btn, select_2_btn, confirm_btn]
342
  )
343
 
344
  confirm_btn.click(
345
- confirm_selection,
346
- outputs=[
347
- confirm_status, confirm_btn,
348
- intro_display, sample_0_display, sample_1_display, sample_2_display,
349
- status_label, select_0_btn, select_1_btn, select_2_btn,
350
- prev_btn, next_btn, progress_label
351
- ]
352
  )
353
 
354
  prev_btn.click(
355
- previous_question,
356
- outputs=[
357
- intro_display, sample_0_display, sample_1_display, sample_2_display,
358
- status_label, select_0_btn, select_1_btn, select_2_btn,
359
- prev_btn, next_btn, progress_label
360
- ]
361
  )
362
 
363
  next_btn.click(
364
- next_question,
365
- outputs=[
366
- intro_display, sample_0_display, sample_1_display, sample_2_display,
367
- status_label, select_0_btn, select_1_btn, select_2_btn,
368
- prev_btn, next_btn, progress_label
369
- ]
370
  )
371
 
372
  export_btn.click(
373
- export_results,
374
- outputs=[export_status]
 
 
 
 
 
 
 
 
 
 
 
 
375
  )
376
 
377
- # Launch the app
378
- if __name__ == "__main__":
379
- # Create a sample JSONL file for testing
380
  sample_data = [
381
  {
382
- "introductory_example": "Which response best explains machine learning?",
383
- "sample_zero": "Machine learning is a subset of AI that enables systems to learn and improve from experience without being explicitly programmed.",
384
- "sample_one": "Machine learning involves training computers to recognize patterns in data and make decisions based on those patterns.",
385
- "sample_two": "Machine learning is the science of getting computers to act without being explicitly programmed by using algorithms and statistical models."
386
  },
387
  {
388
- "introductory_example": "How would you describe cloud computing?",
389
- "sample_zero": "Cloud computing delivers computing services over the internet, including servers, storage, databases, and software.",
390
- "sample_one": "Cloud computing is the on-demand availability of computer system resources, especially data storage and computing power.",
391
- "sample_two": "Cloud computing provides shared processing resources and data to computers and other devices on demand via the internet."
392
  },
393
  {
394
- "introductory_example": "What is the best explanation of blockchain?",
395
- "sample_zero": "Blockchain is a distributed ledger technology that records transactions across many computers in a way that cannot be altered retroactively.",
396
- "sample_one": "Blockchain is a system of recording information in a way that makes it difficult to change, hack, or cheat the system.",
397
- "sample_two": "Blockchain is a decentralized, distributed database that maintains a continuously growing list of ordered records called blocks."
398
  }
399
  ]
400
 
401
- # Save sample data
402
- with open("sample_questions.jsonl", "w") as f:
403
  for item in sample_data:
404
  f.write(json.dumps(item) + "\n")
405
-
406
- print("Sample file 'sample_questions.jsonl' created!")
407
-
408
  demo.launch(share=False)
 
1
  import gradio as gr
2
  import json
3
+ import uuid
4
  from datetime import datetime
5
  import os
6
+ import pandas as pd
7
 
8
class LegalTechEvaluator:
    """Hold the evaluation questions and the choices recorded per user session.

    One instance tracks a list of question records (``data``), the index of
    the question currently shown (``current_index``), every session created
    through :meth:`create_user_session` (``user_sessions``), and which of
    those sessions is the current one (``current_user_id`` /
    ``current_user_name``).
    """

    # Choice labels accepted by record_choice() and tallied in the summary.
    VALID_CHOICES = ("sample_zero", "sample_one", "sample_two")

    def __init__(self):
        """Start with empty state, then try to load the default data file."""
        self.data = []
        self.current_index = 0
        self.user_sessions = {}
        self.current_user_id = None
        self.current_user_name = None
        self.auto_load_data()

    def auto_load_data(self):
        """Load ``test_legal_tech.jsonl`` from the working directory if present.

        Returns:
            True when the file exists and was loaded successfully, False
            otherwise (missing file or load error).
        """
        if os.path.exists("test_legal_tech.jsonl"):
            # Report the real outcome of the load instead of assuming success.
            return self.load_from_file("test_legal_tech.jsonl")
        return False

    def load_from_file(self, filename):
        """Replace ``self.data`` with the records of a JSONL file.

        Each non-blank line is parsed as one JSON document. The load is
        all-or-nothing: on any error ``self.data`` is left empty rather than
        partially filled.

        Args:
            filename: Path of the JSONL file to read.

        Returns:
            True on success, False if the file could not be read or parsed.
        """
        self.data = []
        records = []
        try:
            with open(filename, 'r', encoding='utf-8') as f:
                for line in f:
                    # Blank lines (e.g. a trailing newline) are not records.
                    if line.strip():
                        records.append(json.loads(line))
        except (OSError, TypeError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
            # TypeError covers a missing (None) filename.
            print(f"Error loading file: {e}")
            return False
        self.data = records
        return True

    def create_user_session(self, name):
        """Create a new session for ``name`` and make it the current one.

        Args:
            name: Display name of the participant.

        Returns:
            The generated session id (a UUID4 string).
        """
        user_id = str(uuid.uuid4())
        self.current_user_id = user_id
        self.current_user_name = name
        self.user_sessions[user_id] = {
            "id": user_id,
            "name": name,
            "start_time": datetime.now().isoformat(),
            "responses": {},
            "completed": False,
        }
        return user_id

    def record_choice(self, sample_choice):
        """Record the current user's choice for the current question.

        Args:
            sample_choice: One of ``VALID_CHOICES``.

        Returns:
            True if the choice was stored; False when there is no current
            user, the current index is out of range, or the choice is not a
            recognised label.
        """
        if sample_choice not in self.VALID_CHOICES:
            # Unknown labels would otherwise break the summary tallies.
            return False
        if self.current_user_id and 0 <= self.current_index < len(self.data):
            responses = self.user_sessions[self.current_user_id]["responses"]
            responses[self.current_index] = {
                "question_index": self.current_index,
                "choice": sample_choice,
                "timestamp": datetime.now().isoformat(),
            }
            return True
        return False

    def get_current_question(self):
        """Return the record at ``current_index``, or None if out of range."""
        if 0 <= self.current_index < len(self.data):
            return self.data[self.current_index]
        return None

    def export_results(self):
        """Write all sessions to timestamped files under ``results/``.

        Three files are produced: a JSONL file with one session per line, a
        markdown summary, and a detailed JSON report that also embeds the
        questions.

        Returns:
            A tuple ``(jsonl_filename, md_filename, json_filename,
            md_content)``.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Create results directory if it doesn't exist
        os.makedirs("results", exist_ok=True)

        # 1. Export as JSONL (all user sessions)
        jsonl_filename = f"results/legal_tech_evaluation_{timestamp}.jsonl"
        with open(jsonl_filename, 'w', encoding='utf-8') as f:
            for session in self.user_sessions.values():
                f.write(json.dumps(session) + '\n')

        # 2. Create markdown summary
        md_content = self.generate_markdown_summary()
        md_filename = f"results/legal_tech_summary_{timestamp}.md"
        with open(md_filename, 'w', encoding='utf-8') as f:
            f.write(md_content)

        # 3. Create detailed JSON report
        json_filename = f"results/legal_tech_detailed_{timestamp}.json"
        with open(json_filename, 'w', encoding='utf-8') as f:
            json.dump({
                "evaluation_date": datetime.now().isoformat(),
                "total_questions": len(self.data),
                "total_participants": len(self.user_sessions),
                "user_sessions": self.user_sessions,
                "questions": self.data,
            }, f, indent=2)

        return jsonl_filename, md_filename, json_filename, md_content

    def generate_markdown_summary(self):
        """Build a markdown report of the recorded choices.

        The report contains overall vote counts, a per-question breakdown for
        every question that received at least one vote, and a participant
        list.

        Returns:
            The markdown text.
        """
        total_users = len(self.user_sessions)
        total_questions = len(self.data)

        # Tally votes overall and per question.
        sample_counts = dict.fromkeys(self.VALID_CHOICES, 0)
        question_stats = {}
        for session in self.user_sessions.values():
            for q_idx, response in session["responses"].items():
                choice = response["choice"]
                if choice not in sample_counts:
                    # Ignore labels the report has no column for.
                    continue
                sample_counts[choice] += 1
                # Key by str so int keys (recorded in-process) and str keys
                # (after a JSON round-trip) land in the same bucket.
                per_question = question_stats.setdefault(
                    str(q_idx), dict.fromkeys(self.VALID_CHOICES, 0)
                )
                per_question[choice] += 1

        total_votes = sum(sample_counts.values())
        divisor = max(total_votes, 1)  # avoid dividing by zero with no votes
        generated = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        header_lines = [
            "# Legal Tech Tool Evaluation Summary",
            f"Generated: {generated}",
            "",
            "## Overview",
            f"- **Total Participants**: {total_users}",
            f"- **Total Questions**: {total_questions}",
            f"- **Total Responses**: {total_votes}",
            "",
            "## Overall Preferences",
            "| Sample | Votes | Percentage |",
            "|--------|-------|------------|",
            f"| Sample 0 | {sample_counts['sample_zero']} | {sample_counts['sample_zero'] / divisor * 100:.1f}% |",
            f"| Sample 1 | {sample_counts['sample_one']} | {sample_counts['sample_one'] / divisor * 100:.1f}% |",
            f"| Sample 2 | {sample_counts['sample_two']} | {sample_counts['sample_two'] / divisor * 100:.1f}% |",
            "",
            "## Question-by-Question Breakdown",
        ]
        parts = ["\n".join(header_lines) + "\n"]

        for idx, question in enumerate(self.data):
            stats = question_stats.get(str(idx))
            if not stats:
                continue
            total_responses = sum(stats.values())
            if total_responses > 0:
                parts.append(f"\n### Question {idx + 1}\n")
                parts.append(f"**Prompt**: {question.get('introductory_example', 'N/A')[:100]}...\n\n")
                parts.append("| Sample | Votes | Percentage |\n")
                parts.append("|--------|-------|------------|\n")
                parts.append(f"| Sample 0 | {stats['sample_zero']} | {stats['sample_zero'] / total_responses * 100:.1f}% |\n")
                parts.append(f"| Sample 1 | {stats['sample_one']} | {stats['sample_one'] / total_responses * 100:.1f}% |\n")
                parts.append(f"| Sample 2 | {stats['sample_two']} | {stats['sample_two'] / total_responses * 100:.1f}% |\n")

        parts.append("\n## Participants\n")
        for session in self.user_sessions.values():
            responses = len(session["responses"])
            parts.append(
                f"- **{session['name']}** (ID: {session['id'][:8]}...): "
                f"{responses}/{total_questions} questions answered\n"
            )

        return "".join(parts)
156
 
157
+ # Global evaluator instance
158
+ evaluator = LegalTechEvaluator()
159
+
160
+ # Custom CSS for elegant interface
161
# Stylesheet for the custom HTML interface. Visibility of the three screens
# (welcome, evaluation, file upload) is controlled only by the `.hide` class
# that the page script toggles; no screen is hidden by an id-level rule,
# because such a rule would keep the element hidden after `.hide` is removed.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');

* {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
}

.gradio-container {
    max-width: 1200px !important;
    margin: 0 auto !important;
    padding: 0 !important;
}

#main-container {
    background: #ffffff;
    border-radius: 16px;
    box-shadow: 0 4px 24px rgba(0, 0, 0, 0.06);
    padding: 48px;
    margin: 24px;
}

#welcome-screen {
    text-align: center;
    padding: 60px 40px;
    background: linear-gradient(135deg, #f5f5f5 0%, #ffffff 100%);
    border-radius: 16px;
    margin-bottom: 32px;
}

#welcome-screen h1 {
    font-size: 32px;
    font-weight: 600;
    color: #1a1a1a;
    margin-bottom: 16px;
}

#welcome-screen p {
    font-size: 18px;
    color: #666;
    margin-bottom: 32px;
    line-height: 1.6;
}

#name-input {
    max-width: 400px;
    margin: 0 auto 24px;
}

#name-input input {
    font-size: 16px;
    padding: 12px 20px;
    border: 2px solid #e0e0e0;
    border-radius: 8px;
    width: 100%;
    transition: all 0.3s ease;
}

#name-input input:focus {
    border-color: #4a90e2;
    outline: none;
}

#start-button {
    background: #2563eb;
    color: white;
    border: none;
    padding: 14px 40px;
    font-size: 16px;
    font-weight: 500;
    border-radius: 8px;
    cursor: pointer;
    transition: all 0.3s ease;
}

#start-button:hover {
    background: #1d4ed8;
    transform: translateY(-1px);
    box-shadow: 0 4px 12px rgba(37, 99, 235, 0.3);
}

/* The evaluation container starts hidden through its "hide" class only. */

#question-header {
    text-align: center;
    margin-bottom: 40px;
    padding-bottom: 24px;
    border-bottom: 1px solid #e5e7eb;
}

#question-header h2 {
    font-size: 24px;
    font-weight: 600;
    color: #1a1a1a;
    margin-bottom: 8px;
}

#progress-text {
    font-size: 14px;
    color: #6b7280;
    font-weight: 500;
}

#example-section {
    background: #f9fafb;
    border-radius: 12px;
    padding: 24px;
    margin-bottom: 32px;
}

#example-section h3 {
    font-size: 16px;
    font-weight: 600;
    color: #374151;
    margin-bottom: 12px;
    text-transform: uppercase;
    letter-spacing: 0.05em;
}

#example-text {
    font-size: 16px;
    line-height: 1.6;
    color: #1f2937;
}

#samples-container {
    display: grid;
    grid-template-columns: repeat(3, 1fr);
    gap: 24px;
    margin-bottom: 40px;
}

.sample-card {
    background: white;
    border: 2px solid #e5e7eb;
    border-radius: 12px;
    padding: 24px;
    transition: all 0.3s ease;
    position: relative;
}

.sample-card.selected {
    border-color: #2563eb;
    box-shadow: 0 0 0 4px rgba(37, 99, 235, 0.1);
}

.sample-card h4 {
    font-size: 14px;
    font-weight: 600;
    color: #6b7280;
    margin-bottom: 16px;
    text-transform: uppercase;
    letter-spacing: 0.05em;
}

.sample-text {
    font-size: 15px;
    line-height: 1.6;
    color: #374151;
    margin-bottom: 20px;
    min-height: 120px;
}

.sample-button {
    width: 100%;
    padding: 12px 24px;
    background: #f3f4f6;
    border: 2px solid transparent;
    border-radius: 8px;
    font-size: 14px;
    font-weight: 500;
    color: #374151;
    cursor: pointer;
    transition: all 0.3s ease;
}

.sample-button:hover {
    background: #e5e7eb;
}

.sample-card.selected .sample-button {
    background: #2563eb;
    color: white;
    border-color: #2563eb;
}

#action-buttons {
    display: flex;
    justify-content: center;
    gap: 16px;
    margin-top: 40px;
    padding-top: 32px;
    border-top: 1px solid #e5e7eb;
}

.nav-button {
    padding: 12px 32px;
    background: #f3f4f6;
    border: none;
    border-radius: 8px;
    font-size: 14px;
    font-weight: 500;
    color: #374151;
    cursor: pointer;
    transition: all 0.3s ease;
}

.nav-button:hover:not(:disabled) {
    background: #e5e7eb;
}

.nav-button:disabled {
    opacity: 0.5;
    cursor: not-allowed;
}

#confirm-button {
    background: #10b981;
    color: white;
}

#confirm-button:hover:not(:disabled) {
    background: #059669;
}

#export-section {
    text-align: center;
    margin-top: 48px;
    padding-top: 48px;
    border-top: 2px solid #e5e7eb;
}

#export-button {
    background: #6366f1;
    color: white;
    padding: 14px 40px;
    border: none;
    border-radius: 8px;
    font-size: 16px;
    font-weight: 500;
    cursor: pointer;
    transition: all 0.3s ease;
}

#export-button:hover {
    background: #4f46e5;
    transform: translateY(-1px);
    box-shadow: 0 4px 12px rgba(99, 102, 241, 0.3);
}

#file-upload-section {
    text-align: center;
    padding: 40px;
    background: #f9fafb;
    border-radius: 12px;
    margin-top: 32px;
}

#file-upload-section h3 {
    font-size: 18px;
    font-weight: 600;
    color: #374151;
    margin-bottom: 16px;
}

.hide {
    display: none !important;
}

.success-message {
    background: #d1fae5;
    color: #065f46;
    padding: 12px 20px;
    border-radius: 8px;
    margin: 16px 0;
    text-align: center;
    font-weight: 500;
}

.user-info {
    position: absolute;
    top: 20px;
    right: 20px;
    background: #f3f4f6;
    padding: 8px 16px;
    border-radius: 8px;
    font-size: 14px;
    color: #6b7280;
}
"""
452
+
453
+ # HTML Template
454
+ html_template = """
455
+ <div id="main-container">
456
+ <div id="welcome-screen">
457
+ <h1>Legal Tech Tool Evaluation</h1>
458
+ <p>Compare and evaluate outputs from different legal technology tools to help us understand user preferences.</p>
459
+ <div id="name-input">
460
+ <input type="text" id="user-name" placeholder="Enter your name" />
461
+ </div>
462
+ <button id="start-button" onclick="startEvaluation()">Start Evaluation</button>
463
+ </div>
464
+
465
+ <div id="evaluation-container" class="hide">
466
+ <div class="user-info" id="user-info"></div>
467
 
468
+ <div id="question-header">
469
+ <h2>Question <span id="current-question">1</span> of <span id="total-questions">0</span></h2>
470
+ <p id="progress-text">Please select your preferred response</p>
471
+ </div>
 
 
 
 
 
 
 
 
 
 
 
 
472
 
473
+ <div id="example-section">
474
+ <h3>Prompt</h3>
475
+ <p id="example-text"></p>
476
+ </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
477
 
478
+ <div id="samples-container">
479
+ <div class="sample-card" id="sample-0">
480
+ <h4>Sample A</h4>
481
+ <div class="sample-text" id="sample-0-text"></div>
482
+ <button class="sample-button" onclick="selectSample(0)">Select Sample A</button>
483
+ </div>
484
+
485
+ <div class="sample-card" id="sample-1">
486
+ <h4>Sample B</h4>
487
+ <div class="sample-text" id="sample-1-text"></div>
488
+ <button class="sample-button" onclick="selectSample(1)">Select Sample B</button>
489
+ </div>
490
+
491
+ <div class="sample-card" id="sample-2">
492
+ <h4>Sample C</h4>
493
+ <div class="sample-text" id="sample-2-text"></div>
494
+ <button class="sample-button" onclick="selectSample(2)">Select Sample C</button>
495
+ </div>
496
+ </div>
497
 
498
+ <div id="action-buttons">
499
+ <button class="nav-button" id="prev-button" onclick="previousQuestion()">← Previous</button>
500
+ <button class="nav-button" id="confirm-button" onclick="confirmSelection()">Confirm Selection</button>
501
+ <button class="nav-button" id="next-button" onclick="nextQuestion()">Next →</button>
502
+ </div>
503
+
504
+ <div id="export-section">
505
+ <button id="export-button" onclick="exportResults()">Export Results</button>
506
+ <div id="export-status"></div>
507
+ </div>
508
+ </div>
509
+
510
+ <div id="file-upload-section" class="hide">
511
+ <h3>No data file found</h3>
512
+ <p>Please upload a JSONL file to begin the evaluation</p>
513
+ <input type="file" id="file-input" accept=".jsonl" onchange="uploadFile()" />
514
+ </div>
515
+ </div>
516
+
517
+ <script>
518
+ let currentSelection = null;
519
+ let evaluationStarted = false;
520
+
521
+ function startEvaluation() {
522
+ const name = document.getElementById('user-name').value.trim();
523
+ if (!name) {
524
+ alert('Please enter your name');
525
+ return;
526
+ }
527
+
528
+ // Call Gradio function to start session
529
+ window.gradioApp().querySelector('#start-session-btn').click();
530
+ }
531
+
532
+ function selectSample(sampleIndex) {
533
+ // Clear previous selections
534
+ document.querySelectorAll('.sample-card').forEach(card => {
535
+ card.classList.remove('selected');
536
+ });
537
+
538
+ // Add selection to clicked sample
539
+ document.getElementById(`sample-${sampleIndex}`).classList.add('selected');
540
+ currentSelection = sampleIndex;
541
+
542
+ // Enable confirm button
543
+ document.getElementById('confirm-button').disabled = false;
544
+ }
545
+
546
/**
 * Render the UI from a serialized state payload.
 *
 * @param {string} data JSON text with either `showWelcome`, `showFileUpload`,
 *     or the question fields (`userName`, `currentIndex`, `totalQuestions`,
 *     `introductoryExample`, `sampleZero`, `sampleOne`, `sampleTwo`).
 */
function updateDisplay(data) {
    if (!data) return;

    // A malformed payload must not throw: the caller runs on a timer and
    // would otherwise raise the same error on every tick.
    let questionData;
    try {
        questionData = JSON.parse(data);
    } catch (err) {
        console.error('updateDisplay: state is not valid JSON', err);
        return;
    }
    if (!questionData) return;

    // Exactly one of the three screens is visible at any time.
    let activeScreen = 'evaluation-container';
    if (questionData.showWelcome) {
        activeScreen = 'welcome-screen';
    } else if (questionData.showFileUpload) {
        activeScreen = 'file-upload-section';
    }
    ['welcome-screen', 'evaluation-container', 'file-upload-section'].forEach(id => {
        document.getElementById(id).classList.toggle('hide', id !== activeScreen);
    });
    if (activeScreen !== 'evaluation-container') return;

    // Update user info
    if (questionData.userName) {
        document.getElementById('user-info').textContent = `Evaluator: ${questionData.userName}`;
    }

    // Update question info (the index is zero-based in the payload, one-based on screen)
    document.getElementById('current-question').textContent = questionData.currentIndex + 1;
    document.getElementById('total-questions').textContent = questionData.totalQuestions;

    // Update example and samples; textContent keeps the text from being parsed as HTML
    document.getElementById('example-text').textContent = questionData.introductoryExample;
    document.getElementById('sample-0-text').textContent = questionData.sampleZero;
    document.getElementById('sample-1-text').textContent = questionData.sampleOne;
    document.getElementById('sample-2-text').textContent = questionData.sampleTwo;

    // Update button states
    document.getElementById('prev-button').disabled = questionData.currentIndex === 0;
    document.getElementById('next-button').disabled = questionData.currentIndex >= questionData.totalQuestions - 1;
    document.getElementById('confirm-button').disabled = true;

    // Clear selection: a freshly rendered question starts with nothing chosen
    document.querySelectorAll('.sample-card').forEach(card => {
        card.classList.remove('selected');
    });
    currentSelection = null;
}
592
+
593
/**
 * Send the currently selected sample to the backend by filling the hidden
 * selection field and clicking the hidden confirm button.
 */
function confirmSelection() {
    if (currentSelection === null) return;

    const sampleMap = ['sample_zero', 'sample_one', 'sample_two'];
    const choice = sampleMap[currentSelection];
    if (choice === undefined) return;  // index outside the three known samples

    // Use the gradioApp() helper when the host page provides one, else the document.
    const root = typeof window.gradioApp === 'function' ? window.gradioApp() : document;
    const holder = root.querySelector('#hidden-selection');
    const confirmBtn = root.querySelector('#confirm-btn');
    if (!holder || !confirmBtn) {
        console.warn('confirmSelection: hidden Gradio controls not found');
        return;
    }

    // The id may sit on a wrapper element; the editable field is then nested inside it.
    const field = holder.querySelector('input, textarea') || holder;
    field.value = choice;
    // Programmatic value changes fire no event; dispatch one so listeners see the new value.
    field.dispatchEvent(new Event('input', { bubbles: true }));

    confirmBtn.click();
}
601
+
602
/** Ask the backend for the previous question by clicking the hidden button. */
function previousQuestion() {
    // Use the gradioApp() helper when the host page provides one, else the document.
    const root = typeof window.gradioApp === 'function' ? window.gradioApp() : document;
    const trigger = root.querySelector('#prev-btn');
    if (trigger) trigger.click();
}
605
+
606
/** Ask the backend for the next question by clicking the hidden button. */
function nextQuestion() {
    // Use the gradioApp() helper when the host page provides one, else the document.
    const root = typeof window.gradioApp === 'function' ? window.gradioApp() : document;
    const trigger = root.querySelector('#next-btn');
    if (trigger) trigger.click();
}
609
+
610
/** Start a results export by clicking the hidden export button. */
function exportResults() {
    // Use the gradioApp() helper when the host page provides one, else the document.
    const root = typeof window.gradioApp === 'function' ? window.gradioApp() : document;
    const trigger = root.querySelector('#export-btn');
    if (trigger) trigger.click();
}
613
+
614
/**
 * Click the hidden upload button once the user has picked a file in the
 * page's own file input.
 */
function uploadFile() {
    const fileInput = document.getElementById('file-input');
    if (!fileInput || !fileInput.files || fileInput.files.length === 0) return;

    // NOTE(review): the chosen file is not copied into the Gradio file
    // component here - confirm how it is expected to reach the backend.
    const root = typeof window.gradioApp === 'function' ? window.gradioApp() : document;
    const trigger = root.querySelector('#file-upload-btn');
    if (trigger) trigger.click();
}
621
+
622
// Listen for Gradio updates.
// updateDisplay() clears the current selection and disables the confirm
// button, so it must run only when the state actually changes; re-rendering
// on every tick would wipe the user's choice within 100 ms.
let lastRenderedState = null;
setInterval(() => {
    // Use the gradioApp() helper when the host page provides one, else the document.
    const root = typeof window.gradioApp === 'function' ? window.gradioApp() : document;
    const holder = root.querySelector('#current-state');
    if (!holder) return;

    // The id may sit on a wrapper element; the field holding the value is then nested inside it.
    const field = holder.querySelector('input, textarea') || holder;
    const state = field.value;
    if (!state || state === lastRenderedState) return;

    lastRenderedState = state;
    updateDisplay(state);
}, 100);
629
+ </script>
630
+ """
631
+
632
def start_session(name):
    """Begin an evaluation session for *name* and return the UI state as JSON.

    Returns the welcome-screen state when no name is given, the file-upload
    state when no data is loaded, and the current question state otherwise.
    """
    if name:
        if evaluator.data:
            evaluator.create_user_session(name)
            return get_current_state()
        # A name was supplied but there is nothing to evaluate yet.
        return json.dumps({"showFileUpload": True})
    return json.dumps({"showWelcome": True})
642
+
643
def get_current_state():
    """Serialize the evaluator's current question into the JSON the front end renders."""
    question = evaluator.get_current_question()
    if not question:
        # No question available: fall back to the welcome screen.
        return json.dumps({"showWelcome": True})

    state = {
        "showWelcome": False,
        "showFileUpload": False,
        "userName": evaluator.current_user_name,
        "currentIndex": evaluator.current_index,
        "totalQuestions": len(evaluator.data),
    }
    # Front-end key -> record key; a missing record field becomes an empty string.
    text_fields = (
        ("introductoryExample", "introductory_example"),
        ("sampleZero", "sample_zero"),
        ("sampleOne", "sample_one"),
        ("sampleTwo", "sample_two"),
    )
    for state_key, record_key in text_fields:
        state[state_key] = question.get(record_key, "")
    return json.dumps(state)
661
+
662
def confirm_selection(selection):
    """Record *selection* for the current question and return the new UI state.

    When the choice is recorded and a later question exists, the evaluator
    advances to it automatically.
    """
    recorded = bool(selection) and evaluator.record_choice(selection)
    has_next = evaluator.current_index + 1 < len(evaluator.data)
    if recorded and has_next:
        evaluator.current_index += 1
    return get_current_state()
669
+
670
def previous_question():
    """Step back one question (staying put on the first) and return the UI state."""
    position = evaluator.current_index
    if position > 0:
        evaluator.current_index = position - 1
    return get_current_state()
675
+
676
def next_question():
    """Step forward one question (staying put on the last) and return the UI state."""
    position = evaluator.current_index
    last_position = len(evaluator.data) - 1
    if position < last_position:
        evaluator.current_index = position + 1
    return get_current_state()
681
+
682
def export_results_handler():
    """Export the recorded results and build the updates for the export outputs.

    Returns:
        A 5-tuple: the current state JSON, an update for the status textbox,
        updates for the JSONL and Markdown download components, and the
        Markdown summary text (empty on failure).
    """
    try:
        jsonl_file, md_file, _json_file, md_content = evaluator.export_results()

        # Confirm the exports exist without loading their contents into
        # memory; the download components only need the paths.
        for path in (jsonl_file, md_file):
            if not os.path.isfile(path):
                raise FileNotFoundError(f"export file not found: {path}")
    except Exception as e:  # UI boundary: report the failure instead of crashing the handler
        return (
            get_current_state(),
            gr.update(value=f"❌ Export failed: {str(e)}"),
            gr.update(visible=False),
            gr.update(visible=False),
            ""
        )

    return (
        get_current_state(),
        gr.update(value="✅ Results exported successfully!"),
        gr.update(value=jsonl_file, visible=True),
        gr.update(value=md_file, visible=True),
        md_content
    )
708
+
709
def load_file(file):
    """Load evaluation data from an uploaded file and return the UI state.

    A falsy *file* (nothing uploaded) leaves the loaded data untouched.
    """
    if not file:
        return get_current_state()

    uploaded_path = file.name
    evaluator.load_from_file(uploaded_path)
    return get_current_state()
714
+
715
# Create Gradio interface
with gr.Blocks(css=custom_css, theme=gr.themes.Base()) as demo:
    # Hidden components for JavaScript interaction: the HTML front end finds
    # these by elem_id, fills the text fields and clicks the buttons.
    with gr.Column(visible=False):
        name_input = gr.Textbox(elem_id="hidden-name")
        start_btn = gr.Button("Start", elem_id="start-session-btn")
        current_state = gr.Textbox(elem_id="current-state")
        selection_input = gr.Textbox(elem_id="hidden-selection")
        confirm_btn = gr.Button("Confirm", elem_id="confirm-btn")
        prev_btn = gr.Button("Previous", elem_id="prev-btn")
        next_btn = gr.Button("Next", elem_id="next-btn")
        export_btn = gr.Button("Export", elem_id="export-btn")
        file_upload = gr.File(elem_id="file-upload-input")
        file_upload_btn = gr.Button("Upload", elem_id="file-upload-btn")

    # Output components (hidden)
    with gr.Column(visible=False):
        export_status = gr.Textbox()
        download_jsonl = gr.File(label="Download JSONL")
        download_md = gr.File(label="Download Summary")
        summary_display = gr.Markdown()

    # Main HTML interface
    gr.HTML(html_template)

    # Event handlers
    # The handler receives the value of every component listed in `inputs`,
    # so start_session is registered directly and gets the name as its argument.
    start_btn.click(
        fn=start_session,
        inputs=[name_input],
        outputs=[current_state]
    )

    confirm_btn.click(
        fn=confirm_selection,
        inputs=[selection_input],
        outputs=[current_state]
    )

    prev_btn.click(
        fn=previous_question,
        outputs=[current_state]
    )

    next_btn.click(
        fn=next_question,
        outputs=[current_state]
    )

    export_btn.click(
        fn=export_results_handler,
        outputs=[current_state, export_status, download_jsonl, download_md, summary_display]
    )

    file_upload_btn.click(
        fn=load_file,
        inputs=[file_upload],
        outputs=[current_state]
    )

    # Initial state
    demo.load(
        fn=lambda: json.dumps({"showWelcome": True}) if not evaluator.data else get_current_state(),
        outputs=[current_state]
    )
779
 
780
# Seed a small demo dataset the first time the app runs, so there is
# something to evaluate before the user supplies a file of their own.
if not os.path.exists("test_legal_tech.jsonl"):
    # One record per question: a prompt plus three candidate answers.
    sample_data = [
        {
            "introductory_example": "Draft a confidentiality clause for a software development agreement",
            "sample_zero": "The Receiving Party agrees to maintain the confidentiality of all Confidential Information received from the Disclosing Party and shall not disclose such information to any third party without prior written consent. This obligation shall survive termination of this Agreement for a period of five (5) years.",
            "sample_one": "All proprietary information, trade secrets, and confidential data disclosed by either party shall be kept strictly confidential. The receiving party must implement reasonable security measures and limit access to authorized personnel only. Breach of this clause may result in immediate termination and legal action.",
            "sample_two": "Confidential Information shall mean any non-public information disclosed by one party to the other, whether orally, in writing, or electronically. Both parties agree to protect such information using the same degree of care used for their own confidential information, but no less than reasonable care."
        },
        {
            "introductory_example": "Create an indemnification provision for a service agreement",
            "sample_zero": "The Service Provider shall indemnify, defend, and hold harmless the Client from any claims, damages, or losses arising from the Service Provider's negligence, willful misconduct, or breach of this Agreement, except to the extent caused by the Client's own negligence.",
            "sample_one": "Each party agrees to indemnify the other against third-party claims arising from their respective breaches of this Agreement or negligent acts. This indemnification includes reasonable attorneys' fees and costs, subject to prompt notice and cooperation in defense.",
            "sample_two": "Provider shall defend, indemnify, and hold Client harmless from all liabilities, costs, and expenses (including reasonable legal fees) resulting from Provider's performance under this Agreement, provided Client gives prompt notice of any claim and allows Provider to control the defense."
        },
        {
            "introductory_example": "Write a limitation of liability clause for a technology services contract",
            "sample_zero": "Neither party shall be liable for any indirect, incidental, special, consequential, or punitive damages, regardless of the cause of action. Total liability under this Agreement shall not exceed the fees paid in the twelve months preceding the claim.",
            "sample_one": "IN NO EVENT SHALL EITHER PARTY BE LIABLE FOR LOST PROFITS, LOST DATA, OR CONSEQUENTIAL DAMAGES. THE MAXIMUM LIABILITY OF EITHER PARTY SHALL BE LIMITED TO THE TOTAL AMOUNT PAID UNDER THIS AGREEMENT IN THE SIX (6) MONTHS PRIOR TO THE EVENT GIVING RISE TO LIABILITY.",
            "sample_two": "Except for breaches of confidentiality, indemnification obligations, or willful misconduct, neither party's liability shall exceed the greater of (a) $100,000 or (b) the fees paid in the prior 12 months. This limitation applies to all claims in aggregate."
        }
    ]

    # JSONL layout: one JSON object per line.
    with open("test_legal_tech.jsonl", "w") as sample_file:
        sample_file.writelines(json.dumps(record) + "\n" for record in sample_data)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch(share=False)