import gradio as gr import json import uuid from datetime import datetime import os from loguru import logger class LegalTechEvaluator: def __init__(self): self.data = [] self.current_index = 0 self.user_sessions = {} self.current_user_id = None self.current_user_name = None self.auto_load_data() def auto_load_data(self): """Automatically load test_legal_tech.jsonl if it exists""" if os.path.exists("test_legal_tech.jsonl"): self.load_from_file("test_legal_tech.jsonl") logger.info("found") return True logger.info("not found") return False def load_from_file(self, filename): """Load data from JSONL file""" self.data = [] try: with open("test_legal_tech.jsonl", 'r') as f: for line in f: self.data.append(json.loads(line)) logger.info("loaded") return True except Exception as e: print(f"Error loading file: {e}") logger.info("failed to load") return False def create_user_session(self, name): """Create a new user session with UUID""" user_id = str(uuid.uuid4()) self.current_user_id = user_id self.current_user_name = name self.user_sessions[user_id] = { "id": user_id, "name": name, "start_time": datetime.now().isoformat(), "responses": {}, "completed": False } self.current_index = 0 logger.info("new session created") return user_id def record_choice(self, sample_choice): """Record user's choice for current question""" if self.current_user_id and 0 <= self.current_index < len(self.data): self.user_sessions[self.current_user_id]["responses"][self.current_index] = { "question_index": self.current_index, "choice": sample_choice, "timestamp": datetime.now().isoformat() } return True logger.info("choice recorded") return False def get_current_question(self): """Get current question data""" if 0 <= self.current_index < len(self.data): return self.data[self.current_index] return None def export_results(self): """Export results to multiple formats""" timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # Create results directory if it doesn't exist os.makedirs("results", exist_ok=True) # 1. Export as JSONL (all user sessions) jsonl_filename = f"results/legal_tech_evaluation_{timestamp}.jsonl" with open(jsonl_filename, 'w') as f: for session in self.user_sessions.values(): f.write(json.dumps(session) + '\n') # 2. Create markdown summary md_content = self.generate_markdown_summary() md_filename = f"results/legal_tech_summary_{timestamp}.md" with open(md_filename, 'w') as f: f.write(md_content) # 3. Create detailed JSON report json_filename = f"results/legal_tech_detailed_{timestamp}.json" with open(json_filename, 'w') as f: json.dump({ "evaluation_date": datetime.now().isoformat(), "total_questions": len(self.data), "total_participants": len(self.user_sessions), "user_sessions": self.user_sessions, "questions": self.data }, f, indent=2) return jsonl_filename, md_filename, json_filename, md_content def generate_markdown_summary(self): """Generate a markdown summary of results""" total_users = len(self.user_sessions) total_questions = len(self.data) # Calculate preferences sample_counts = {"sample_zero": 0, "sample_one": 0, "sample_two": 0} question_stats = {} for session in self.user_sessions.values(): for q_idx, response in session["responses"].items(): choice = response["choice"] sample_counts[choice] += 1 if q_idx not in question_stats: question_stats[q_idx] = {"sample_zero": 0, "sample_one": 0, "sample_two": 0} question_stats[q_idx][choice] += 1 # Generate markdown md = f"""# Legal Tech Tool Evaluation Summary Generated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} ## Overview - **Total Participants**: {total_users} - **Total Questions**: {total_questions} - **Total Responses**: {sum(sample_counts.values())} ## Overall Preferences | Sample | Votes | Percentage | |--------|-------|------------| | Sample 0 | {sample_counts['sample_zero']} | {sample_counts['sample_zero'] / max(sum(sample_counts.values()), 1) * 100:.1f}% | | Sample 1 | {sample_counts['sample_one']} | {sample_counts['sample_one'] / max(sum(sample_counts.values()), 1) * 100:.1f}% | | Sample 2 | {sample_counts['sample_two']} | {sample_counts['sample_two'] / max(sum(sample_counts.values()), 1) * 100:.1f}% | ## Question-by-Question Breakdown """ for idx in range(len(self.data)): if idx in question_stats: stats = question_stats[idx] total_responses = sum(stats.values()) if total_responses > 0: question = self.data[idx] md += f"\n### Question {idx + 1}\n" md += f"**Prompt**: {question.get('introductory_example', 'N/A')[:100]}...\n\n" md += "| Sample | Votes | Percentage |\n" md += "|--------|-------|------------|\n" md += f"| Sample 0 | {stats['sample_zero']} | {stats['sample_zero'] / total_responses * 100:.1f}% |\n" md += f"| Sample 1 | {stats['sample_one']} | {stats['sample_one'] / total_responses * 100:.1f}% |\n" md += f"| Sample 2 | {stats['sample_two']} | {stats['sample_two'] / total_responses * 100:.1f}% |\n" md += f"\n## Participants\n" for session in self.user_sessions.values(): responses = len(session["responses"]) md += f"- **{session['name']}** (ID: {session['id'][:8]}...): {responses}/{total_questions} questions answered\n" return md # Global evaluator instance evaluator = LegalTechEvaluator() # Custom CSS for elegant interface custom_css = """ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap'); * { font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important; } .gradio-container { max-width: 1200px !important; margin: 0 auto !important; padding: 0 !important; background: transparent !important; } /* Fix for input text visibility */ input[type="text"] { color: #1f2937 !important; background-color: white !important; } #main-container { background: #ffffff; border-radius: 16px; box-shadow: 0 4px 24px rgba(0, 0, 0, 0.06); padding: 48px; margin: 24px; } #welcome-screen { text-align: center; padding: 60px 40px; background: linear-gradient(135deg, #f5f5f5 0%, #ffffff 100%); border-radius: 16px; margin-bottom: 32px; } #welcome-screen h1 { font-size: 32px; font-weight: 600; color: #1a1a1a; margin-bottom: 16px; } #welcome-screen p { font-size: 18px; color: #666; margin-bottom: 32px; line-height: 1.6; } #name-input-container { max-width: 400px; margin: 0 auto 24px; } #name-input-container input { font-size: 16px; padding: 12px 20px; border: 2px solid #e0e0e0; border-radius: 8px; width: 100%; transition: all 0.3s ease; color: #1f2937 !important; background-color: white !important; } #name-input-container input:focus { border-color: #4a90e2; outline: none; } .start-button { background: #2563eb; color: white; border: none; padding: 14px 40px; font-size: 16px; font-weight: 500; border-radius: 8px; cursor: pointer; transition: all 0.3s ease; } .start-button:hover { background: #1d4ed8; transform: translateY(-1px); box-shadow: 0 4px 12px rgba(37, 99, 235, 0.3); } #evaluation-container { display: none; } #question-header { text-align: center; margin-bottom: 40px; padding-bottom: 24px; border-bottom: 1px solid #e5e7eb; } #question-header h2 { font-size: 24px; font-weight: 600; color: #1a1a1a; margin-bottom: 8px; } #progress-text { font-size: 14px; color: #6b7280; font-weight: 500; } #example-section { background: #f9fafb; border-radius: 12px; padding: 24px; margin-bottom: 32px; } #example-section h3 { font-size: 16px; font-weight: 600; color: #374151; margin-bottom: 12px; text-transform: uppercase; letter-spacing: 0.05em; } #example-text { font-size: 16px; line-height: 1.6; color: #1f2937; } #samples-container { display: grid; grid-template-columns: repeat(3, 1fr); gap: 24px; margin-bottom: 40px; } .sample-card { background: white; border: 2px solid #e5e7eb; border-radius: 12px; padding: 24px; transition: all 0.3s ease; position: relative; } .sample-card.selected { border-color: #2563eb; box-shadow: 0 0 0 4px rgba(37, 99, 235, 0.1); } .sample-card h4 { font-size: 14px; font-weight: 600; color: #6b7280; margin-bottom: 16px; text-transform: uppercase; letter-spacing: 0.05em; } .sample-text { font-size: 15px; line-height: 1.6; color: #374151; margin-bottom: 20px; min-height: 120px; } .sample-button { width: 100%; padding: 12px 24px; background: #f3f4f6; border: 2px solid transparent; border-radius: 8px; font-size: 14px; font-weight: 500; color: #374151; cursor: pointer; transition: all 0.3s ease; } .sample-button:hover { background: #e5e7eb; } .sample-card.selected .sample-button { background: #2563eb; color: white; border-color: #2563eb; } #action-buttons { display: flex; justify-content: center; gap: 16px; margin-top: 40px; padding-top: 32px; border-top: 1px solid #e5e7eb; } .nav-button { padding: 12px 32px; background: #f3f4f6; border: none; border-radius: 8px; font-size: 14px; font-weight: 500; color: #374151; cursor: pointer; transition: all 0.3s ease; } .nav-button:hover:not(:disabled) { background: #e5e7eb; } .nav-button:disabled { opacity: 0.5; cursor: not-allowed; } #confirm-button { background: #10b981; color: white; } #confirm-button:hover:not(:disabled) { background: #059669; } #export-section { text-align: center; margin-top: 48px; padding-top: 48px; border-top: 2px solid #e5e7eb; } .export-button { background: #6366f1; color: white; padding: 14px 40px; border: none; border-radius: 8px; font-size: 16px; font-weight: 500; cursor: pointer; transition: all 0.3s ease; } .export-button:hover { background: #4f46e5; transform: translateY(-1px); box-shadow: 0 4px 12px rgba(99, 102, 241, 0.3); } #file-upload-section { text-align: center; padding: 40px; background: #f9fafb; border-radius: 12px; margin-top: 32px; } #file-upload-section h3 { font-size: 18px; font-weight: 600; color: #374151; margin-bottom: 16px; } .hide { display: none !important; } .show { display: block !important; } .success-message { background: #d1fae5; color: #065f46; padding: 12px 20px; border-radius: 8px; margin: 16px 0; text-align: center; font-weight: 500; } .user-info { position: absolute; top: 20px; right: 20px; background: #f3f4f6; padding: 8px 16px; border-radius: 8px; font-size: 14px; color: #6b7280; } """ # JavaScript code to handle interactions js_code = """ function startEvaluation() { const nameInput = document.querySelector('textarea[data-testid="textbox"]'); if (nameInput && nameInput.value.trim()) { const hiddenNameInput = document.querySelector('#hidden-name textarea'); if (hiddenNameInput) { hiddenNameInput.value = nameInput.value.trim(); hiddenNameInput.dispatchEvent(new Event('input', { bubbles: true })); setTimeout(() => { const startBtn = document.querySelector('#start-session-btn button'); if (startBtn) { startBtn.click(); } }, 100); } } else { alert('Please enter your name'); } } function selectSample(sampleIndex) { // Update hidden selection input const selectionInput = document.querySelector('#hidden-selection textarea'); if (selectionInput) { const sampleMap = ['sample_zero', 'sample_one', 'sample_two']; selectionInput.value = sampleMap[sampleIndex]; selectionInput.dispatchEvent(new Event('input', { bubbles: true })); // Update visual selection document.querySelectorAll('.sample-card').forEach(card => { card.classList.remove('selected'); }); document.getElementById(`sample-${sampleIndex}`).classList.add('selected'); // Enable confirm button const confirmBtn = document.getElementById('confirm-button'); if (confirmBtn) { confirmBtn.disabled = false; } } } function confirmSelection() { const confirmBtn = document.querySelector('#confirm-btn button'); if (confirmBtn) { confirmBtn.click(); } } function previousQuestion() { const prevBtn = document.querySelector('#prev-btn button'); if (prevBtn) { prevBtn.click(); } } function nextQuestion() { const nextBtn = document.querySelector('#next-btn button'); if (nextBtn) { nextBtn.click(); } } function exportResults() { const exportBtn = document.querySelector('#export-btn button'); if (exportBtn) { exportBtn.click(); } } // Update display based on state changes function updateDisplay() { const stateElement = document.querySelector('#current-state textarea'); if (stateElement && stateElement.value) { try { const data = JSON.parse(stateElement.value); const welcomeScreen = document.getElementById('welcome-screen'); const evaluationContainer = document.getElementById('evaluation-container'); const fileUploadSection = document.getElementById('file-upload-section'); if (data.showWelcome) { welcomeScreen.style.display = 'block'; evaluationContainer.style.display = 'none'; fileUploadSection.style.display = 'none'; } else if (data.showFileUpload) { welcomeScreen.style.display = 'none'; evaluationContainer.style.display = 'none'; fileUploadSection.style.display = 'block'; } else { welcomeScreen.style.display = 'none'; evaluationContainer.style.display = 'block'; fileUploadSection.style.display = 'none'; // Update content if (data.userName) { const userInfo = document.getElementById('user-info'); if (userInfo) userInfo.textContent = `Evaluator: ${data.userName}`; } document.getElementById('current-question').textContent = data.currentIndex + 1; document.getElementById('total-questions').textContent = data.totalQuestions; document.getElementById('example-text').textContent = data.introductoryExample; document.getElementById('sample-0-text').textContent = data.sampleZero; document.getElementById('sample-1-text').textContent = data.sampleOne; document.getElementById('sample-2-text').textContent = data.sampleTwo; // Update button states document.getElementById('prev-button').disabled = data.currentIndex === 0; document.getElementById('next-button').disabled = data.currentIndex >= data.totalQuestions - 1; document.getElementById('confirm-button').disabled = true; // Clear selection document.querySelectorAll('.sample-card').forEach(card => { card.classList.remove('selected'); }); } } catch (e) { console.error('Error parsing state:', e); } } } // Set up observer for state changes const observer = new MutationObserver(() => { updateDisplay(); }); // Start observing when DOM is ready document.addEventListener('DOMContentLoaded', () => { const stateElement = document.querySelector('#current-state textarea'); if (stateElement) { observer.observe(stateElement, { attributes: true, childList: true, characterData: true, subtree: true }); updateDisplay(); } }); """ # HTML Template with onclick handlers html_template = """

Legal Tech Tool Evaluation

Compare and evaluate outputs from different legal technology tools to help us understand user preferences.

Question 1 of 0

Please select your preferred response

Prompt

Sample A

Sample B

Sample C

No data file found

Please upload a JSONL file to begin the evaluation

""" def start_session(name): """Start a new evaluation session""" if not name: return json.dumps({"showWelcome": True}) if not evaluator.data: return json.dumps({"showFileUpload": True}) evaluator.create_user_session(name) return get_current_state() def get_current_state(): """Get current state as JSON""" question = evaluator.get_current_question() if not evaluator.current_user_name: return json.dumps({"showWelcome": True}) if not question: return json.dumps({"showFileUpload": True}) return json.dumps({ "showWelcome": False, "showFileUpload": False, "userName": evaluator.current_user_name, "currentIndex": evaluator.current_index, "totalQuestions": len(evaluator.data), "introductoryExample": question.get("introductory_example", ""), "sampleZero": question.get("sample_zero", ""), "sampleOne": question.get("sample_one", ""), "sampleTwo": question.get("sample_two", "") }) def confirm_selection(selection): """Confirm user's selection""" if selection and evaluator.record_choice(selection): # Auto-advance to next question if evaluator.current_index < len(evaluator.data) - 1: evaluator.current_index += 1 return get_current_state() def previous_question(): """Navigate to previous question""" if evaluator.current_index > 0: evaluator.current_index -= 1 return get_current_state() def next_question(): """Navigate to next question""" if evaluator.current_index < len(evaluator.data) - 1: evaluator.current_index += 1 return get_current_state() def export_results_handler(): """Export results and return status""" try: jsonl_file, md_file, json_file, md_content = evaluator.export_results() return ( get_current_state(), "✅ Results exported successfully!", jsonl_file, md_file, md_content ) except Exception as e: return ( get_current_state(), f"❌ Export failed: {str(e)}", None, None, "" ) def load_file(file): """Load data from uploaded file""" if file: evaluator.load_from_file(file.name) return get_current_state() # Create Gradio interface with gr.Blocks(css=custom_css, js=js_code, theme=gr.themes.Base()) as demo: gr.HTML(html_template) # Visible name input in welcome screen with gr.Column(elem_id="name-input-wrapper", visible=True): user_name_input = gr.Textbox( label="", placeholder="Enter your name", elem_id="user-name-field" ) start_button = gr.Button("Start Evaluation", elem_classes=["start-button"]) # Hidden components for JavaScript interaction with gr.Column(visible=False): hidden_name = gr.Textbox(elem_id="hidden-name") start_btn = gr.Button("Start", elem_id="start-session-btn") current_state = gr.Textbox(value=json.dumps({"showWelcome": True}), elem_id="current-state") selection_input = gr.Textbox(elem_id="hidden-selection") confirm_btn = gr.Button("Confirm", elem_id="confirm-btn") prev_btn = gr.Button("Previous", elem_id="prev-btn") next_btn = gr.Button("Next", elem_id="next-btn") export_btn = gr.Button("Export", elem_id="export-btn") file_upload = gr.File(elem_id="file-upload-input", file_types=[".jsonl"]) # Output components (hidden) with gr.Column(visible=False): export_status = gr.Textbox() download_jsonl = gr.File(label="Download JSONL") download_md = gr.File(label="Download Summary") summary_display = gr.Markdown() # Event handlers start_button.click( fn=start_session, inputs=[user_name_input], outputs=[current_state] ) start_btn.click( fn=start_session, inputs=[hidden_name], outputs=[current_state] ) confirm_btn.click( fn=confirm_selection, inputs=[selection_input], outputs=[current_state] ) prev_btn.click( fn=previous_question, outputs=[current_state] ) next_btn.click( fn=next_question, outputs=[current_state] ) export_btn.click( fn=export_results_handler, outputs=[current_state, export_status, download_jsonl, download_md, summary_display] ) file_upload.change( fn=load_file, inputs=[file_upload], outputs=[current_state] ) # Move name input to welcome screen on load demo.load( fn=None, js=""" () => { // Move the name input to the welcome screen const nameWrapper = document.querySelector('#name-input-wrapper'); const nameContainer = document.querySelector('#name-input-container'); if (nameWrapper && nameContainer) { const nameField = nameWrapper.querySelector('.wrap'); if (nameField) { nameContainer.appendChild(nameField); } nameWrapper.style.display = 'none'; } // Style the start button const startBtn = document.querySelector('#name-input-wrapper').nextElementSibling; if (startBtn) { const btn = startBtn.querySelector('button'); if (btn) { btn.className = 'start-button'; btn.onclick = () => startEvaluation(); } document.querySelector('#welcome-screen').appendChild(btn); startBtn.style.display = 'none'; } } """ ) # Create sample data if no file exists if not os.path.exists("test_legal_tech.jsonl"): sample_data = [ { "introductory_example": "Draft a confidentiality clause for a software development agreement", "sample_zero": "The Receiving Party agrees to maintain the confidentiality of all Confidential Information received from the Disclosing Party and shall not disclose such information to any third party without prior written consent. This obligation shall survive termination of this Agreement for a period of five (5) years.", "sample_one": "All proprietary information, trade secrets, and confidential data disclosed by either party shall be kept strictly confidential. The receiving party must implement reasonable security measures and limit access to authorized personnel only. Breach of this clause may result in immediate termination and legal action.", "sample_two": "Confidential Information shall mean any non-public information disclosed by one party to the other, whether orally, in writing, or electronically. Both parties agree to protect such information using the same degree of care used for their own confidential information, but no less than reasonable care." }, { "introductory_example": "Create an indemnification provision for a service agreement", "sample_zero": "The Service Provider shall indemnify, defend, and hold harmless the Client from any claims, damages, or losses arising from the Service Provider's negligence, willful misconduct, or breach of this Agreement, except to the extent caused by the Client's own negligence.", "sample_one": "Each party agrees to indemnify the other against third-party claims arising from their respective breaches of this Agreement or negligent acts. This indemnification includes reasonable attorneys' fees and costs, subject to prompt notice and cooperation in defense.", "sample_two": "Provider shall defend, indemnify, and hold Client harmless from all liabilities, costs, and expenses (including reasonable legal fees) resulting from Provider's performance under this Agreement, provided Client gives prompt notice of any claim and allows Provider to control the defense." }, { "introductory_example": "Write a limitation of liability clause for a technology services contract", "sample_zero": "Neither party shall be liable for any indirect, incidental, special, consequential, or punitive damages, regardless of the cause of action. Total liability under this Agreement shall not exceed the fees paid in the twelve months preceding the claim.", "sample_one": "IN NO EVENT SHALL EITHER PARTY BE LIABLE FOR LOST PROFITS, LOST DATA, OR CONSEQUENTIAL DAMAGES. THE MAXIMUM LIABILITY OF EITHER PARTY SHALL BE LIMITED TO THE TOTAL AMOUNT PAID UNDER THIS AGREEMENT IN THE SIX (6) MONTHS PRIOR TO THE EVENT GIVING RISE TO LIABILITY.", "sample_two": "Except for breaches of confidentiality, indemnification obligations, or willful misconduct, neither party's liability shall exceed the greater of (a) $100,000 or (b) the fees paid in the prior 12 months. This limitation applies to all claims in aggregate." } ] with open("test_legal_tech.jsonl", "w") as f: for item in sample_data: f.write(json.dumps(item) + "\n") if __name__ == "__main__": demo.launch(share=False, ssr_mode=False)