test-two / app.py
arthrod's picture
Update app.py
986af55 verified
raw
history blame
26.6 kB
import gradio as gr
import json
import uuid
from datetime import datetime
import os
import pandas as pd
class LegalTechEvaluator:
    """In-memory store for a three-way preference evaluation.

    Holds the loaded question records (``data``), the position of the
    question currently shown (``current_index``), and one session dict per
    evaluator (``user_sessions``) keyed by a generated UUID string.
    """

    # Keys under which a question record stores its three candidate outputs;
    # the same strings are used as the recorded "choice" values.
    SAMPLE_KEYS = ("sample_zero", "sample_one", "sample_two")

    def __init__(self):
        self.data = []
        self.current_index = 0
        self.user_sessions = {}
        self.current_user_id = None
        self.current_user_name = None
        self.auto_load_data()

    def auto_load_data(self):
        """Load ``test_legal_tech.jsonl`` from the working directory if present.

        Returns:
            True when the file exists and was loaded successfully, else False.
        """
        if os.path.exists("test_legal_tech.jsonl"):
            # Report the real outcome instead of claiming success on a bad file.
            return self.load_from_file("test_legal_tech.jsonl")
        return False

    def load_from_file(self, filename):
        """Replace ``self.data`` with the records of a JSONL file.

        Blank lines are ignored. If the file cannot be read or any line is not
        valid JSON, ``self.data`` is left empty rather than half-populated.

        Args:
            filename: Path of the JSONL file (one JSON object per line).

        Returns:
            True on success, False if reading or parsing failed.
        """
        self.data = []
        records = []
        try:
            with open(filename, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        # A trailing newline or spacer line is not a record.
                        continue
                    records.append(json.loads(line))
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error loading file: {e}")
            return False
        self.data = records
        return True

    def create_user_session(self, name):
        """Create a session for ``name`` and make it the active one.

        Returns:
            The new session's UUID string.
        """
        user_id = str(uuid.uuid4())
        self.current_user_id = user_id
        self.current_user_name = name
        self.user_sessions[user_id] = {
            "id": user_id,
            "name": name,
            "start_time": datetime.now().isoformat(),
            "responses": {},
            "completed": False
        }
        return user_id

    def record_choice(self, sample_choice):
        """Store the active user's choice for the current question.

        Returns:
            True if the choice was stored; False when there is no active
            session or ``current_index`` is outside the loaded data.
        """
        if self.current_user_id and 0 <= self.current_index < len(self.data):
            self.user_sessions[self.current_user_id]["responses"][self.current_index] = {
                "question_index": self.current_index,
                "choice": sample_choice,
                "timestamp": datetime.now().isoformat()
            }
            return True
        return False

    def get_current_question(self):
        """Return the record at ``current_index``, or None if out of range."""
        if 0 <= self.current_index < len(self.data):
            return self.data[self.current_index]
        return None

    def export_results(self):
        """Write all sessions to ``results/`` in three formats.

        Produces a JSONL file (one session per line), a Markdown summary and a
        detailed JSON report, all sharing one timestamp in their names.

        Returns:
            Tuple ``(jsonl_filename, md_filename, json_filename, md_content)``.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        # Create results directory if it doesn't exist
        os.makedirs("results", exist_ok=True)

        # 1. Export as JSONL (all user sessions)
        jsonl_filename = f"results/legal_tech_evaluation_{timestamp}.jsonl"
        with open(jsonl_filename, 'w', encoding='utf-8') as f:
            for session in self.user_sessions.values():
                f.write(json.dumps(session) + '\n')

        # 2. Create markdown summary
        md_content = self.generate_markdown_summary()
        md_filename = f"results/legal_tech_summary_{timestamp}.md"
        # Explicit UTF-8: prompts and names may contain non-ASCII text.
        with open(md_filename, 'w', encoding='utf-8') as f:
            f.write(md_content)

        # 3. Create detailed JSON report
        json_filename = f"results/legal_tech_detailed_{timestamp}.json"
        with open(json_filename, 'w', encoding='utf-8') as f:
            json.dump({
                "evaluation_date": datetime.now().isoformat(),
                "total_questions": len(self.data),
                "total_participants": len(self.user_sessions),
                "user_sessions": self.user_sessions,
                "questions": self.data
            }, f, indent=2)
        return jsonl_filename, md_filename, json_filename, md_content

    def generate_markdown_summary(self):
        """Build a Markdown report of overall and per-question vote counts.

        Response keys are normalised to ``int`` before tallying, so sessions
        recorded in memory (int keys) and sessions whose keys are strings are
        both counted. Responses with an unknown choice or a non-numeric
        question key are skipped instead of aborting the report.

        Returns:
            The Markdown document as a single string.
        """
        total_users = len(self.user_sessions)
        total_questions = len(self.data)

        # Calculate preferences
        sample_counts = dict.fromkeys(self.SAMPLE_KEYS, 0)
        question_stats = {}
        for session in self.user_sessions.values():
            for q_idx, response in session["responses"].items():
                choice = response["choice"]
                if choice not in sample_counts:
                    continue
                try:
                    q_key = int(q_idx)
                except (TypeError, ValueError):
                    continue
                sample_counts[choice] += 1
                per_question = question_stats.setdefault(
                    q_key, dict.fromkeys(self.SAMPLE_KEYS, 0)
                )
                per_question[choice] += 1

        total_votes = sum(sample_counts.values())
        denominator = max(total_votes, 1)  # avoid dividing by zero with no votes
        generated = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Generate markdown
        lines = [
            "# Legal Tech Tool Evaluation Summary",
            f"Generated: {generated}",
            "## Overview",
            f"- **Total Participants**: {total_users}",
            f"- **Total Questions**: {total_questions}",
            f"- **Total Responses**: {total_votes}",
            "## Overall Preferences",
            "| Sample | Votes | Percentage |",
            "|--------|-------|------------|",
        ]
        for number, key in enumerate(self.SAMPLE_KEYS):
            share = sample_counts[key] / denominator * 100
            lines.append(f"| Sample {number} | {sample_counts[key]} | {share:.1f}% |")
        lines.append("## Question-by-Question Breakdown")
        lines.append("")  # keeps the trailing newline after the heading
        parts = ["\n".join(lines)]

        for idx, question in enumerate(self.data):
            stats = question_stats.get(idx)
            if not stats:
                continue
            total_responses = sum(stats.values())
            if total_responses <= 0:
                continue
            parts.append(f"\n### Question {idx + 1}\n")
            parts.append(f"**Prompt**: {question.get('introductory_example', 'N/A')[:100]}...\n\n")
            parts.append("| Sample | Votes | Percentage |\n")
            parts.append("|--------|-------|------------|\n")
            for number, key in enumerate(self.SAMPLE_KEYS):
                share = stats[key] / total_responses * 100
                parts.append(f"| Sample {number} | {stats[key]} | {share:.1f}% |\n")

        parts.append("\n## Participants\n")
        for session in self.user_sessions.values():
            responses = len(session["responses"])
            parts.append(
                f"- **{session['name']}** (ID: {session['id'][:8]}...): "
                f"{responses}/{total_questions} questions answered\n"
            )
        return "".join(parts)
# Global evaluator instance, shared by every handler function in this module.
# Constructing it triggers auto_load_data(), so the dataset is read here.
# NOTE(review): current_index / current_user_id live on this single object, so
# it looks like concurrent visitors would share one position and one active
# session -- confirm whether per-visitor state is required.
evaluator = LegalTechEvaluator()
# Custom CSS for elegant interface.
# Visibility of the three screens is controlled solely by the `.hide` utility
# class that the page script adds/removes. The evaluation container therefore
# must not carry its own ID-level `display: none` rule: such a rule would keep
# the container hidden even after `.hide` is removed.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
* {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
}
.gradio-container {
    max-width: 1200px !important;
    margin: 0 auto !important;
    padding: 0 !important;
}
#main-container {
    background: #ffffff;
    border-radius: 16px;
    box-shadow: 0 4px 24px rgba(0, 0, 0, 0.06);
    padding: 48px;
    margin: 24px;
}
#welcome-screen {
    text-align: center;
    padding: 60px 40px;
    background: linear-gradient(135deg, #f5f5f5 0%, #ffffff 100%);
    border-radius: 16px;
    margin-bottom: 32px;
}
#welcome-screen h1 {
    font-size: 32px;
    font-weight: 600;
    color: #1a1a1a;
    margin-bottom: 16px;
}
#welcome-screen p {
    font-size: 18px;
    color: #666;
    margin-bottom: 32px;
    line-height: 1.6;
}
#name-input {
    max-width: 400px;
    margin: 0 auto 24px;
}
#name-input input {
    font-size: 16px;
    padding: 12px 20px;
    border: 2px solid #e0e0e0;
    border-radius: 8px;
    width: 100%;
    transition: all 0.3s ease;
}
#name-input input:focus {
    border-color: #4a90e2;
    outline: none;
}
#start-button {
    background: #2563eb;
    color: white;
    border: none;
    padding: 14px 40px;
    font-size: 16px;
    font-weight: 500;
    border-radius: 8px;
    cursor: pointer;
    transition: all 0.3s ease;
}
#start-button:hover {
    background: #1d4ed8;
    transform: translateY(-1px);
    box-shadow: 0 4px 12px rgba(37, 99, 235, 0.3);
}
#question-header {
    text-align: center;
    margin-bottom: 40px;
    padding-bottom: 24px;
    border-bottom: 1px solid #e5e7eb;
}
#question-header h2 {
    font-size: 24px;
    font-weight: 600;
    color: #1a1a1a;
    margin-bottom: 8px;
}
#progress-text {
    font-size: 14px;
    color: #6b7280;
    font-weight: 500;
}
#example-section {
    background: #f9fafb;
    border-radius: 12px;
    padding: 24px;
    margin-bottom: 32px;
}
#example-section h3 {
    font-size: 16px;
    font-weight: 600;
    color: #374151;
    margin-bottom: 12px;
    text-transform: uppercase;
    letter-spacing: 0.05em;
}
#example-text {
    font-size: 16px;
    line-height: 1.6;
    color: #1f2937;
}
#samples-container {
    display: grid;
    grid-template-columns: repeat(3, 1fr);
    gap: 24px;
    margin-bottom: 40px;
}
.sample-card {
    background: white;
    border: 2px solid #e5e7eb;
    border-radius: 12px;
    padding: 24px;
    transition: all 0.3s ease;
    position: relative;
}
.sample-card.selected {
    border-color: #2563eb;
    box-shadow: 0 0 0 4px rgba(37, 99, 235, 0.1);
}
.sample-card h4 {
    font-size: 14px;
    font-weight: 600;
    color: #6b7280;
    margin-bottom: 16px;
    text-transform: uppercase;
    letter-spacing: 0.05em;
}
.sample-text {
    font-size: 15px;
    line-height: 1.6;
    color: #374151;
    margin-bottom: 20px;
    min-height: 120px;
}
.sample-button {
    width: 100%;
    padding: 12px 24px;
    background: #f3f4f6;
    border: 2px solid transparent;
    border-radius: 8px;
    font-size: 14px;
    font-weight: 500;
    color: #374151;
    cursor: pointer;
    transition: all 0.3s ease;
}
.sample-button:hover {
    background: #e5e7eb;
}
.sample-card.selected .sample-button {
    background: #2563eb;
    color: white;
    border-color: #2563eb;
}
#action-buttons {
    display: flex;
    justify-content: center;
    gap: 16px;
    margin-top: 40px;
    padding-top: 32px;
    border-top: 1px solid #e5e7eb;
}
.nav-button {
    padding: 12px 32px;
    background: #f3f4f6;
    border: none;
    border-radius: 8px;
    font-size: 14px;
    font-weight: 500;
    color: #374151;
    cursor: pointer;
    transition: all 0.3s ease;
}
.nav-button:hover:not(:disabled) {
    background: #e5e7eb;
}
.nav-button:disabled {
    opacity: 0.5;
    cursor: not-allowed;
}
#confirm-button {
    background: #10b981;
    color: white;
}
#confirm-button:hover:not(:disabled) {
    background: #059669;
}
#export-section {
    text-align: center;
    margin-top: 48px;
    padding-top: 48px;
    border-top: 2px solid #e5e7eb;
}
#export-button {
    background: #6366f1;
    color: white;
    padding: 14px 40px;
    border: none;
    border-radius: 8px;
    font-size: 16px;
    font-weight: 500;
    cursor: pointer;
    transition: all 0.3s ease;
}
#export-button:hover {
    background: #4f46e5;
    transform: translateY(-1px);
    box-shadow: 0 4px 12px rgba(99, 102, 241, 0.3);
}
#file-upload-section {
    text-align: center;
    padding: 40px;
    background: #f9fafb;
    border-radius: 12px;
    margin-top: 32px;
}
#file-upload-section h3 {
    font-size: 18px;
    font-weight: 600;
    color: #374151;
    margin-bottom: 16px;
}
.hide {
    display: none !important;
}
.success-message {
    background: #d1fae5;
    color: #065f46;
    padding: 12px 20px;
    border-radius: 8px;
    margin: 16px 0;
    text-align: center;
    font-weight: 500;
}
.user-info {
    position: absolute;
    top: 20px;
    right: 20px;
    background: #f3f4f6;
    padding: 8px 16px;
    border-radius: 8px;
    font-size: 14px;
    color: #6b7280;
}
"""
# HTML Template: the visible UI plus the script that bridges it to the hidden
# Gradio components (looked up by their elem_id).
# Script behaviour worth knowing:
#   * the name typed on the welcome screen is copied into #hidden-name before
#     the hidden start button is clicked, so the Python handler receives it;
#   * the state poll re-renders only when the serialized state changes, so a
#     pending selection is not wiped on every tick;
#   * hidden fields are resolved to the nested <textarea>/<input> when the
#     elem_id is on a wrapper element.
# NOTE(review): whether a <script> inside gr.HTML is executed depends on the
# Gradio version -- confirm, or move the script to the Blocks `js`/`head` hook.
html_template = """
<div id="main-container">
    <div id="welcome-screen">
        <h1>Legal Tech Tool Evaluation</h1>
        <p>Compare and evaluate outputs from different legal technology tools to help us understand user preferences.</p>
        <div id="name-input">
            <input type="text" id="user-name" placeholder="Enter your name" />
        </div>
        <button id="start-button" onclick="startEvaluation()">Start Evaluation</button>
    </div>
    <div id="evaluation-container" class="hide">
        <div class="user-info" id="user-info"></div>
        <div id="question-header">
            <h2>Question <span id="current-question">1</span> of <span id="total-questions">0</span></h2>
            <p id="progress-text">Please select your preferred response</p>
        </div>
        <div id="example-section">
            <h3>Prompt</h3>
            <p id="example-text"></p>
        </div>
        <div id="samples-container">
            <div class="sample-card" id="sample-0">
                <h4>Sample A</h4>
                <div class="sample-text" id="sample-0-text"></div>
                <button class="sample-button" onclick="selectSample(0)">Select Sample A</button>
            </div>
            <div class="sample-card" id="sample-1">
                <h4>Sample B</h4>
                <div class="sample-text" id="sample-1-text"></div>
                <button class="sample-button" onclick="selectSample(1)">Select Sample B</button>
            </div>
            <div class="sample-card" id="sample-2">
                <h4>Sample C</h4>
                <div class="sample-text" id="sample-2-text"></div>
                <button class="sample-button" onclick="selectSample(2)">Select Sample C</button>
            </div>
        </div>
        <div id="action-buttons">
            <button class="nav-button" id="prev-button" onclick="previousQuestion()">← Previous</button>
            <button class="nav-button" id="confirm-button" onclick="confirmSelection()">Confirm Selection</button>
            <button class="nav-button" id="next-button" onclick="nextQuestion()">Next →</button>
        </div>
        <div id="export-section">
            <button id="export-button" onclick="exportResults()">Export Results</button>
            <div id="export-status"></div>
        </div>
    </div>
    <div id="file-upload-section" class="hide">
        <h3>No data file found</h3>
        <p>Please upload a JSONL file to begin the evaluation</p>
        <input type="file" id="file-input" accept=".jsonl" onchange="uploadFile()" />
    </div>
</div>
<script>
let currentSelection = null;
let evaluationStarted = false;
let lastRenderedState = null;

// Root node that hosts the Gradio components; falls back to the document
// when no gradioApp() helper is defined on the page.
function gradioRoot() {
    if (typeof window.gradioApp === 'function') {
        return window.gradioApp();
    }
    const host = document.querySelector('gradio-app');
    return (host && host.shadowRoot) ? host.shadowRoot : document;
}

// Resolve an elem_id to the real form field (the id may sit on a wrapper).
function findField(elemId) {
    const node = gradioRoot().querySelector('#' + elemId);
    if (!node) return null;
    if (node.matches('input, textarea')) return node;
    return node.querySelector('textarea, input');
}

// Write a value into a hidden field and notify listeners of the change.
function setHiddenValue(elemId, value) {
    const field = findField(elemId);
    if (!field) return false;
    field.value = value;
    field.dispatchEvent(new Event('input', { bubbles: true }));
    return true;
}

// Click a hidden button; deferred briefly so a value written just before
// has been registered by the time the click is handled.
function clickHidden(elemId) {
    const node = gradioRoot().querySelector('#' + elemId);
    if (!node) return;
    const target = node.matches('button') ? node : (node.querySelector('button') || node);
    setTimeout(() => target.click(), 50);
}

function startEvaluation() {
    const name = document.getElementById('user-name').value.trim();
    if (!name) {
        alert('Please enter your name');
        return;
    }
    // Hand the name to the backend before starting the session
    setHiddenValue('hidden-name', name);
    evaluationStarted = true;
    clickHidden('start-session-btn');
}

function selectSample(sampleIndex) {
    // Clear previous selections
    document.querySelectorAll('.sample-card').forEach(card => {
        card.classList.remove('selected');
    });
    // Add selection to clicked sample
    document.getElementById(`sample-${sampleIndex}`).classList.add('selected');
    currentSelection = sampleIndex;
    // Enable confirm button
    document.getElementById('confirm-button').disabled = false;
}

function updateDisplay(data) {
    if (!data) return;
    // Parse the data; ignore a partially written / malformed state
    let questionData;
    try {
        questionData = JSON.parse(data);
    } catch (err) {
        return;
    }
    if (questionData.showWelcome) {
        document.getElementById('welcome-screen').classList.remove('hide');
        document.getElementById('evaluation-container').classList.add('hide');
        document.getElementById('file-upload-section').classList.add('hide');
    } else if (questionData.showFileUpload) {
        document.getElementById('welcome-screen').classList.add('hide');
        document.getElementById('evaluation-container').classList.add('hide');
        document.getElementById('file-upload-section').classList.remove('hide');
    } else {
        document.getElementById('welcome-screen').classList.add('hide');
        document.getElementById('evaluation-container').classList.remove('hide');
        document.getElementById('file-upload-section').classList.add('hide');
        // Update user info
        if (questionData.userName) {
            document.getElementById('user-info').textContent = `Evaluator: ${questionData.userName}`;
        }
        // Update question info
        document.getElementById('current-question').textContent = questionData.currentIndex + 1;
        document.getElementById('total-questions').textContent = questionData.totalQuestions;
        // Update example and samples
        document.getElementById('example-text').textContent = questionData.introductoryExample;
        document.getElementById('sample-0-text').textContent = questionData.sampleZero;
        document.getElementById('sample-1-text').textContent = questionData.sampleOne;
        document.getElementById('sample-2-text').textContent = questionData.sampleTwo;
        // Update button states
        document.getElementById('prev-button').disabled = questionData.currentIndex === 0;
        document.getElementById('next-button').disabled = questionData.currentIndex >= questionData.totalQuestions - 1;
        document.getElementById('confirm-button').disabled = true;
        // Clear selection
        document.querySelectorAll('.sample-card').forEach(card => {
            card.classList.remove('selected');
        });
        currentSelection = null;
    }
}

function confirmSelection() {
    if (currentSelection === null) return;
    const sampleMap = ['sample_zero', 'sample_one', 'sample_two'];
    // Trigger Gradio function with selection
    setHiddenValue('hidden-selection', sampleMap[currentSelection]);
    clickHidden('confirm-btn');
}

function previousQuestion() {
    clickHidden('prev-btn');
}

function nextQuestion() {
    clickHidden('next-btn');
}

function exportResults() {
    clickHidden('export-btn');
}

function uploadFile() {
    const fileInput = document.getElementById('file-input');
    if (fileInput.files.length > 0) {
        // Trigger Gradio file upload
        clickHidden('file-upload-btn');
    }
}

// Listen for Gradio updates; render only when the state actually changed so
// the user's in-progress selection survives between polls.
setInterval(() => {
    const stateField = findField('current-state');
    if (!stateField || !stateField.value) return;
    if (stateField.value === lastRenderedState) return;
    lastRenderedState = stateField.value;
    updateDisplay(stateField.value);
}, 100);
</script>
"""
def start_session(name):
    """Start a new evaluation session and return the state to display.

    Args:
        name: Evaluator name as entered in the UI; surrounding whitespace is
            ignored.

    Returns:
        A JSON string: the welcome state when no usable name was given, the
        file-upload state when no questions are loaded, otherwise the state of
        the first question for the newly created session.
    """
    cleaned = name.strip() if isinstance(name, str) else name
    if not cleaned:
        return json.dumps({"showWelcome": True})
    if not evaluator.data:
        return json.dumps({"showFileUpload": True})
    # Every evaluator starts at the first question rather than wherever the
    # previous session left the shared index.
    evaluator.current_index = 0
    evaluator.create_user_session(cleaned)
    return get_current_state()
def get_current_state():
    """Serialize what the page should currently show as a JSON string.

    When the evaluator has no question at its current index the welcome
    state is returned; otherwise the payload carries the user name, the
    position, the question count and the prompt with its three samples.
    """
    current = evaluator.get_current_question()
    if not current:
        welcome_only = {"showWelcome": True}
        return json.dumps(welcome_only)

    payload = {
        "showWelcome": False,
        "showFileUpload": False,
        "userName": evaluator.current_user_name,
        "currentIndex": evaluator.current_index,
        "totalQuestions": len(evaluator.data),
    }
    # (output key, record key) pairs; missing record fields become "".
    text_fields = (
        ("introductoryExample", "introductory_example"),
        ("sampleZero", "sample_zero"),
        ("sampleOne", "sample_one"),
        ("sampleTwo", "sample_two"),
    )
    for out_key, record_key in text_fields:
        payload[out_key] = current.get(record_key, "")
    return json.dumps(payload)
def confirm_selection(selection):
    """Record ``selection`` for the current question, then report the state.

    After a successful record the position moves forward by one unless the
    current question is already the last. An empty selection or a failed
    record leaves the position unchanged.
    """
    recorded = evaluator.record_choice(selection) if selection else False
    # Auto-advance to next question
    if recorded and evaluator.current_index < len(evaluator.data) - 1:
        evaluator.current_index += 1
    return get_current_state()
def previous_question():
    """Step back one question (no-op on the first) and return the state."""
    position = evaluator.current_index
    if position > 0:
        evaluator.current_index = position - 1
    return get_current_state()
def next_question():
    """Step forward one question (no-op on the last) and return the state."""
    position = evaluator.current_index
    last_position = len(evaluator.data) - 1
    if position < last_position:
        evaluator.current_index = position + 1
    return get_current_state()
def export_results_handler():
    """Export results and return the updates for the export outputs.

    Returns:
        A 5-tuple matching the wired outputs: current state JSON, status text
        update, JSONL download update, Markdown download update, and the
        Markdown summary text (empty on failure).
    """
    try:
        jsonl_file, md_file, _json_file, md_content = evaluator.export_results()
    except Exception as e:
        # UI boundary: surface the failure in the status box instead of raising.
        return (
            get_current_state(),
            gr.update(value=f"❌ Export failed: {str(e)}"),
            gr.update(visible=False),
            gr.update(visible=False),
            ""
        )
    # gr.update() is used for the file outputs as well: the per-component
    # `gr.File.update` classmethod is not available in newer Gradio releases.
    return (
        get_current_state(),
        gr.update(value="✅ Results exported successfully!"),
        gr.update(value=jsonl_file, visible=True),
        gr.update(value=md_file, visible=True),
        md_content
    )
def load_file(file):
    """Load questions from an uploaded file and return the resulting state.

    Args:
        file: The value delivered by the file component -- either a path
            (str / path-like) or an object exposing the path as ``.name``.
            A falsy value is ignored.

    Returns:
        The current state as a JSON string.
    """
    if file:
        if isinstance(file, (str, os.PathLike)):
            path = file
        else:
            path = getattr(file, "name", None)
        if path and evaluator.load_from_file(path):
            # A new dataset may be shorter than the old one; restart at the top.
            evaluator.current_index = 0
    return get_current_state()
# Create Gradio interface.
# The visible UI is the raw HTML in `html_template`; the Gradio components
# below are created inside invisible columns and are addressed from the page
# script through their elem_id values.
# NOTE(review): confirm that components inside a `visible=False` column are
# still present in the DOM for the Gradio version in use; the script looks
# them up by id.
with gr.Blocks(css=custom_css, theme=gr.themes.Base()) as demo:
    # Hidden components for JavaScript interaction
    with gr.Column(visible=False):
        name_input = gr.Textbox(elem_id="hidden-name")
        start_btn = gr.Button("Start", elem_id="start-session-btn")
        # Carries the JSON state string produced by the handlers.
        current_state = gr.Textbox(elem_id="current-state")
        selection_input = gr.Textbox(elem_id="hidden-selection")
        confirm_btn = gr.Button("Confirm", elem_id="confirm-btn")
        prev_btn = gr.Button("Previous", elem_id="prev-btn")
        next_btn = gr.Button("Next", elem_id="next-btn")
        export_btn = gr.Button("Export", elem_id="export-btn")
        file_upload = gr.File(elem_id="file-upload-input")
        file_upload_btn = gr.Button("Upload", elem_id="file-upload-btn")
    # Output components (hidden)
    with gr.Column(visible=False):
        export_status = gr.Textbox()
        download_jsonl = gr.File(label="Download JSONL")
        download_md = gr.File(label="Download Summary")
        summary_display = gr.Markdown()
    # Main HTML interface
    gr.HTML(html_template)
    # Event handlers: every handler writes its JSON result to `current_state`.
    start_btn.click(
        fn=lambda name: start_session(name),
        inputs=[name_input],
        outputs=[current_state]
    )
    confirm_btn.click(
        fn=confirm_selection,
        inputs=[selection_input],
        outputs=[current_state]
    )
    prev_btn.click(
        fn=previous_question,
        outputs=[current_state]
    )
    next_btn.click(
        fn=next_question,
        outputs=[current_state]
    )
    # Output order must match the 5-tuple returned by export_results_handler.
    export_btn.click(
        fn=export_results_handler,
        outputs=[current_state, export_status, download_jsonl, download_md, summary_display]
    )
    file_upload_btn.click(
        fn=load_file,
        inputs=[file_upload],
        outputs=[current_state]
    )
    # Initial state: welcome screen when no data is loaded, otherwise the
    # current question state.
    # NOTE(review): with data loaded this shows a question before any name was
    # entered -- confirm that skipping the welcome screen is intended.
    demo.load(
        fn=lambda: json.dumps({"showWelcome": True}) if not evaluator.data else get_current_state(),
        outputs=[current_state]
    )
# Create sample data if no file exists.
# The module-level evaluator was constructed before this point, so when the
# file had to be created here it is loaded into the evaluator afterwards;
# otherwise the first run would start with no questions.
if not os.path.exists("test_legal_tech.jsonl"):
    sample_data = [
        {
            "introductory_example": "Draft a confidentiality clause for a software development agreement",
            "sample_zero": "The Receiving Party agrees to maintain the confidentiality of all Confidential Information received from the Disclosing Party and shall not disclose such information to any third party without prior written consent. This obligation shall survive termination of this Agreement for a period of five (5) years.",
            "sample_one": "All proprietary information, trade secrets, and confidential data disclosed by either party shall be kept strictly confidential. The receiving party must implement reasonable security measures and limit access to authorized personnel only. Breach of this clause may result in immediate termination and legal action.",
            "sample_two": "Confidential Information shall mean any non-public information disclosed by one party to the other, whether orally, in writing, or electronically. Both parties agree to protect such information using the same degree of care used for their own confidential information, but no less than reasonable care."
        },
        {
            "introductory_example": "Create an indemnification provision for a service agreement",
            "sample_zero": "The Service Provider shall indemnify, defend, and hold harmless the Client from any claims, damages, or losses arising from the Service Provider's negligence, willful misconduct, or breach of this Agreement, except to the extent caused by the Client's own negligence.",
            "sample_one": "Each party agrees to indemnify the other against third-party claims arising from their respective breaches of this Agreement or negligent acts. This indemnification includes reasonable attorneys' fees and costs, subject to prompt notice and cooperation in defense.",
            "sample_two": "Provider shall defend, indemnify, and hold Client harmless from all liabilities, costs, and expenses (including reasonable legal fees) resulting from Provider's performance under this Agreement, provided Client gives prompt notice of any claim and allows Provider to control the defense."
        },
        {
            "introductory_example": "Write a limitation of liability clause for a technology services contract",
            "sample_zero": "Neither party shall be liable for any indirect, incidental, special, consequential, or punitive damages, regardless of the cause of action. Total liability under this Agreement shall not exceed the fees paid in the twelve months preceding the claim.",
            "sample_one": "IN NO EVENT SHALL EITHER PARTY BE LIABLE FOR LOST PROFITS, LOST DATA, OR CONSEQUENTIAL DAMAGES. THE MAXIMUM LIABILITY OF EITHER PARTY SHALL BE LIMITED TO THE TOTAL AMOUNT PAID UNDER THIS AGREEMENT IN THE SIX (6) MONTHS PRIOR TO THE EVENT GIVING RISE TO LIABILITY.",
            "sample_two": "Except for breaches of confidentiality, indemnification obligations, or willful misconduct, neither party's liability shall exceed the greater of (a) $100,000 or (b) the fees paid in the prior 12 months. This limitation applies to all claims in aggregate."
        }
    ]
    with open("test_legal_tech.jsonl", "w", encoding="utf-8") as f:
        for item in sample_data:
            f.write(json.dumps(item) + "\n")
    # Pick up the freshly written file unless data was loaded some other way.
    if not evaluator.data:
        evaluator.auto_load_data()
# Launch the app only when this file is run as a script (not on import);
# no public share link is requested and server-side rendering is disabled.
if __name__ == "__main__":
    demo.launch(share=False, ssr_mode=False)