Spaces:
Running
on
Zero
Running
on
Zero
import gradio as gr | |
import random | |
import json | |
import os | |
from datetime import datetime | |
# Placeholder: replace this with your actual SLM integration.
def generate_response(query, context, model_name):
    """Return a stand-in answer for ``model_name``.

    No model is called here: the result is a fixed sentence that simply
    echoes the model name, the query and the context it was given.
    """
    reply = f"This is a placeholder response from {model_name} based on query: {query} and context: {context}"
    return reply
def save_evaluation(query, context, model_a, model_b, response_a, response_b, preference):
    """Persist one pairwise evaluation as a JSON file under ``evaluations/``.

    Args:
        query: The user query both models answered.
        context: The context text supplied alongside the query.
        model_a: Name of the model whose response was shown on the left.
        model_b: Name of the model whose response was shown on the right.
        response_a: Text shown on the left.
        response_b: Text shown on the right.
        preference: The evaluator's choice.

    Returns:
        The string ``"Evaluation saved successfully!"``.

    Raises:
        OSError: If the directory or file cannot be created or written.
    """
    now = datetime.now()
    evaluation = {
        # The stored timestamp keeps its original second-resolution format.
        "timestamp": now.strftime("%Y-%m-%d %H:%M:%S"),
        "query": query,
        "context": context,
        "models": {
            "left": model_a,
            "right": model_b,
        },
        "responses": {
            "left": response_a,
            "right": response_b,
        },
        "preference": preference,
    }

    # Create directory if it doesn't exist
    os.makedirs("evaluations", exist_ok=True)

    # A second-resolution name would let two submissions in the same second
    # overwrite each other, so the filename carries microseconds and the file
    # is opened in exclusive-create mode; on a clash a counter is appended.
    stamp = now.strftime("%Y-%m-%d_%H-%M-%S-%f")
    path = os.path.join("evaluations", f"eval_{stamp}.json")
    attempt = 0
    while True:
        try:
            with open(path, "x", encoding="utf-8") as f:
                json.dump(evaluation, f, indent=2, ensure_ascii=False)
            break
        except FileExistsError:
            attempt += 1
            path = os.path.join("evaluations", f"eval_{stamp}_{attempt}.json")

    return "Evaluation saved successfully!"
def process_query(query, context, model_a="SLM-A", model_b="SLM-B"):
    """Produce two responses for the query and return them in random order.

    Returns a 4-tuple ``(first_response, second_response, first_model,
    second_model)``; which model ends up first is decided by a coin flip so
    that evaluators cannot rely on position.
    """
    first = (generate_response(query, context, model_a), model_a)
    second = (generate_response(query, context, model_b), model_b)

    # Coin flip: keep the a/b order on a draw above 0.5, otherwise swap.
    if random.random() <= 0.5:
        first, second = second, first

    (first_text, first_name), (second_text, second_name) = first, second
    return first_text, second_text, first_name, second_name
def submit_evaluation(query, context, response_left, response_right, preference, model_left, model_right):
    """Validate an evaluation and save it via ``save_evaluation``.

    Args:
        query: The query that was evaluated.
        context: The context supplied with the query.
        response_left: Response text shown on the left.
        response_right: Response text shown on the right.
        preference: The selected preference; falsy when nothing was chosen.
        model_left: Name of the model behind the left response.
        model_right: Name of the model behind the right response.

    Returns:
        A status message: a prompt to fix the input when validation fails,
        otherwise ``"Thank you for your evaluation!"``.
    """
    if not preference:
        return "Please select a preference before submitting."

    # With both responses empty there is nothing to compare (e.g. submit was
    # pressed before generating), so do not persist an empty record.
    if not (response_left or response_right):
        return "Please generate responses before submitting."

    save_evaluation(query, context, model_left, model_right, response_left, response_right, preference)
    return "Thank you for your evaluation!"
# Page layout and event wiring for the comparison UI.
with gr.Blocks(title="SLM-RAG Arena") as app:
    gr.Markdown("# SLM-RAG Arena")
    gr.Markdown("Compare responses from different models for RAG tasks.")

    # --- Inputs -----------------------------------------------------------
    with gr.Row():
        with gr.Column():
            query_input = gr.Textbox(
                label="Query",
                placeholder="Enter your query here...",
            )
            context_input = gr.Textbox(
                label="Context",
                placeholder="Enter context information here...",
                lines=5,
            )
            generate_btn = gr.Button("Generate Responses")

    # Non-visible per-session values: which model produced each side.
    model_left = gr.State(value="")
    model_right = gr.State(value="")

    # --- Side-by-side responses ---------------------------------------------
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Response A")
            response_left = gr.Textbox(label="", lines=10, interactive=False)
        with gr.Column():
            gr.Markdown("### Response B")
            response_right = gr.Textbox(label="", lines=10, interactive=False)

    # --- Preference and submission ------------------------------------------
    with gr.Row():
        preference = gr.Radio(
            choices=["Prefer Left", "Tie", "Prefer Right", "Neither"],
            label="Which response do you prefer?",
        )
    submit_btn = gr.Button("Submit Evaluation")
    result = gr.Textbox(label="Result")

    # "Generate" fills both response boxes and records the model behind each.
    generate_btn.click(
        fn=process_query,
        inputs=[query_input, context_input],
        outputs=[response_left, response_right, model_left, model_right],
    )

    # "Submit" passes everything shown plus the chosen preference to be saved.
    submit_btn.click(
        fn=submit_evaluation,
        inputs=[
            query_input,
            context_input,
            response_left,
            response_right,
            preference,
            model_left,
            model_right,
        ],
        outputs=[result],
    )

app.launch()