# GradioApp / ui / analysis_screen.py
import gradio as gr
import json
# Import analysis modules (each processors submodule is assumed to be available;
# their functions are required by process_analysis_request below)
from processors.topic_modeling import extract_topics, compare_topics
from processors.ngram_analysis import compare_ngrams
from processors.bias_detection import compare_bias
from processors.bow_analysis import compare_bow
from processors.metrics import calculate_similarity
from processors.diff_highlighter import highlight_differences
def create_analysis_screen():
"""
Create the analysis options screen
Returns:
tuple: (analysis_options, analysis_params, run_analysis_btn, analysis_output)
"""
with gr.Column() as analysis_screen:
gr.Markdown("## Analysis Options")
gr.Markdown("Select which analyses you want to run on the LLM responses.")
# Analysis selection
with gr.Group():
analysis_options = gr.CheckboxGroup(
choices=[
"Topic Modeling",
"N-gram Analysis",
"Bias Detection",
"Bag of Words",
"Similarity Metrics",
"Difference Highlighting"
],
value=[
"Topic Modeling",
"N-gram Analysis",
"Bag of Words",
"Similarity Metrics"
],
label="Select Analyses to Run"
)
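            # Note: these choice labels are matched verbatim against
            # selected_analyses in process_analysis_request below.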
# Parameters for each analysis type
with gr.Accordion("Analysis Parameters", open=False) as analysis_params:
# Topic modeling parameters
with gr.Group():
gr.Markdown("### Topic Modeling Parameters")
topic_count = gr.Slider(minimum=2, maximum=10, value=3, step=1,
label="Number of Topics")
# N-gram parameters
with gr.Group():
gr.Markdown("### N-gram Parameters")
ngram_n = gr.Radio(choices=["1", "2", "3"], value="2",
label="N-gram Size")
ngram_top = gr.Slider(minimum=5, maximum=30, value=10, step=1,
label="Top N-grams to Display")
# Bias detection parameters
with gr.Group():
gr.Markdown("### Bias Detection Parameters")
bias_methods = gr.CheckboxGroup(
choices=["Sentiment Analysis", "Partisan Leaning", "Framing Analysis"],
value=["Sentiment Analysis", "Partisan Leaning"],
label="Bias Detection Methods"
)
# Bag of Words parameters
with gr.Group():
gr.Markdown("### Bag of Words Parameters")
bow_top = gr.Slider(minimum=10, maximum=100, value=25, step=5,
label="Top Words to Compare")
# Similarity metrics parameters
with gr.Group():
gr.Markdown("### Similarity Metrics Parameters")
similarity_metrics = gr.CheckboxGroup(
choices=["Cosine Similarity", "Jaccard Similarity", "Semantic Similarity"],
value=["Cosine Similarity", "Semantic Similarity"],
label="Similarity Metrics to Calculate"
)
# Run analysis button
run_analysis_btn = gr.Button("Run Analysis", variant="primary", size="large")
# Analysis output area
analysis_output = gr.JSON(label="Analysis Results", visible=False)
return analysis_options, analysis_params, run_analysis_btn, analysis_output
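
# Example wiring for the components returned above (a minimal, illustrative
# sketch; `demo`, `dataset_state`, and `results_state` are hypothetical names,
# not defined in this module). The individual parameter widgets are created
# inside the accordion but not returned, so this sketch passes an empty
# parameters dict and relies on the defaults in process_analysis_request:
#
#   with gr.Blocks() as demo:
#       dataset_state = gr.State({})
#       results_state = gr.State({})
#       analysis_options, analysis_params, run_analysis_btn, analysis_output = create_analysis_screen()
#       run_analysis_btn.click(
#           fn=lambda data, selected: process_analysis_request(data, selected, {}),
#           inputs=[dataset_state, analysis_options],
#           outputs=[results_state, analysis_output],
#       )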
def process_analysis_request(dataset, selected_analyses, parameters):
"""
Process the analysis request and run selected analyses
Args:
dataset (dict): The dataset containing prompts and LLM responses
selected_analyses (list): List of selected analysis types
parameters (dict): Parameters for each analysis type
Returns:
tuple: (analysis_results, analysis_output_display)
"""
if not dataset or "entries" not in dataset or not dataset["entries"]:
return {}, gr.update(visible=True, value=json.dumps({"error": "No dataset provided or dataset is empty"}, indent=2))
analysis_results = {"analyses": {}}
# Group responses by prompt
prompts = {}
for entry in dataset["entries"]:
if entry["prompt"] not in prompts:
prompts[entry["prompt"]] = []
prompts[entry["prompt"]].append({
"model": entry["model"],
"response": entry["response"]
})
# Run selected analyses for each prompt
for prompt, responses in prompts.items():
analysis_results["analyses"][prompt] = {}
# Extract just the text responses and model names
response_texts = [r["response"] for r in responses]
model_names = [r["model"] for r in responses]
# Run Topic Modeling
if "Topic Modeling" in selected_analyses:
num_topics = parameters.get("topic_count", 3)
topic_results = compare_topics(response_texts, model_names, num_topics)
analysis_results["analyses"][prompt]["topic_modeling"] = topic_results
# Run N-gram Analysis
if "N-gram Analysis" in selected_analyses:
n = int(parameters.get("ngram_n", 2))
top_n = parameters.get("ngram_top", 10)
ngram_results = compare_ngrams(response_texts, model_names, n, top_n)
analysis_results["analyses"][prompt]["ngram_analysis"] = ngram_results
# Run Bias Detection
if "Bias Detection" in selected_analyses:
bias_methods = parameters.get("bias_methods", ["Sentiment Analysis", "Partisan Leaning"])
bias_results = compare_bias(response_texts, model_names, bias_methods)
analysis_results["analyses"][prompt]["bias_detection"] = bias_results
# Run Bag of Words Analysis
if "Bag of Words" in selected_analyses:
top_words = parameters.get("bow_top", 25)
bow_results = compare_bow(response_texts, model_names, top_words)
analysis_results["analyses"][prompt]["bag_of_words"] = bow_results
# Run Similarity Metrics
if "Similarity Metrics" in selected_analyses:
metrics = parameters.get("similarity_metrics", ["Cosine Similarity"])
similarity_results = {}
# Calculate pairwise similarities
for i in range(len(responses)):
for j in range(i+1, len(responses)):
model_pair = f"{model_names[i]} vs {model_names[j]}"
similarity_results[model_pair] = calculate_similarity(
response_texts[i], response_texts[j], metrics
)
analysis_results["analyses"][prompt]["similarity_metrics"] = similarity_results
# Run Difference Highlighting
if "Difference Highlighting" in selected_analyses:
diff_results = {}
# Calculate pairwise differences
for i in range(len(responses)):
for j in range(i+1, len(responses)):
model_pair = f"{model_names[i]} vs {model_names[j]}"
diff_results[model_pair] = highlight_differences(
response_texts[i], response_texts[j]
)
analysis_results["analyses"][prompt]["difference_highlighting"] = diff_results
return analysis_results, gr.update(visible=True, value=json.dumps(analysis_results, indent=2))
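

if __name__ == "__main__":
    # Ad-hoc smoke test (illustrative sketch, assuming the processors package is
    # importable from the project root). The prompt, model names, and responses
    # below are made up for demonstration.
    sample_dataset = {
        "entries": [
            {"prompt": "Explain photosynthesis.", "model": "model_a",
             "response": "Plants convert sunlight into chemical energy stored as sugar."},
            {"prompt": "Explain photosynthesis.", "model": "model_b",
             "response": "Photosynthesis turns light, water, and carbon dioxide into glucose."},
        ]
    }
    results, _ = process_analysis_request(
        sample_dataset,
        selected_analyses=["Bag of Words"],
        parameters={"bow_top": 10},
    )
    print(json.dumps(results, indent=2))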