import gradio as gr
import json

# Import analysis modules. All of these are called in process_analysis_request
# below, so they need to be importable rather than commented out.
from processors.topic_modeling import extract_topics, compare_topics
from processors.ngram_analysis import compare_ngrams
from processors.bias_detection import compare_bias
from processors.bow_analysis import compare_bow
from processors.metrics import calculate_similarity
from processors.diff_highlighter import highlight_differences
def create_analysis_screen():
    """
    Create the analysis options screen.

    Returns:
        tuple: (analysis_options, analysis_params, run_analysis_btn, analysis_output)
    """
    with gr.Column() as analysis_screen:
        gr.Markdown("## Analysis Options")
        gr.Markdown("Select which analyses you want to run on the LLM responses.")

        # Analysis selection
        with gr.Group():
            analysis_options = gr.CheckboxGroup(
                choices=[
                    "Topic Modeling",
                    "N-gram Analysis",
                    "Bias Detection",
                    "Bag of Words",
                    "Similarity Metrics",
                    "Difference Highlighting"
                ],
                value=[
                    "Topic Modeling",
                    "N-gram Analysis",
                    "Bag of Words",
                    "Similarity Metrics"
                ],
                label="Select Analyses to Run"
            )

        # Parameters for each analysis type
        with gr.Accordion("Analysis Parameters", open=False) as analysis_params:
            # Topic modeling parameters
            with gr.Group():
                gr.Markdown("### Topic Modeling Parameters")
                topic_count = gr.Slider(minimum=2, maximum=10, value=3, step=1,
                                        label="Number of Topics")

            # N-gram parameters
            with gr.Group():
                gr.Markdown("### N-gram Parameters")
                ngram_n = gr.Radio(choices=["1", "2", "3"], value="2",
                                   label="N-gram Size")
                ngram_top = gr.Slider(minimum=5, maximum=30, value=10, step=1,
                                      label="Top N-grams to Display")

            # Bias detection parameters
            with gr.Group():
                gr.Markdown("### Bias Detection Parameters")
                bias_methods = gr.CheckboxGroup(
                    choices=["Sentiment Analysis", "Partisan Leaning", "Framing Analysis"],
                    value=["Sentiment Analysis", "Partisan Leaning"],
                    label="Bias Detection Methods"
                )

            # Bag of Words parameters
            with gr.Group():
                gr.Markdown("### Bag of Words Parameters")
                bow_top = gr.Slider(minimum=10, maximum=100, value=25, step=5,
                                    label="Top Words to Compare")

            # Similarity metrics parameters
            with gr.Group():
                gr.Markdown("### Similarity Metrics Parameters")
                similarity_metrics = gr.CheckboxGroup(
                    choices=["Cosine Similarity", "Jaccard Similarity", "Semantic Similarity"],
                    value=["Cosine Similarity", "Semantic Similarity"],
                    label="Similarity Metrics to Calculate"
                )

        # Run analysis button
        run_analysis_btn = gr.Button("Run Analysis", variant="primary", size="large")

        # Analysis output area
        analysis_output = gr.JSON(label="Analysis Results", visible=False)

    return analysis_options, analysis_params, run_analysis_btn, analysis_output
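
# Example wiring (an illustrative sketch, not part of the original module):
# create_analysis_screen() is intended to be called inside a gr.Blocks layout,
# with the returned button and output wired to process_analysis_request. The
# dataset_state below is an assumed gr.State holding the loaded dataset, and the
# empty parameters dict is a hypothetical stand-in for values read from the
# accordion widgets.
#
#   with gr.Blocks() as demo:
#       dataset_state = gr.State({"entries": []})
#       options, params_accordion, run_btn, output = create_analysis_screen()
#       run_btn.click(
#           fn=lambda ds, selected: process_analysis_request(ds, selected, {})[1],
#           inputs=[dataset_state, options],
#           outputs=[output],
#       )
#   demo.launch()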
def process_analysis_request(dataset, selected_analyses, parameters):
    """
    Process the analysis request and run the selected analyses.

    Args:
        dataset (dict): The dataset containing prompts and LLM responses
        selected_analyses (list): List of selected analysis types
        parameters (dict): Parameters for each analysis type

    Returns:
        tuple: (analysis_results, analysis_output_display)
    """
    if not dataset or "entries" not in dataset or not dataset["entries"]:
        return {}, gr.update(visible=True,
                             value=json.dumps({"error": "No dataset provided or dataset is empty"}, indent=2))

    analysis_results = {"analyses": {}}

    # Group responses by prompt
    prompts = {}
    for entry in dataset["entries"]:
        if entry["prompt"] not in prompts:
            prompts[entry["prompt"]] = []
        prompts[entry["prompt"]].append({
            "model": entry["model"],
            "response": entry["response"]
        })

    # Run selected analyses for each prompt
    for prompt, responses in prompts.items():
        analysis_results["analyses"][prompt] = {}

        # Extract just the text responses and model names
        response_texts = [r["response"] for r in responses]
        model_names = [r["model"] for r in responses]

        # Run Topic Modeling
        if "Topic Modeling" in selected_analyses:
            num_topics = parameters.get("topic_count", 3)
            topic_results = compare_topics(response_texts, model_names, num_topics)
            analysis_results["analyses"][prompt]["topic_modeling"] = topic_results

        # Run N-gram Analysis
        if "N-gram Analysis" in selected_analyses:
            n = int(parameters.get("ngram_n", 2))
            top_n = parameters.get("ngram_top", 10)
            ngram_results = compare_ngrams(response_texts, model_names, n, top_n)
            analysis_results["analyses"][prompt]["ngram_analysis"] = ngram_results

        # Run Bias Detection
        if "Bias Detection" in selected_analyses:
            bias_methods = parameters.get("bias_methods", ["Sentiment Analysis", "Partisan Leaning"])
            bias_results = compare_bias(response_texts, model_names, bias_methods)
            analysis_results["analyses"][prompt]["bias_detection"] = bias_results

        # Run Bag of Words Analysis
        if "Bag of Words" in selected_analyses:
            top_words = parameters.get("bow_top", 25)
            bow_results = compare_bow(response_texts, model_names, top_words)
            analysis_results["analyses"][prompt]["bag_of_words"] = bow_results

        # Run Similarity Metrics
        if "Similarity Metrics" in selected_analyses:
            metrics = parameters.get("similarity_metrics", ["Cosine Similarity"])
            similarity_results = {}
            # Calculate pairwise similarities between every pair of models
            for i in range(len(responses)):
                for j in range(i + 1, len(responses)):
                    model_pair = f"{model_names[i]} vs {model_names[j]}"
                    similarity_results[model_pair] = calculate_similarity(
                        response_texts[i], response_texts[j], metrics
                    )
            analysis_results["analyses"][prompt]["similarity_metrics"] = similarity_results

        # Run Difference Highlighting
        if "Difference Highlighting" in selected_analyses:
            diff_results = {}
            # Calculate pairwise differences between every pair of models
            for i in range(len(responses)):
                for j in range(i + 1, len(responses)):
                    model_pair = f"{model_names[i]} vs {model_names[j]}"
                    diff_results[model_pair] = highlight_differences(
                        response_texts[i], response_texts[j]
                    )
            analysis_results["analyses"][prompt]["difference_highlighting"] = diff_results

    return analysis_results, gr.update(visible=True, value=json.dumps(analysis_results, indent=2))
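
if __name__ == "__main__":
    # Minimal smoke test (a sketch, not part of the Gradio app itself). The
    # dataset below is made-up sample data, and only the Bag of Words analysis
    # is requested so the run exercises compare_bow without requiring the other
    # processor modules to do any work.
    sample_dataset = {
        "entries": [
            {"prompt": "What causes inflation?",
             "model": "model-a",
             "response": "Inflation is typically driven by demand outpacing supply."},
            {"prompt": "What causes inflation?",
             "model": "model-b",
             "response": "Inflation often results from expansion of the money supply."},
        ]
    }
    results, _ = process_analysis_request(
        sample_dataset,
        selected_analyses=["Bag of Words"],
        parameters={"bow_top": 25},
    )
    print(json.dumps(results, indent=2))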