import gradio as gr
import json
from visualization.bow_visualizer import process_and_visualize_analysis

# Import analysis modules
# Uncomment these when implemented
# from processors.topic_modeling import extract_topics, compare_topics
# from processors.ngram_analysis import compare_ngrams
# from processors.bias_detection import compare_bias
from processors.bow_analysis import compare_bow
# from processors.metrics import calculate_similarity
# from processors.diff_highlighter import highlight_differences

def create_analysis_screen():
    """
    Create the analysis options screen.

    Returns:
        tuple: (analysis_options, analysis_params, run_analysis_btn,
                analysis_output, bow_top_slider, visualization_container)
    """
    with gr.Column() as analysis_screen:
        gr.Markdown("## Analysis Options")
        gr.Markdown("Select which analyses you want to run on the LLM responses.")

        # Analysis selection
        with gr.Group():
            analysis_options = gr.CheckboxGroup(
                choices=[
                    "Topic Modeling",
                    "N-gram Analysis",
                    "Bias Detection",
                    "Bag of Words",
                    "Similarity Metrics",
                    "Difference Highlighting"
                ],
                value=["Bag of Words"],
                label="Select Analyses to Run"
            )

            # Create slider directly here for easier access
            gr.Markdown("### Bag of Words Parameters")
            bow_top_slider = gr.Slider(
                minimum=10, maximum=100, value=25, step=5,
                label="Top Words to Compare",
                elem_id="bow_top_slider"
            )
        # Parameters for each analysis type (these will be hidden/shown based on selections)
        with gr.Group() as analysis_params:
            # Topic modeling parameters
            with gr.Group(visible=False) as topic_params:
                gr.Markdown("### Topic Modeling Parameters")
                topic_count = gr.Slider(minimum=2, maximum=10, value=3, step=1,
                                        label="Number of Topics")

            # N-gram parameters
            with gr.Group(visible=False) as ngram_params:
                gr.Markdown("### N-gram Parameters")
                ngram_n = gr.Radio(choices=["1", "2", "3"], value="2",
                                   label="N-gram Size")
                ngram_top = gr.Slider(minimum=5, maximum=30, value=10, step=1,
                                      label="Top N-grams to Display")

            # Bias detection parameters
            with gr.Group(visible=False) as bias_params:
                gr.Markdown("### Bias Detection Parameters")
                bias_methods = gr.CheckboxGroup(
                    choices=["Sentiment Analysis", "Partisan Leaning", "Framing Analysis"],
                    value=["Sentiment Analysis", "Partisan Leaning"],
                    label="Bias Detection Methods"
                )

            # Similarity metrics parameters
            with gr.Group(visible=False) as similarity_params:
                gr.Markdown("### Similarity Metrics Parameters")
                similarity_metrics = gr.CheckboxGroup(
                    choices=["Cosine Similarity", "Jaccard Similarity", "Semantic Similarity"],
                    value=["Cosine Similarity", "Semantic Similarity"],
                    label="Similarity Metrics to Calculate"
                )
        # Function to update parameter visibility based on selected analyses
        def update_params_visibility(selected):
            return {
                topic_params: gr.update(visible="Topic Modeling" in selected),
                ngram_params: gr.update(visible="N-gram Analysis" in selected),
                bias_params: gr.update(visible="Bias Detection" in selected),
                similarity_params: gr.update(visible="Similarity Metrics" in selected)
            }
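        # Note: a Gradio event handler may return a dict keyed by output
        # components instead of a positional tuple; each component listed in
        # `outputs` then receives its corresponding gr.update().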
        # Set up event handler for analysis selection
        analysis_options.change(
            fn=update_params_visibility,
            inputs=[analysis_options],
            outputs=[topic_params, ngram_params, bias_params, similarity_params]
        )
        # Run analysis button
        run_analysis_btn = gr.Button("Run Analysis", variant="primary", size="large")

        # Analysis output area - hidden JSON component to store raw results
        analysis_output = gr.JSON(label="Analysis Results", visible=False)

        # Visualization components container
        visualization_container = gr.Column(visible=False)

    # Return the bow_top_slider directly so app.py can access it
    return analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, visualization_container
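
# A minimal wiring sketch for the caller (assumptions: app.py composes this
# screen inside a gr.Blocks layout, and `dataset_state`/`results_state` are
# hypothetical gr.State components holding the loaded dataset and raw results;
# neither name exists in this module):
#
#     with gr.Blocks() as demo:
#         dataset_state = gr.State({})
#         results_state = gr.State({})
#         (analysis_options, analysis_params, run_analysis_btn,
#          analysis_output, bow_top_slider, visualization_container) = create_analysis_screen()
#         run_analysis_btn.click(
#             fn=lambda ds, sel, top: process_analysis_request(ds, sel, {"bow_top": top}),
#             inputs=[dataset_state, analysis_options, bow_top_slider],
#             outputs=[results_state, analysis_output],
#         )
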
def process_analysis_request(dataset, selected_analyses, parameters):
    """
    Process the analysis request and run the selected analyses.

    Args:
        dataset (dict): The dataset containing prompts and LLM responses
        selected_analyses (list): List of selected analysis types
        parameters (dict): Parameters for each analysis type

    Returns:
        tuple: (analysis_results, analysis_output_display)
    """
    try:
        print(f"Processing analysis request with: {selected_analyses}")
        print(f"Parameters: {parameters}")

        if not dataset or "entries" not in dataset or not dataset["entries"]:
            return {}, gr.update(visible=True, value=json.dumps(
                {"error": "No dataset provided or dataset is empty"}, indent=2))

        analysis_results = {"analyses": {}}

        # Extract the prompt and the per-model responses
        prompt = dataset["entries"][0]["prompt"]
        response_texts = [entry["response"] for entry in dataset["entries"]]
        model_names = [entry["model"] for entry in dataset["entries"]]

        print(f"Analyzing prompt: '{prompt[:50]}...'")
        print(f"Models: {model_names}")

        analysis_results["analyses"][prompt] = {}

        # Only Bag of Words is implemented so far, since it is the most complete
        if "Bag of Words" in selected_analyses:
            # Default value, overridden by the parameters dict when present
            top_words = 25
            if parameters and isinstance(parameters, dict) and "bow_top" in parameters:
                top_words = parameters["bow_top"]

            print(f"Running BOW analysis with top_words={top_words}")

            # Call the BOW comparison function
            bow_results = compare_bow(response_texts, model_names, top_words)
            analysis_results["analyses"][prompt]["bag_of_words"] = bow_results

        print("Analysis complete - results:", analysis_results)

        # Return results and hide the raw JSON output component
        return analysis_results, gr.update(visible=False, value=analysis_results)
    except Exception as e:
        import traceback
        error_msg = f"Analysis error: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        return {}, gr.update(visible=True, value=json.dumps({"error": error_msg}, indent=2))
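

if __name__ == "__main__":
    # Minimal smoke test (assumptions: run from the project root so that
    # processors.bow_analysis is importable; the dataset below is made up
    # purely to illustrate the expected entry schema).
    sample_dataset = {
        "entries": [
            {"prompt": "Describe the economy.", "model": "model_a",
             "response": "The economy is growing steadily this year."},
            {"prompt": "Describe the economy.", "model": "model_b",
             "response": "Growth has slowed while inflation remains high."},
        ]
    }
    results, _ = process_analysis_request(sample_dataset, ["Bag of Words"], {"bow_top": 10})
    print(json.dumps(results, indent=2, default=str))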