import gradio as gr
from ui.dataset_input import create_dataset_input, load_example_dataset
from ui.analysis_screen import create_analysis_screen, process_analysis_request
from visualization.bow_visualizer import process_and_visualize_analysis
import nltk
import os
import json
# Download necessary NLTK resources on startup
def download_nltk_resources():
    """Download required NLTK resources if they are not already available."""
    try:
        # Create an nltk_data directory in the user's home directory if needed
        nltk_data_path = os.path.expanduser("~/nltk_data")
        os.makedirs(nltk_data_path, exist_ok=True)

        # Make sure NLTK searches this path
        nltk.data.path.append(nltk_data_path)

        # Download the required resources
        resources = ['punkt', 'wordnet', 'stopwords', 'punkt_tab']
        for resource in resources:
            try:
                # A resource can live in any of several NLTK data directories
                locations = [
                    f'tokenizers/{resource}',
                    f'corpora/{resource}',
                    f'taggers/{resource}',
                    f'{resource}'
                ]
                found = False
                for location in locations:
                    try:
                        nltk.data.find(location)
                        print(f"Resource {resource} already downloaded")
                        found = True
                        break
                    except LookupError:
                        continue
                if not found:
                    print(f"Downloading {resource}...")
                    nltk.download(resource, quiet=True)
            except Exception as e:
                print(f"Error with resource {resource}: {e}")
        print("NLTK resources check completed")
    except Exception as e:
        print(f"Error downloading NLTK resources: {e}")

def create_app():
    """
    Create a streamlined Gradio app for dataset input and Bag of Words analysis.

    Returns:
        gr.Blocks: The Gradio application
    """
    with gr.Blocks(title="LLM Response Comparator") as app:
        # Application state shared between tabs
        dataset_state = gr.State({})
        analysis_results_state = gr.State({})

        # Dataset Input Tab
        with gr.Tab("Dataset Input"):
            (dataset_inputs, example_dropdown, load_example_btn, create_btn,
             prompt, response1, model1, response2, model2) = create_dataset_input()

            # Status indicator showing whether a dataset has been created
            dataset_status = gr.Markdown("*No dataset loaded*")

            # Load an example dataset into all input fields
            load_example_btn.click(
                fn=load_example_dataset,
                inputs=[example_dropdown],
                outputs=[prompt, response1, model1, response2, model2]
            )

            # Save the dataset to state and update the status message
            def create_dataset(p, r1, m1, r2, m2):
                if not p or not r1 or not r2:
                    return {}, "❌ **Error:** Please fill in at least the prompt and both responses"
                dataset = {
                    "entries": [
                        {"prompt": p, "response": r1, "model": m1 or "Model 1"},
                        {"prompt": p, "response": r2, "model": m2 or "Model 2"}
                    ]
                }
                return dataset, "✅ **Dataset created successfully!** You can now go to the Analysis tab"

            create_btn.click(
                fn=create_dataset,
                inputs=[prompt, response1, model1, response2, model2],
                outputs=[dataset_state, dataset_status]
            )
        # Analysis Tab
        with gr.Tab("Analysis"):
            # create_analysis_screen builds the UI components, including the
            # visualization container and the Bag of Words slider
            (analysis_options, analysis_params, run_analysis_btn,
             analysis_output, bow_top_slider) = create_analysis_screen()

            # The n-gram controls are not among the components unpacked above
            # (analysis_params is not used further here), so hold assumed
            # defaults in State; swap in real sliders if the analysis screen
            # exposes them.
            ngram_n = gr.State(2)
            ngram_top = gr.State(10)

            # Pre-create visualization components (initially hidden)
            visualization_area_visible = gr.Checkbox(value=False, visible=False, label="Visualization Visible")
            analysis_title = gr.Markdown("## Analysis Results", visible=False)
            prompt_title = gr.Markdown(visible=False)
            models_compared = gr.Markdown(visible=False)

            # Container for model 1 words
            model1_title = gr.Markdown(visible=False)
            model1_words = gr.Markdown(visible=False)

            # Container for model 2 words
            model2_title = gr.Markdown(visible=False)
            model2_words = gr.Markdown(visible=False)

            # Similarity metrics
            similarity_metrics_title = gr.Markdown("### Similarity Metrics", visible=False)
            similarity_metrics = gr.Markdown(visible=False)

            # Status or error message area
            status_message_visible = gr.Checkbox(value=False, visible=False, label="Status Message Visible")
            status_message = gr.Markdown(visible=False)
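
            # Pattern note: the two hidden checkboxes act as boolean signals.
            # run_analysis writes True/False into them, and the .change
            # handler below fans that value out to the visibility of the
            # pre-created Markdown components.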
            # Helper to extract parameter values and run the analysis
            def run_analysis(dataset, selected_analyses, bow_top, ngram_n, ngram_top):
                # All non-success paths return the same 14-value tuple: keep
                # the results, hide the visualization components, and show a
                # status message (optionally with a raw payload for debugging).
                def hidden_response(results, message, raw_output=None):
                    hidden = [gr.update(visible=False)] * 9
                    return (
                        results,                                   # analysis_results_state
                        gr.update(visible=raw_output is not None,  # analysis_output
                                  value=raw_output),
                        False,                                     # visualization_area_visible
                        *hidden,                                   # title/word/metric components
                        True,                                      # status_message_visible
                        gr.update(visible=True, value=message)     # status_message
                    )

                try:
                    if not dataset or "entries" not in dataset or not dataset["entries"]:
                        return hidden_response(
                            {},
                            "❌ **Error:** No dataset loaded. Please create or load a dataset first."
                        )

                    parameters = {
                        "bow_top": bow_top,
                        "ngram_n": ngram_n,
                        "ngram_top": ngram_top
                    }
                    print("Running analysis with parameters:", parameters)

                    # Process the analysis request
                    analysis_results, _ = process_analysis_request(dataset, selected_analyses, parameters)

                    # Bail out if there is an error or no results
                    if not analysis_results or "analyses" not in analysis_results or not analysis_results["analyses"]:
                        return hidden_response(
                            analysis_results,
                            "❌ **No results found.** Try different analysis options."
                        )
                    # Extract the information to display in the components
                    prompt = list(analysis_results["analyses"].keys())[0]
                    analyses = analysis_results["analyses"][prompt]
                    if "bag_of_words" not in analyses:
                        return hidden_response(
                            analysis_results,
                            "❌ **No Bag of Words analysis found.** Make sure to select it in the options."
                        )

                    bow_results = analyses["bag_of_words"]
                    models = bow_results.get("models", [])
                    if len(models) < 2:
                        return hidden_response(
                            analysis_results,
                            "❌ **Not enough models to compare.** Please ensure you have two model responses."
                        )
                    # Extract and format information for display
                    model1_name = models[0]
                    model2_name = models[1]

                    # Format the most important words for each model
                    important_words = bow_results.get("important_words", {})
                    model1_words_text = "No important words found"
                    model2_words_text = "No important words found"
                    if model1_name in important_words:
                        word_list = [f"**{item['word']}** ({item['count']})" for item in important_words[model1_name][:10]]
                        model1_words_text = ", ".join(word_list)
                    if model2_name in important_words:
                        word_list = [f"**{item['word']}** ({item['count']})" for item in important_words[model2_name][:10]]
                        model2_words_text = ", ".join(word_list)

                    # Format the similarity metrics
                    similarity_text = "No similarity metrics found"
                    comparisons = bow_results.get("comparisons", {})
                    comparison_key = f"{model1_name} vs {model2_name}"
                    if comparison_key in comparisons:
                        metrics = comparisons[comparison_key]
                        cosine = metrics.get("cosine_similarity", 0)
                        jaccard = metrics.get("jaccard_similarity", 0)
                        semantic = metrics.get("semantic_similarity", 0)
                        common_words = metrics.get("common_word_count", 0)
                        similarity_text = f"""
- **Cosine Similarity**: {cosine:.2f} (higher means more similar word frequency patterns)
- **Jaccard Similarity**: {jaccard:.2f} (higher means more word overlap)
- **Semantic Similarity**: {semantic:.2f} (higher means more similar meaning)
- **Common Words**: {common_words} words appear in both responses
"""
                    # Success: return all updated component values
                    return (
                        analysis_results,                          # analysis_results_state
                        gr.update(visible=False),                  # analysis_output (raw JSON stays hidden)
                        True,                                      # visualization_area_visible
                        gr.update(visible=True),                   # analysis_title
                        gr.update(visible=True, value=f"## Analysis of Prompt: \"{prompt[:100]}...\""),  # prompt_title
                        gr.update(visible=True, value=f"### Comparing responses from {model1_name} and {model2_name}"),  # models_compared
                        gr.update(visible=True, value=f"#### Top Words Used by {model1_name}"),  # model1_title
                        gr.update(visible=True, value=model1_words_text),  # model1_words
                        gr.update(visible=True, value=f"#### Top Words Used by {model2_name}"),  # model2_title
                        gr.update(visible=True, value=model2_words_text),  # model2_words
                        gr.update(visible=True),                   # similarity_metrics_title
                        gr.update(visible=True, value=similarity_text),  # similarity_metrics
                        False,                                     # status_message_visible
                        gr.update(visible=False)                   # status_message
                    )
                except Exception as e:
                    import traceback
                    error_msg = f"Error in analysis: {str(e)}\n{traceback.format_exc()}"
                    print(error_msg)
                    # Surface the raw error payload in analysis_output for debugging
                    return hidden_response(
                        {"error": error_msg},
                        f"❌ **Error during analysis:**\n\n```\n{str(e)}\n```",
                        raw_output={"error": error_msg}
                    )
            # Update component visibility based on the hidden checkbox states
            def update_visibility(viz_visible, status_visible):
                return [
                    gr.update(visible=viz_visible),     # analysis_title
                    gr.update(visible=viz_visible),     # prompt_title
                    gr.update(visible=viz_visible),     # models_compared
                    gr.update(visible=viz_visible),     # model1_title
                    gr.update(visible=viz_visible),     # model1_words
                    gr.update(visible=viz_visible),     # model2_title
                    gr.update(visible=viz_visible),     # model2_words
                    gr.update(visible=viz_visible),     # similarity_metrics_title
                    gr.update(visible=viz_visible),     # similarity_metrics
                    gr.update(visible=status_visible)   # status_message
                ]

            # Connect the visibility checkbox to the update function
            visualization_area_visible.change(
                fn=update_visibility,
                inputs=[visualization_area_visible, status_message_visible],
                outputs=[
                    analysis_title,
                    prompt_title,
                    models_compared,
                    model1_title,
                    model1_words,
                    model2_title,
                    model2_words,
                    similarity_metrics_title,
                    similarity_metrics,
                    status_message
                ]
            )
            # Run the analysis; the input order matches run_analysis's signature
            run_analysis_btn.click(
                fn=run_analysis,
                inputs=[dataset_state, analysis_options, bow_top_slider, ngram_n, ngram_top],
                outputs=[
                    analysis_results_state,
                    analysis_output,
                    visualization_area_visible,
                    analysis_title,
                    prompt_title,
                    models_compared,
                    model1_title,
                    model1_words,
                    model2_title,
                    model2_words,
                    similarity_metrics_title,
                    similarity_metrics,
                    status_message_visible,
                    status_message
                ]
            )
    return app


if __name__ == "__main__":
    # Download required NLTK resources before launching the app
    download_nltk_resources()
    app = create_app()
    app.launch()
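
# For local development, launch() accepts standard Gradio options such as
#     app.launch(server_name="0.0.0.0", server_port=7860)
# to expose the app on the local network; on Hugging Face Spaces the plain
# launch() above is sufficient.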