Spaces:
Sleeping
Sleeping
File size: 6,540 Bytes
39cf944 6334788 39cf944 08f222a 39cf944 08f222a 39cf944 08f222a 39cf944 08f222a 6334788 08f222a 39cf944 08f222a 39cf944 08f222a 39cf944 588b4b8 6334788 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import gradio as gr
import json
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
def _ngram_model_section(model_name, ngrams, size_name):
    """Build the components listing (and optionally charting) one model's top n-grams."""
    section = [gr.Markdown(f"#### Top {size_name} Used by {model_name}")]

    # Bail out early so an empty or missing list never reaches len()/slicing.
    if not ngrams:
        section.append(gr.Markdown("No significant n-grams found."))
        return section

    # Text summary of the first ten entries, e.g. "**of the** (4), **in a** (3)".
    ngram_list = [f"**{item['ngram']}** ({item['count']})" for item in ngrams[:10]]
    section.append(gr.Markdown(", ".join(ngram_list)))

    # Only chart when there are at least three n-grams to compare.
    if len(ngrams) >= 3:
        try:
            df = pd.DataFrame(ngrams)
            fig = px.bar(
                df[:10],
                x='ngram',
                y='count',
                title=f"Top {size_name} Used by {model_name}",
                labels={'ngram': 'N-gram', 'count': 'Frequency'},
                height=400,
            )
            fig.update_layout(
                xaxis_title="N-gram",
                yaxis_title="Frequency",
                xaxis={'categoryorder': 'total descending'},
            )
            section.append(gr.Plot(value=fig))
        except Exception as e:
            # Charting is best-effort: report the failure in the output
            # instead of discarding the text summary built above.
            section.append(gr.Markdown(f"Visualization error: {str(e)}"))

    return section


def _ngram_difference_section(diff_ngrams, ngram_matrix, models, size_name):
    """Build the components listing n-grams whose counts differ between the first two models."""
    # The comparison is pairwise, so it needs two model names as well as data.
    if not diff_ngrams or not ngram_matrix or len(models) < 2:
        return []

    model1, model2 = models[0], models[1]
    section = [gr.Markdown(f"### {size_name} with Biggest Frequency Differences")]

    for ngram in diff_ngrams[:10]:  # Limit to top 10 for readability
        counts = ngram_matrix.get(ngram)
        if counts is None:
            continue
        model1_count = counts.get(model1, 0)
        model2_count = counts.get(model2, 0)
        # Only include n-grams the two models actually use differently.
        if model1_count != model2_count:
            section.append(gr.Markdown(
                f"- **{ngram}**: {model1}: {model1_count}, {model2}: {model2_count}"
            ))

    return section


def _ngram_similarity_section(comparisons, models, size_name):
    """Build the components reporting how many n-grams the first two models share."""
    # The lookup key is built from two model names; skip the section without them.
    if len(models) < 2:
        return []

    section = [gr.Markdown("### N-gram Similarity Metrics")]
    comparison_key = f"{models[0]} vs {models[1]}"
    if comparison_key in comparisons:
        common_count = comparisons[comparison_key].get("common_ngram_count", 0)
        # Built as a single line so the text does not depend on how this
        # source file happens to be indented.
        section.append(gr.Markdown(
            f"- **Common {size_name}**: {common_count} {size_name.lower()} appear in both responses"
        ))

    return section


def create_ngram_visualization(analysis_results):
    """
    Create visualizations for n-gram analysis results

    For every entry under ``analysis_results["analyses"]`` that contains an
    ``"ngram_analysis"`` result, this emits a comparison header, a per-model
    list (and bar chart) of important n-grams, the n-grams with differing
    counts between the first two models, and the shared n-gram count.

    Args:
        analysis_results (dict): Analysis results from the n-gram analysis

    Returns:
        list: List of gradio components with visualizations
    """
    # Check if we have valid results
    if not analysis_results or "analyses" not in analysis_results:
        return [gr.Markdown("No analysis results found.")]

    output_components = []

    # Process each prompt (the prompt text itself is not displayed)
    for analyses in analysis_results["analyses"].values():
        if "ngram_analysis" not in analyses:
            continue
        ngram_results = analyses["ngram_analysis"]

        # Report an analysis-level error and move on to the next prompt
        if "error" in ngram_results:
            output_components.append(
                gr.Markdown(f"**Error in N-gram analysis:** {ngram_results['error']}"))
            continue

        models = ngram_results.get("models", [])
        ngram_size = ngram_results.get("ngram_size", 2)
        size_name = "Unigrams" if ngram_size == 1 else f"{ngram_size}-grams"

        # Show models being compared
        if len(models) >= 2:
            output_components.append(
                gr.Markdown(f"### {size_name} Analysis: Comparing responses from {models[0]} and {models[1]}"))

        # Display important n-grams for each model
        important_ngrams = ngram_results.get("important_ngrams", {}) or {}
        for model_name, ngrams in important_ngrams.items():
            output_components.extend(_ngram_model_section(model_name, ngrams, size_name))

        # N-grams with the biggest frequency difference between the two models
        output_components.extend(_ngram_difference_section(
            ngram_results.get("differential_ngrams", []),
            ngram_results.get("ngram_count_matrix", {}),
            models,
            size_name,
        ))

        # Add similarity comparison if available
        if "comparisons" in ngram_results:
            output_components.extend(
                _ngram_similarity_section(ngram_results["comparisons"], models, size_name))

    # If no components were added other than header, show a message
    if len(output_components) <= 1:
        output_components.append(gr.Markdown("No detailed N-gram analysis found in results."))

    return output_components
def process_and_visualize_ngram_analysis(analysis_results):
    """
    Build the n-gram visualization, converting any failure into an error component

    Args:
        analysis_results (dict): The analysis results, handed unchanged to
            create_ngram_visualization

    Returns:
        list: The gradio components produced by create_ngram_visualization, or
            a single Markdown component containing the error message and
            traceback if that call raised
    """
    try:
        print("Starting visualization of n-gram analysis results")
        components = create_ngram_visualization(analysis_results)
    except Exception as exc:
        import traceback

        # Log the full traceback to stdout and also surface it in the UI.
        stack_text = traceback.format_exc()
        error_msg = f"N-gram visualization error: {exc!s}\n{stack_text}"
        print(error_msg)
        failure_notice = gr.Markdown(
            f"**Error during n-gram visualization:**\n\n```\n{error_msg}\n```"
        )
        return [failure_notice]
    else:
        return components
|