Ryan committed on
Commit
39cf944
·
1 Parent(s): 1a44569
.idea/workspace.xml CHANGED
@@ -5,8 +5,12 @@
5
  </component>
6
  <component name="ChangeListManager">
7
  <list default="true" id="8e67814c-7f04-433c-ab7a-2b65a1106d4c" name="Changes" comment="">
 
 
8
  <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
9
- <change beforePath="$PROJECT_DIR$/processors/bow_analysis.py" beforeDir="false" afterPath="$PROJECT_DIR$/processors/bow_analysis.py" afterDir="false" />
 
 
10
  </list>
11
  <option name="SHOW_DIALOG" value="false" />
12
  <option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -56,7 +60,7 @@
56
  <option name="presentableId" value="Default" />
57
  <updated>1745170754325</updated>
58
  <workItem from="1745170755404" duration="245000" />
59
- <workItem from="1745172030020" duration="4469000" />
60
  </task>
61
  <servers />
62
  </component>
 
5
  </component>
6
  <component name="ChangeListManager">
7
  <list default="true" id="8e67814c-7f04-433c-ab7a-2b65a1106d4c" name="Changes" comment="">
8
+ <change afterPath="$PROJECT_DIR$/processors/ngram_analysis.py" afterDir="false" />
9
+ <change afterPath="$PROJECT_DIR$/visualization/ngram_visualizer.py" afterDir="false" />
10
  <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
11
+ <change beforePath="$PROJECT_DIR$/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/app.py" afterDir="false" />
12
+ <change beforePath="$PROJECT_DIR$/ui/analysis_screen.py" beforeDir="false" afterPath="$PROJECT_DIR$/ui/analysis_screen.py" afterDir="false" />
13
+ <change beforePath="$PROJECT_DIR$/visualization/bow_visualizer.py" beforeDir="false" afterPath="$PROJECT_DIR$/visualization/bow_visualizer.py" afterDir="false" />
14
  </list>
15
  <option name="SHOW_DIALOG" value="false" />
16
  <option name="HIGHLIGHT_CONFLICTS" value="true" />
 
60
  <option name="presentableId" value="Default" />
61
  <updated>1745170754325</updated>
62
  <workItem from="1745170755404" duration="245000" />
63
+ <workItem from="1745172030020" duration="7284000" />
64
  </task>
65
  <servers />
66
  </component>
app.py CHANGED
@@ -122,7 +122,7 @@ def create_app():
122
  status_message = gr.Markdown(visible=False)
123
 
124
  # Define a helper function to extract parameter values and run the analysis
125
- def run_analysis(dataset, selected_analyses, bow_top):
126
  try:
127
  if not dataset or "entries" not in dataset or not dataset["entries"]:
128
  return (
@@ -144,6 +144,8 @@ def create_app():
144
 
145
  parameters = {
146
  "bow_top": bow_top,
 
 
147
  }
148
  print("Running analysis with parameters:", parameters)
149
 
@@ -324,7 +326,7 @@ def create_app():
324
  # Run analysis with proper parameters
325
  run_analysis_btn.click(
326
  fn=run_analysis,
327
- inputs=[dataset_state, analysis_options, bow_top_slider],
328
  outputs=[
329
  analysis_results_state,
330
  analysis_output,
 
122
  status_message = gr.Markdown(visible=False)
123
 
124
  # Define a helper function to extract parameter values and run the analysis
125
+ def run_analysis(dataset, selected_analyses, bow_top, ngram_n, ngram_top):
126
  try:
127
  if not dataset or "entries" not in dataset or not dataset["entries"]:
128
  return (
 
144
 
145
  parameters = {
146
  "bow_top": bow_top,
147
+ "ngram_n": ngram_n,
148
+ "ngram_top": ngram_top
149
  }
150
  print("Running analysis with parameters:", parameters)
151
 
 
326
  # Run analysis with proper parameters
327
  run_analysis_btn.click(
328
  fn=run_analysis,
329
+ inputs=[dataset_state, analysis_options, bow_top_slider, ngram_n, ngram_top],
330
  outputs=[
331
  analysis_results_state,
332
  analysis_output,
processors/ngram_analysis.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ N-gram analysis for comparing text responses
3
+ """
4
+ from sklearn.feature_extraction.text import CountVectorizer
5
+ import numpy as np
6
+ from collections import Counter
7
+ import nltk
8
+ from nltk.util import ngrams
9
+ from nltk.tokenize import word_tokenize
10
+ from nltk.corpus import stopwords
11
+
12
+
13
def compare_ngrams(texts, model_names, n=2, top_n=25):
    """
    Compare n-gram usage across multiple texts.

    Args:
        texts (list): List of text responses to compare
        model_names (list): Names of models corresponding to responses
        n (int): Size of n-grams (1 for unigrams, 2 for bigrams, etc.)
        top_n (int): Number of top n-grams to consider

    Returns:
        dict: N-gram analysis results with the keys "models", "ngram_size",
            "important_ngrams" (per-model list of {"ngram", "count"}),
            "ngram_count_matrix" (ngram -> {model: count}, positive counts
            only) and "differential_ngrams". When at least two models are
            given and n-grams could be extracted, a "comparisons" entry for
            the first two models is added as well.
    """
    # Initialize the results dictionary
    result = {
        "models": model_names,
        "ngram_size": n,
        "important_ngrams": {},
        "ngram_count_matrix": {},
        "differential_ngrams": []
    }

    # Nothing to analyze: no texts at all, or only blank / non-string entries.
    # Returning early avoids the vectorizer's "empty vocabulary" ValueError.
    if not texts or not any(isinstance(text, str) and text.strip() for text in texts):
        return result

    # The vectorizer calls str methods on every document, so replace
    # non-string entries (e.g. a missing response) with an empty document.
    documents = [text if isinstance(text, str) else "" for text in texts]

    # Create n-gram representations using CountVectorizer
    vectorizer = CountVectorizer(
        ngram_range=(n, n),  # Use the specified n-gram size
        max_features=1000,
        stop_words='english'
    )

    try:
        X = vectorizer.fit_transform(documents)
    except ValueError as err:
        # Raised when no n-gram survives tokenization, e.g. the responses
        # contain only stop words or are shorter than n tokens. Any other
        # ValueError (such as invalid parameters) is a real error.
        if "empty vocabulary" not in str(err):
            raise
        return result

    # Get feature names (n-grams)
    feature_names = vectorizer.get_feature_names_out()

    # Create n-gram count matrix. zip() pairs each model with its row and
    # stops at the shorter sequence, so a surplus model name cannot index
    # past the end of the matrix.
    ngram_counts = {}
    for model, counts in zip(model_names, X.toarray()):
        model_counts = {}
        for ngram, count in zip(feature_names, counts):
            if count > 0:  # Only store n-grams that appear
                model_counts[ngram] = int(count)
                result["ngram_count_matrix"].setdefault(ngram, {})[model] = int(count)
        ngram_counts[model] = model_counts

    # Find important n-grams for each model (most frequent first)
    for model, ngram_freq in ngram_counts.items():
        sorted_ngrams = sorted(ngram_freq.items(), key=lambda item: item[1], reverse=True)
        result["important_ngrams"][model] = [
            {"ngram": ngram, "count": count}
            for ngram, count in sorted_ngrams[:top_n]
        ]

    # Calculate differential n-grams (n-grams with biggest frequency
    # difference between the first two models)
    if len(model_names) >= 2:
        model1, model2 = model_names[0], model_names[1]

        # Absolute count difference; a model that never used the n-gram
        # has no entry in the matrix and counts as 0.
        diff_scores = {
            ngram: abs(per_model.get(model1, 0) - per_model.get(model2, 0))
            for ngram, per_model in result["ngram_count_matrix"].items()
        }

        # Sort by difference
        sorted_diffs = sorted(diff_scores.items(), key=lambda item: item[1], reverse=True)
        result["differential_ngrams"] = [ngram for ngram, _ in sorted_diffs[:top_n]]

        # Calculate overlap statistics
        model1_ngrams = set(ngram_counts.get(model1, {}))
        model2_ngrams = set(ngram_counts.get(model2, {}))
        common_ngrams = model1_ngrams & model2_ngrams

        result["comparisons"] = {
            f"{model1} vs {model2}": {
                "common_ngram_count": len(common_ngrams)
            }
        }

    return result
ui/analysis_screen.py CHANGED
@@ -10,6 +10,7 @@ from visualization.bow_visualizer import process_and_visualize_analysis
10
  from processors.bow_analysis import compare_bow
11
  # from processors.metrics import calculate_similarity
12
  # from processors.diff_highlighter import highlight_differences
 
13
 
14
  def create_analysis_screen():
15
  """
@@ -105,56 +106,50 @@ def create_analysis_screen():
105
 
106
  # Return the bow_top_slider directly so app.py can access it
107
  # Note: Removed the visualization_container from return values since we'll pre-create it
108
- return analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider
109
 
 
110
  def process_analysis_request(dataset, selected_analyses, parameters):
111
  """
112
  Process the analysis request and run selected analyses
113
-
114
- Args:
115
- dataset (dict): The dataset containing prompts and LLM responses
116
- selected_analyses (list): List of selected analysis types
117
- parameters (dict): Parameters for each analysis type
118
-
119
- Returns:
120
- tuple: (analysis_results, analysis_output_display)
121
  """
122
  try:
123
  print(f"Processing analysis request with: {selected_analyses}")
124
  print(f"Parameters: {parameters}")
125
-
126
  if not dataset or "entries" not in dataset or not dataset["entries"]:
127
- return {}, gr.update(visible=True, value=json.dumps({"error": "No dataset provided or dataset is empty"}, indent=2))
128
-
 
129
  analysis_results = {"analyses": {}}
130
-
131
  # Extract prompt and responses
132
  prompt = dataset["entries"][0]["prompt"]
133
  response_texts = [entry["response"] for entry in dataset["entries"]]
134
  model_names = [entry["model"] for entry in dataset["entries"]]
135
-
136
  print(f"Analyzing prompt: '{prompt[:50]}...'")
137
  print(f"Models: {model_names}")
138
-
139
  analysis_results["analyses"][prompt] = {}
140
-
141
- # Currently only implement Bag of Words since it's the most complete
142
  if "Bag of Words" in selected_analyses:
143
- # Set a default value
144
- top_words = 25
145
-
146
- # Try to get the parameter from the parameters dict
147
- if parameters and isinstance(parameters, dict) and "bow_top" in parameters:
148
- top_words = parameters["bow_top"]
149
-
150
  print(f"Running BOW analysis with top_words={top_words}")
151
-
152
- # Call the BOW comparison function
153
  bow_results = compare_bow(response_texts, model_names, top_words)
154
  analysis_results["analyses"][prompt]["bag_of_words"] = bow_results
155
-
 
 
 
 
 
 
 
 
156
  print("Analysis complete - results:", analysis_results)
157
-
158
  # Return results and update the output component
159
  return analysis_results, gr.update(visible=False, value=analysis_results) # Hide the raw JSON
160
  except Exception as e:
 
10
  from processors.bow_analysis import compare_bow
11
  # from processors.metrics import calculate_similarity
12
  # from processors.diff_highlighter import highlight_differences
13
+ from processors.ngram_analysis import compare_ngrams
14
 
15
  def create_analysis_screen():
16
  """
 
106
 
107
  # Return the bow_top_slider directly so app.py can access it
108
  # Note: Removed the visualization_container from return values since we'll pre-create it
109
+ return analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top
110
 
111
+ # Run the selected analyses on the dataset and collect their results
112
  def process_analysis_request(dataset, selected_analyses, parameters):
113
  """
114
  Process the analysis request and run selected analyses
 
 
 
 
 
 
 
 
115
  """
116
  try:
117
  print(f"Processing analysis request with: {selected_analyses}")
118
  print(f"Parameters: {parameters}")
119
+
120
  if not dataset or "entries" not in dataset or not dataset["entries"]:
121
+ return {}, gr.update(visible=True,
122
+ value=json.dumps({"error": "No dataset provided or dataset is empty"}, indent=2))
123
+
124
  analysis_results = {"analyses": {}}
125
+
126
  # Extract prompt and responses
127
  prompt = dataset["entries"][0]["prompt"]
128
  response_texts = [entry["response"] for entry in dataset["entries"]]
129
  model_names = [entry["model"] for entry in dataset["entries"]]
130
+
131
  print(f"Analyzing prompt: '{prompt[:50]}...'")
132
  print(f"Models: {model_names}")
133
+
134
  analysis_results["analyses"][prompt] = {}
135
+
136
+ # Run Bag of Words analysis if selected
137
  if "Bag of Words" in selected_analyses:
138
+ top_words = parameters.get("bow_top", 25)
 
 
 
 
 
 
139
  print(f"Running BOW analysis with top_words={top_words}")
 
 
140
  bow_results = compare_bow(response_texts, model_names, top_words)
141
  analysis_results["analyses"][prompt]["bag_of_words"] = bow_results
142
+
143
+ # Run N-gram analysis if selected
144
+ if "N-gram Analysis" in selected_analyses:
145
+ ngram_n = int(parameters.get("ngram_n", "2"))
146
+ ngram_top = parameters.get("ngram_top", 10)
147
+ print(f"Running N-gram analysis with n={ngram_n}, top_n={ngram_top}")
148
+ ngram_results = compare_ngrams(response_texts, model_names, ngram_n, ngram_top)
149
+ analysis_results["analyses"][prompt]["ngram_analysis"] = ngram_results
150
+
151
  print("Analysis complete - results:", analysis_results)
152
+
153
  # Return results and update the output component
154
  return analysis_results, gr.update(visible=False, value=analysis_results) # Hide the raw JSON
155
  except Exception as e:
visualization/bow_visualizer.py CHANGED
@@ -7,6 +7,8 @@ from plotly.subplots import make_subplots
7
  import pandas as pd
8
  from difflib import SequenceMatcher
9
 
 
 
10
  def create_bow_visualization(analysis_results):
11
  """
12
  Create visualizations for bag of words analysis results
@@ -122,78 +124,89 @@ def create_bow_visualization(analysis_results):
122
 
123
  return output_components
124
 
 
 
125
  def process_and_visualize_analysis(analysis_results):
126
  """
127
  Process the analysis results and create visualization components
128
-
129
  Args:
130
  analysis_results (dict): The analysis results
131
-
132
  Returns:
133
  list: List of gradio components for visualization
134
  """
135
  try:
136
  print(f"Starting visualization of analysis results: {type(analysis_results)}")
137
  components = []
138
-
139
  if not analysis_results or "analyses" not in analysis_results:
140
  print("Warning: Empty or invalid analysis results")
141
  components.append(gr.Markdown("No analysis results to visualize."))
142
  return components
143
-
144
  # For each prompt in the analysis results
145
  for prompt, analyses in analysis_results.get("analyses", {}).items():
146
  print(f"Visualizing results for prompt: {prompt[:30]}...")
147
  components.append(gr.Markdown(f"## Analysis for Prompt:\n\"{prompt}\""))
148
-
149
  # Check for Bag of Words analysis
150
  if "bag_of_words" in analyses:
151
  print("Processing Bag of Words visualization")
152
  components.append(gr.Markdown("### Bag of Words Analysis"))
153
  bow_results = analyses["bag_of_words"]
154
-
155
  # Display models compared
156
  if "models" in bow_results:
157
  models = bow_results["models"]
158
  components.append(gr.Markdown(f"**Models compared**: {', '.join(models)}"))
159
-
160
  # Display important words for each model
161
  if "important_words" in bow_results:
162
  components.append(gr.Markdown("#### Most Common Words by Model"))
163
-
164
  for model, words in bow_results["important_words"].items():
165
  print(f"Creating word list for model {model}")
166
  word_list = [f"{item['word']} ({item['count']})" for item in words[:10]]
167
  components.append(gr.Markdown(f"**{model}**: {', '.join(word_list)}"))
168
-
169
  # Add visualizations for word frequency differences
170
- if "differential_words" in bow_results and "word_count_matrix" in bow_results and len(bow_results["models"]) >= 2:
 
171
  diff_words = bow_results["differential_words"]
172
  word_matrix = bow_results["word_count_matrix"]
173
  models = bow_results["models"]
174
-
175
  if diff_words and word_matrix and len(diff_words) > 0:
176
  components.append(gr.Markdown("### Words with Biggest Frequency Differences"))
177
-
178
  # Create dataframe for plotting
179
  model1, model2 = models[0], models[1]
180
  diff_data = []
181
-
182
  for word in diff_words[:10]: # Limit to top 10 for readability
183
  if word in word_matrix:
184
  counts = word_matrix[word]
185
  model1_count = counts.get(model1, 0)
186
  model2_count = counts.get(model2, 0)
187
-
188
  # Only include if there's a meaningful difference
189
  if abs(model1_count - model2_count) > 0:
190
  components.append(gr.Markdown(
191
  f"- **{word}**: {model1}: {model1_count}, {model2}: {model2_count}"
192
  ))
193
-
 
 
 
 
 
 
 
 
194
  if not components:
195
  components.append(gr.Markdown("No visualization components could be created from the analysis results."))
196
-
197
  print(f"Visualization complete: generated {len(components)} components")
198
  return components
199
  except Exception as e:
@@ -201,3 +214,4 @@ def process_and_visualize_analysis(analysis_results):
201
  error_msg = f"Visualization error: {str(e)}\n{traceback.format_exc()}"
202
  print(error_msg)
203
  return [gr.Markdown(f"**Error during visualization:**\n\n```\n{error_msg}\n```")]
 
 
7
  import pandas as pd
8
  from difflib import SequenceMatcher
9
 
10
+ from visualization.ngram_visualizer import create_ngram_visualization
11
+
12
  def create_bow_visualization(analysis_results):
13
  """
14
  Create visualizations for bag of words analysis results
 
124
 
125
  return output_components
126
 
127
+
128
+ # Build visualization components for every analysis present in the results
129
  def process_and_visualize_analysis(analysis_results):
130
  """
131
  Process the analysis results and create visualization components
132
+
133
  Args:
134
  analysis_results (dict): The analysis results
135
+
136
  Returns:
137
  list: List of gradio components for visualization
138
  """
139
  try:
140
  print(f"Starting visualization of analysis results: {type(analysis_results)}")
141
  components = []
142
+
143
  if not analysis_results or "analyses" not in analysis_results:
144
  print("Warning: Empty or invalid analysis results")
145
  components.append(gr.Markdown("No analysis results to visualize."))
146
  return components
147
+
148
  # For each prompt in the analysis results
149
  for prompt, analyses in analysis_results.get("analyses", {}).items():
150
  print(f"Visualizing results for prompt: {prompt[:30]}...")
151
  components.append(gr.Markdown(f"## Analysis for Prompt:\n\"{prompt}\""))
152
+
153
  # Check for Bag of Words analysis
154
  if "bag_of_words" in analyses:
155
  print("Processing Bag of Words visualization")
156
  components.append(gr.Markdown("### Bag of Words Analysis"))
157
  bow_results = analyses["bag_of_words"]
158
+
159
  # Display models compared
160
  if "models" in bow_results:
161
  models = bow_results["models"]
162
  components.append(gr.Markdown(f"**Models compared**: {', '.join(models)}"))
163
+
164
  # Display important words for each model
165
  if "important_words" in bow_results:
166
  components.append(gr.Markdown("#### Most Common Words by Model"))
167
+
168
  for model, words in bow_results["important_words"].items():
169
  print(f"Creating word list for model {model}")
170
  word_list = [f"{item['word']} ({item['count']})" for item in words[:10]]
171
  components.append(gr.Markdown(f"**{model}**: {', '.join(word_list)}"))
172
+
173
  # Add visualizations for word frequency differences
174
+ if "differential_words" in bow_results and "word_count_matrix" in bow_results and len(
175
+ bow_results["models"]) >= 2:
176
  diff_words = bow_results["differential_words"]
177
  word_matrix = bow_results["word_count_matrix"]
178
  models = bow_results["models"]
179
+
180
  if diff_words and word_matrix and len(diff_words) > 0:
181
  components.append(gr.Markdown("### Words with Biggest Frequency Differences"))
182
+
183
  # Create dataframe for plotting
184
  model1, model2 = models[0], models[1]
185
  diff_data = []
186
+
187
  for word in diff_words[:10]: # Limit to top 10 for readability
188
  if word in word_matrix:
189
  counts = word_matrix[word]
190
  model1_count = counts.get(model1, 0)
191
  model2_count = counts.get(model2, 0)
192
+
193
  # Only include if there's a meaningful difference
194
  if abs(model1_count - model2_count) > 0:
195
  components.append(gr.Markdown(
196
  f"- **{word}**: {model1}: {model1_count}, {model2}: {model2_count}"
197
  ))
198
+
199
+ # Check for N-gram analysis
200
+ if "ngram_analysis" in analyses:
201
+ print("Processing N-gram visualization")
202
+ # Use the dedicated n-gram visualization function
203
+ ngram_components = create_ngram_visualization(
204
+ {"analyses": {prompt: {"ngram_analysis": analyses["ngram_analysis"]}}})
205
+ components.extend(ngram_components)
206
+
207
  if not components:
208
  components.append(gr.Markdown("No visualization components could be created from the analysis results."))
209
+
210
  print(f"Visualization complete: generated {len(components)} components")
211
  return components
212
  except Exception as e:
 
214
  error_msg = f"Visualization error: {str(e)}\n{traceback.format_exc()}"
215
  print(error_msg)
216
  return [gr.Markdown(f"**Error during visualization:**\n\n```\n{error_msg}\n```")]
217
+
visualization/ngram_visualizer.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import pandas as pd
4
+ import plotly.express as px
5
+ import plotly.graph_objects as go
6
+ from plotly.subplots import make_subplots
7
+
8
+
9
def create_ngram_visualization(analysis_results):
    """
    Create visualizations for n-gram analysis results

    Args:
        analysis_results (dict): Analysis results from the n-gram analysis,
            read as {"analyses": {prompt: {"ngram_analysis": {...}}}}

    Returns:
        list: List of gradio components with visualizations
    """
    output_components = []

    # Check if we have valid results
    if not analysis_results or "analyses" not in analysis_results:
        return [gr.Markdown("No analysis results found.")]

    # Process each prompt
    for analyses in analysis_results["analyses"].values():
        if "ngram_analysis" not in analyses:
            continue

        ngram_results = analyses["ngram_analysis"]
        models = ngram_results.get("models", [])
        ngram_size = ngram_results.get("ngram_size", 2)
        size_name = "Unigrams" if ngram_size == 1 else f"{ngram_size}-grams"

        # The pairwise sections below index models[0] and models[1], so they
        # are only rendered when two models are actually present.
        has_pair = len(models) >= 2

        if has_pair:
            output_components.append(
                gr.Markdown(f"### {size_name} Analysis: Comparing responses from {models[0]} and {models[1]}"))

        # Bar chart of the top n-grams for each model
        important_ngrams = ngram_results.get("important_ngrams", {})
        for model_name, ngrams in important_ngrams.items():
            if not ngrams:
                # An empty list yields a DataFrame without 'ngram'/'count'
                # columns, which px.bar rejects; skip this model's chart.
                continue

            df = pd.DataFrame(ngrams)
            fig = px.bar(df, x='ngram', y='count',
                         title=f"Top {size_name} Used by {model_name}",
                         labels={'ngram': 'N-gram', 'count': 'Frequency'},
                         height=400)
            fig.update_layout(
                xaxis_title="N-gram",
                yaxis_title="Frequency",
                xaxis={'categoryorder': 'total descending'}
            )
            output_components.append(gr.Plot(value=fig))

        # Visualize differential n-grams (n-grams with biggest frequency difference)
        diff_ngrams = ngram_results.get("differential_ngrams", [])
        ngram_matrix = ngram_results.get("ngram_count_matrix", {})

        if has_pair and diff_ngrams and ngram_matrix:
            output_components.append(gr.Markdown(f"### {size_name} with Biggest Frequency Differences"))

            model1, model2 = models[0], models[1]
            # Limit to top 15 for readability; a model missing from an
            # n-gram's entry never used it and counts as 0.
            diff_data = [
                {
                    "ngram": ngram,
                    model1: ngram_matrix[ngram].get(model1, 0),
                    model2: ngram_matrix[ngram].get(model2, 0),
                }
                for ngram in diff_ngrams[:15]
                if ngram in ngram_matrix
            ]

            if diff_data:
                diff_df = pd.DataFrame(diff_data)

                # Create grouped bar chart, one trace per model
                fig = go.Figure()
                fig.add_trace(go.Bar(
                    x=diff_df['ngram'],
                    y=diff_df[model1],
                    name=model1,
                    marker_color='indianred'
                ))
                fig.add_trace(go.Bar(
                    x=diff_df['ngram'],
                    y=diff_df[model2],
                    name=model2,
                    marker_color='lightsalmon'
                ))
                fig.update_layout(
                    title=f"{size_name} Frequency Comparison",
                    xaxis_title="N-gram",
                    yaxis_title="Frequency",
                    barmode='group',
                    height=500
                )
                output_components.append(gr.Plot(value=fig))

        # Add similarity comparison if available
        if has_pair and "comparisons" in ngram_results:
            output_components.append(gr.Markdown("### N-gram Similarity Metrics"))
            comparison_key = f"{models[0]} vs {models[1]}"

            if comparison_key in ngram_results["comparisons"]:
                metrics = ngram_results["comparisons"][comparison_key]
                common_count = metrics.get("common_ngram_count", 0)

                # Built on a single line so that no source indentation ends
                # up inside the Markdown text.
                metrics_text = (
                    f"- **Common {size_name}**: {common_count} "
                    f"{size_name.lower()} appear in both responses"
                )
                output_components.append(gr.Markdown(metrics_text))

    # If no components were added other than header, show a message
    if len(output_components) <= 1:
        output_components.append(gr.Markdown("No detailed N-gram analysis found in results."))

    return output_components
131
+
132
+
133
def process_and_visualize_ngram_analysis(analysis_results):
    """
    Build the n-gram visualization components, reporting failures in the UI

    Args:
        analysis_results (dict): The analysis results

    Returns:
        list: List of gradio components for visualization; if building them
            raises, a single Markdown component carrying the error message
            and traceback instead
    """
    print("Starting visualization of n-gram analysis results")
    try:
        components = create_ngram_visualization(analysis_results)
    except Exception as exc:
        # Imported lazily: only needed on the failure path.
        import traceback
        details = traceback.format_exc()
        error_msg = f"N-gram visualization error: {str(exc)}\n{details}"
        print(error_msg)
        return [gr.Markdown(f"**Error during n-gram visualization:**\n\n```\n{error_msg}\n```")]
    return components