Spaces:
Sleeping
Sleeping
Ryan
committed on
Commit
·
39cf944
1
Parent(s):
1a44569
update
Browse files- .idea/workspace.xml +6 -2
- app.py +4 -2
- processors/ngram_analysis.py +109 -0
- ui/analysis_screen.py +23 -28
- visualization/bow_visualizer.py +30 -16
- visualization/ngram_visualizer.py +150 -0
.idea/workspace.xml
CHANGED
@@ -5,8 +5,12 @@
|
|
5 |
</component>
|
6 |
<component name="ChangeListManager">
|
7 |
<list default="true" id="8e67814c-7f04-433c-ab7a-2b65a1106d4c" name="Changes" comment="">
|
|
|
|
|
8 |
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
9 |
-
<change beforePath="$PROJECT_DIR$/
|
|
|
|
|
10 |
</list>
|
11 |
<option name="SHOW_DIALOG" value="false" />
|
12 |
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
@@ -56,7 +60,7 @@
|
|
56 |
<option name="presentableId" value="Default" />
|
57 |
<updated>1745170754325</updated>
|
58 |
<workItem from="1745170755404" duration="245000" />
|
59 |
-
<workItem from="1745172030020" duration="
|
60 |
</task>
|
61 |
<servers />
|
62 |
</component>
|
|
|
5 |
</component>
|
6 |
<component name="ChangeListManager">
|
7 |
<list default="true" id="8e67814c-7f04-433c-ab7a-2b65a1106d4c" name="Changes" comment="">
|
8 |
+
<change afterPath="$PROJECT_DIR$/processors/ngram_analysis.py" afterDir="false" />
|
9 |
+
<change afterPath="$PROJECT_DIR$/visualization/ngram_visualizer.py" afterDir="false" />
|
10 |
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
11 |
+
<change beforePath="$PROJECT_DIR$/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/app.py" afterDir="false" />
|
12 |
+
<change beforePath="$PROJECT_DIR$/ui/analysis_screen.py" beforeDir="false" afterPath="$PROJECT_DIR$/ui/analysis_screen.py" afterDir="false" />
|
13 |
+
<change beforePath="$PROJECT_DIR$/visualization/bow_visualizer.py" beforeDir="false" afterPath="$PROJECT_DIR$/visualization/bow_visualizer.py" afterDir="false" />
|
14 |
</list>
|
15 |
<option name="SHOW_DIALOG" value="false" />
|
16 |
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
|
|
60 |
<option name="presentableId" value="Default" />
|
61 |
<updated>1745170754325</updated>
|
62 |
<workItem from="1745170755404" duration="245000" />
|
63 |
+
<workItem from="1745172030020" duration="7284000" />
|
64 |
</task>
|
65 |
<servers />
|
66 |
</component>
|
app.py
CHANGED
@@ -122,7 +122,7 @@ def create_app():
|
|
122 |
status_message = gr.Markdown(visible=False)
|
123 |
|
124 |
# Define a helper function to extract parameter values and run the analysis
|
125 |
-
def run_analysis(dataset, selected_analyses, bow_top):
|
126 |
try:
|
127 |
if not dataset or "entries" not in dataset or not dataset["entries"]:
|
128 |
return (
|
@@ -144,6 +144,8 @@ def create_app():
|
|
144 |
|
145 |
parameters = {
|
146 |
"bow_top": bow_top,
|
|
|
|
|
147 |
}
|
148 |
print("Running analysis with parameters:", parameters)
|
149 |
|
@@ -324,7 +326,7 @@ def create_app():
|
|
324 |
# Run analysis with proper parameters
|
325 |
run_analysis_btn.click(
|
326 |
fn=run_analysis,
|
327 |
-
inputs=[dataset_state, analysis_options, bow_top_slider],
|
328 |
outputs=[
|
329 |
analysis_results_state,
|
330 |
analysis_output,
|
|
|
122 |
status_message = gr.Markdown(visible=False)
|
123 |
|
124 |
# Define a helper function to extract parameter values and run the analysis
|
125 |
+
def run_analysis(dataset, selected_analyses, bow_top, ngram_n, ngram_top):
|
126 |
try:
|
127 |
if not dataset or "entries" not in dataset or not dataset["entries"]:
|
128 |
return (
|
|
|
144 |
|
145 |
parameters = {
|
146 |
"bow_top": bow_top,
|
147 |
+
"ngram_n": ngram_n,
|
148 |
+
"ngram_top": ngram_top
|
149 |
}
|
150 |
print("Running analysis with parameters:", parameters)
|
151 |
|
|
|
326 |
# Run analysis with proper parameters
|
327 |
run_analysis_btn.click(
|
328 |
fn=run_analysis,
|
329 |
+
inputs=[dataset_state, analysis_options, bow_top_slider, ngram_n, ngram_top],
|
330 |
outputs=[
|
331 |
analysis_results_state,
|
332 |
analysis_output,
|
processors/ngram_analysis.py
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
N-gram analysis for comparing text responses
|
3 |
+
"""
|
4 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
5 |
+
import numpy as np
|
6 |
+
from collections import Counter
|
7 |
+
import nltk
|
8 |
+
from nltk.util import ngrams
|
9 |
+
from nltk.tokenize import word_tokenize
|
10 |
+
from nltk.corpus import stopwords
|
11 |
+
|
12 |
+
|
13 |
+
def compare_ngrams(texts, model_names, n=2, top_n=25):
    """
    Compare n-gram usage across multiple texts.

    Args:
        texts (list): Text responses to compare, one per model.
        model_names (list): Model names; ``model_names[i]`` labels ``texts[i]``.
            If the two lists differ in length, only the pairs that exist in
            both are analysed.
        n (int): Size of n-grams (1 for unigrams, 2 for bigrams, etc.).
            Floats and numeric strings are coerced with ``int()``.
        top_n (int): Maximum number of n-grams kept per model and in the
            differential list. Coerced with ``int()``; negatives count as 0.

    Returns:
        dict: N-gram analysis results with the keys
            ``models``, ``ngram_size``, ``important_ngrams``
            (model -> list of ``{"ngram", "count"}``), ``ngram_count_matrix``
            (ngram -> model -> count), ``differential_ngrams`` and, when at
            least two models are given and n-grams were found,
            ``comparisons``.

    Raises:
        ValueError: If ``n``/``top_n`` cannot be converted to ``int``, or if
            the vectorizer rejects its configuration.
    """
    # UI widgets may deliver these as floats or numeric strings; a float would
    # make the ``[:top_n]`` slices below raise TypeError.
    n = int(n)
    top_n = max(int(top_n), 0)

    result = {
        "models": model_names,
        "ngram_size": n,
        "important_ngrams": {},
        "ngram_count_matrix": {},
        "differential_ngrams": []
    }

    # Nothing to analyse
    if not texts:
        return result

    vectorizer = CountVectorizer(
        ngram_range=(n, n),  # Use only the requested n-gram size
        max_features=1000,
        stop_words='english'
    )

    try:
        X = vectorizer.fit_transform(texts)
    except ValueError as exc:
        # The vectorizer raises when no n-gram survives tokenisation and
        # stop-word filtering (very short or stop-word-only responses).
        # Treat that as "no n-grams found"; re-raise anything else.
        if "empty vocabulary" not in str(exc):
            raise
        return result

    feature_names = vectorizer.get_feature_names_out()
    count_matrix = result["ngram_count_matrix"]

    # Per-model frequencies. zip() stops at the shorter sequence, so surplus
    # model names can no longer index past the last row of X.
    ngram_counts = {}
    for model, row in zip(model_names, X.toarray()):
        model_counts = {}
        for ngram, count in zip(feature_names, row):
            if count > 0:  # Only store n-grams that actually appear
                model_counts[ngram] = int(count)
                count_matrix.setdefault(ngram, {})[model] = int(count)
        ngram_counts[model] = model_counts

    # Most frequent n-grams for each model (stable sort keeps vocabulary
    # order among ties)
    for model, freqs in ngram_counts.items():
        ranked = sorted(freqs.items(), key=lambda item: item[1], reverse=True)
        result["important_ngrams"][model] = [
            {"ngram": ngram, "count": count}
            for ngram, count in ranked[:top_n]
        ]

    # Differential n-grams and overlap are computed for the first two models
    if len(model_names) >= 2:
        model1, model2 = model_names[0], model_names[1]

        # Absolute frequency difference between the two models
        diff_scores = {
            ngram: abs(per_model.get(model1, 0) - per_model.get(model2, 0))
            for ngram, per_model in count_matrix.items()
        }
        ranked_diffs = sorted(diff_scores, key=diff_scores.get, reverse=True)
        result["differential_ngrams"] = ranked_diffs[:top_n]

        # Overlap statistics: n-grams used by both models
        common_ngrams = (
            ngram_counts.get(model1, {}).keys()
            & ngram_counts.get(model2, {}).keys()
        )
        result["comparisons"] = {
            f"{model1} vs {model2}": {
                "common_ngram_count": len(common_ngrams)
            }
        }

    return result
|
ui/analysis_screen.py
CHANGED
@@ -10,6 +10,7 @@ from visualization.bow_visualizer import process_and_visualize_analysis
|
|
10 |
from processors.bow_analysis import compare_bow
|
11 |
# from processors.metrics import calculate_similarity
|
12 |
# from processors.diff_highlighter import highlight_differences
|
|
|
13 |
|
14 |
def create_analysis_screen():
|
15 |
"""
|
@@ -105,56 +106,50 @@ def create_analysis_screen():
|
|
105 |
|
106 |
# Return the bow_top_slider directly so app.py can access it
|
107 |
# Note: Removed the visualization_container from return values since we'll pre-create it
|
108 |
-
return analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider
|
109 |
|
|
|
110 |
def process_analysis_request(dataset, selected_analyses, parameters):
|
111 |
"""
|
112 |
Process the analysis request and run selected analyses
|
113 |
-
|
114 |
-
Args:
|
115 |
-
dataset (dict): The dataset containing prompts and LLM responses
|
116 |
-
selected_analyses (list): List of selected analysis types
|
117 |
-
parameters (dict): Parameters for each analysis type
|
118 |
-
|
119 |
-
Returns:
|
120 |
-
tuple: (analysis_results, analysis_output_display)
|
121 |
"""
|
122 |
try:
|
123 |
print(f"Processing analysis request with: {selected_analyses}")
|
124 |
print(f"Parameters: {parameters}")
|
125 |
-
|
126 |
if not dataset or "entries" not in dataset or not dataset["entries"]:
|
127 |
-
return {}, gr.update(visible=True,
|
128 |
-
|
|
|
129 |
analysis_results = {"analyses": {}}
|
130 |
-
|
131 |
# Extract prompt and responses
|
132 |
prompt = dataset["entries"][0]["prompt"]
|
133 |
response_texts = [entry["response"] for entry in dataset["entries"]]
|
134 |
model_names = [entry["model"] for entry in dataset["entries"]]
|
135 |
-
|
136 |
print(f"Analyzing prompt: '{prompt[:50]}...'")
|
137 |
print(f"Models: {model_names}")
|
138 |
-
|
139 |
analysis_results["analyses"][prompt] = {}
|
140 |
-
|
141 |
-
#
|
142 |
if "Bag of Words" in selected_analyses:
|
143 |
-
|
144 |
-
top_words = 25
|
145 |
-
|
146 |
-
# Try to get the parameter from the parameters dict
|
147 |
-
if parameters and isinstance(parameters, dict) and "bow_top" in parameters:
|
148 |
-
top_words = parameters["bow_top"]
|
149 |
-
|
150 |
print(f"Running BOW analysis with top_words={top_words}")
|
151 |
-
|
152 |
-
# Call the BOW comparison function
|
153 |
bow_results = compare_bow(response_texts, model_names, top_words)
|
154 |
analysis_results["analyses"][prompt]["bag_of_words"] = bow_results
|
155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
print("Analysis complete - results:", analysis_results)
|
157 |
-
|
158 |
# Return results and update the output component
|
159 |
return analysis_results, gr.update(visible=False, value=analysis_results) # Hide the raw JSON
|
160 |
except Exception as e:
|
|
|
10 |
from processors.bow_analysis import compare_bow
|
11 |
# from processors.metrics import calculate_similarity
|
12 |
# from processors.diff_highlighter import highlight_differences
|
13 |
+
from processors.ngram_analysis import compare_ngrams
|
14 |
|
15 |
def create_analysis_screen():
|
16 |
"""
|
|
|
106 |
|
107 |
# Return the bow_top_slider directly so app.py can access it
|
108 |
# Note: Removed the visualization_container from return values since we'll pre-create it
|
109 |
+
return analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top
|
110 |
|
111 |
+
# function
|
112 |
def process_analysis_request(dataset, selected_analyses, parameters):
|
113 |
"""
|
114 |
Process the analysis request and run selected analyses
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
"""
|
116 |
try:
|
117 |
print(f"Processing analysis request with: {selected_analyses}")
|
118 |
print(f"Parameters: {parameters}")
|
119 |
+
|
120 |
if not dataset or "entries" not in dataset or not dataset["entries"]:
|
121 |
+
return {}, gr.update(visible=True,
|
122 |
+
value=json.dumps({"error": "No dataset provided or dataset is empty"}, indent=2))
|
123 |
+
|
124 |
analysis_results = {"analyses": {}}
|
125 |
+
|
126 |
# Extract prompt and responses
|
127 |
prompt = dataset["entries"][0]["prompt"]
|
128 |
response_texts = [entry["response"] for entry in dataset["entries"]]
|
129 |
model_names = [entry["model"] for entry in dataset["entries"]]
|
130 |
+
|
131 |
print(f"Analyzing prompt: '{prompt[:50]}...'")
|
132 |
print(f"Models: {model_names}")
|
133 |
+
|
134 |
analysis_results["analyses"][prompt] = {}
|
135 |
+
|
136 |
+
# Run Bag of Words analysis if selected
|
137 |
if "Bag of Words" in selected_analyses:
|
138 |
+
top_words = parameters.get("bow_top", 25)
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
print(f"Running BOW analysis with top_words={top_words}")
|
|
|
|
|
140 |
bow_results = compare_bow(response_texts, model_names, top_words)
|
141 |
analysis_results["analyses"][prompt]["bag_of_words"] = bow_results
|
142 |
+
|
143 |
+
# Run N-gram analysis if selected
|
144 |
+
if "N-gram Analysis" in selected_analyses:
|
145 |
+
ngram_n = int(parameters.get("ngram_n", "2"))
|
146 |
+
ngram_top = parameters.get("ngram_top", 10)
|
147 |
+
print(f"Running N-gram analysis with n={ngram_n}, top_n={ngram_top}")
|
148 |
+
ngram_results = compare_ngrams(response_texts, model_names, ngram_n, ngram_top)
|
149 |
+
analysis_results["analyses"][prompt]["ngram_analysis"] = ngram_results
|
150 |
+
|
151 |
print("Analysis complete - results:", analysis_results)
|
152 |
+
|
153 |
# Return results and update the output component
|
154 |
return analysis_results, gr.update(visible=False, value=analysis_results) # Hide the raw JSON
|
155 |
except Exception as e:
|
visualization/bow_visualizer.py
CHANGED
@@ -7,6 +7,8 @@ from plotly.subplots import make_subplots
|
|
7 |
import pandas as pd
|
8 |
from difflib import SequenceMatcher
|
9 |
|
|
|
|
|
10 |
def create_bow_visualization(analysis_results):
|
11 |
"""
|
12 |
Create visualizations for bag of words analysis results
|
@@ -122,78 +124,89 @@ def create_bow_visualization(analysis_results):
|
|
122 |
|
123 |
return output_components
|
124 |
|
|
|
|
|
125 |
def process_and_visualize_analysis(analysis_results):
|
126 |
"""
|
127 |
Process the analysis results and create visualization components
|
128 |
-
|
129 |
Args:
|
130 |
analysis_results (dict): The analysis results
|
131 |
-
|
132 |
Returns:
|
133 |
list: List of gradio components for visualization
|
134 |
"""
|
135 |
try:
|
136 |
print(f"Starting visualization of analysis results: {type(analysis_results)}")
|
137 |
components = []
|
138 |
-
|
139 |
if not analysis_results or "analyses" not in analysis_results:
|
140 |
print("Warning: Empty or invalid analysis results")
|
141 |
components.append(gr.Markdown("No analysis results to visualize."))
|
142 |
return components
|
143 |
-
|
144 |
# For each prompt in the analysis results
|
145 |
for prompt, analyses in analysis_results.get("analyses", {}).items():
|
146 |
print(f"Visualizing results for prompt: {prompt[:30]}...")
|
147 |
components.append(gr.Markdown(f"## Analysis for Prompt:\n\"{prompt}\""))
|
148 |
-
|
149 |
# Check for Bag of Words analysis
|
150 |
if "bag_of_words" in analyses:
|
151 |
print("Processing Bag of Words visualization")
|
152 |
components.append(gr.Markdown("### Bag of Words Analysis"))
|
153 |
bow_results = analyses["bag_of_words"]
|
154 |
-
|
155 |
# Display models compared
|
156 |
if "models" in bow_results:
|
157 |
models = bow_results["models"]
|
158 |
components.append(gr.Markdown(f"**Models compared**: {', '.join(models)}"))
|
159 |
-
|
160 |
# Display important words for each model
|
161 |
if "important_words" in bow_results:
|
162 |
components.append(gr.Markdown("#### Most Common Words by Model"))
|
163 |
-
|
164 |
for model, words in bow_results["important_words"].items():
|
165 |
print(f"Creating word list for model {model}")
|
166 |
word_list = [f"{item['word']} ({item['count']})" for item in words[:10]]
|
167 |
components.append(gr.Markdown(f"**{model}**: {', '.join(word_list)}"))
|
168 |
-
|
169 |
# Add visualizations for word frequency differences
|
170 |
-
if "differential_words" in bow_results and "word_count_matrix" in bow_results and len(
|
|
|
171 |
diff_words = bow_results["differential_words"]
|
172 |
word_matrix = bow_results["word_count_matrix"]
|
173 |
models = bow_results["models"]
|
174 |
-
|
175 |
if diff_words and word_matrix and len(diff_words) > 0:
|
176 |
components.append(gr.Markdown("### Words with Biggest Frequency Differences"))
|
177 |
-
|
178 |
# Create dataframe for plotting
|
179 |
model1, model2 = models[0], models[1]
|
180 |
diff_data = []
|
181 |
-
|
182 |
for word in diff_words[:10]: # Limit to top 10 for readability
|
183 |
if word in word_matrix:
|
184 |
counts = word_matrix[word]
|
185 |
model1_count = counts.get(model1, 0)
|
186 |
model2_count = counts.get(model2, 0)
|
187 |
-
|
188 |
# Only include if there's a meaningful difference
|
189 |
if abs(model1_count - model2_count) > 0:
|
190 |
components.append(gr.Markdown(
|
191 |
f"- **{word}**: {model1}: {model1_count}, {model2}: {model2_count}"
|
192 |
))
|
193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
if not components:
|
195 |
components.append(gr.Markdown("No visualization components could be created from the analysis results."))
|
196 |
-
|
197 |
print(f"Visualization complete: generated {len(components)} components")
|
198 |
return components
|
199 |
except Exception as e:
|
@@ -201,3 +214,4 @@ def process_and_visualize_analysis(analysis_results):
|
|
201 |
error_msg = f"Visualization error: {str(e)}\n{traceback.format_exc()}"
|
202 |
print(error_msg)
|
203 |
return [gr.Markdown(f"**Error during visualization:**\n\n```\n{error_msg}\n```")]
|
|
|
|
7 |
import pandas as pd
|
8 |
from difflib import SequenceMatcher
|
9 |
|
10 |
+
from visualization.ngram_visualizer import create_ngram_visualization
|
11 |
+
|
12 |
def create_bow_visualization(analysis_results):
|
13 |
"""
|
14 |
Create visualizations for bag of words analysis results
|
|
|
124 |
|
125 |
return output_components
|
126 |
|
127 |
+
|
128 |
+
# Then update the process_and_visualize_analysis function
|
129 |
def process_and_visualize_analysis(analysis_results):
|
130 |
"""
|
131 |
Process the analysis results and create visualization components
|
132 |
+
|
133 |
Args:
|
134 |
analysis_results (dict): The analysis results
|
135 |
+
|
136 |
Returns:
|
137 |
list: List of gradio components for visualization
|
138 |
"""
|
139 |
try:
|
140 |
print(f"Starting visualization of analysis results: {type(analysis_results)}")
|
141 |
components = []
|
142 |
+
|
143 |
if not analysis_results or "analyses" not in analysis_results:
|
144 |
print("Warning: Empty or invalid analysis results")
|
145 |
components.append(gr.Markdown("No analysis results to visualize."))
|
146 |
return components
|
147 |
+
|
148 |
# For each prompt in the analysis results
|
149 |
for prompt, analyses in analysis_results.get("analyses", {}).items():
|
150 |
print(f"Visualizing results for prompt: {prompt[:30]}...")
|
151 |
components.append(gr.Markdown(f"## Analysis for Prompt:\n\"{prompt}\""))
|
152 |
+
|
153 |
# Check for Bag of Words analysis
|
154 |
if "bag_of_words" in analyses:
|
155 |
print("Processing Bag of Words visualization")
|
156 |
components.append(gr.Markdown("### Bag of Words Analysis"))
|
157 |
bow_results = analyses["bag_of_words"]
|
158 |
+
|
159 |
# Display models compared
|
160 |
if "models" in bow_results:
|
161 |
models = bow_results["models"]
|
162 |
components.append(gr.Markdown(f"**Models compared**: {', '.join(models)}"))
|
163 |
+
|
164 |
# Display important words for each model
|
165 |
if "important_words" in bow_results:
|
166 |
components.append(gr.Markdown("#### Most Common Words by Model"))
|
167 |
+
|
168 |
for model, words in bow_results["important_words"].items():
|
169 |
print(f"Creating word list for model {model}")
|
170 |
word_list = [f"{item['word']} ({item['count']})" for item in words[:10]]
|
171 |
components.append(gr.Markdown(f"**{model}**: {', '.join(word_list)}"))
|
172 |
+
|
173 |
# Add visualizations for word frequency differences
|
174 |
+
if "differential_words" in bow_results and "word_count_matrix" in bow_results and len(
|
175 |
+
bow_results["models"]) >= 2:
|
176 |
diff_words = bow_results["differential_words"]
|
177 |
word_matrix = bow_results["word_count_matrix"]
|
178 |
models = bow_results["models"]
|
179 |
+
|
180 |
if diff_words and word_matrix and len(diff_words) > 0:
|
181 |
components.append(gr.Markdown("### Words with Biggest Frequency Differences"))
|
182 |
+
|
183 |
# Create dataframe for plotting
|
184 |
model1, model2 = models[0], models[1]
|
185 |
diff_data = []
|
186 |
+
|
187 |
for word in diff_words[:10]: # Limit to top 10 for readability
|
188 |
if word in word_matrix:
|
189 |
counts = word_matrix[word]
|
190 |
model1_count = counts.get(model1, 0)
|
191 |
model2_count = counts.get(model2, 0)
|
192 |
+
|
193 |
# Only include if there's a meaningful difference
|
194 |
if abs(model1_count - model2_count) > 0:
|
195 |
components.append(gr.Markdown(
|
196 |
f"- **{word}**: {model1}: {model1_count}, {model2}: {model2_count}"
|
197 |
))
|
198 |
+
|
199 |
+
# Check for N-gram analysis
|
200 |
+
if "ngram_analysis" in analyses:
|
201 |
+
print("Processing N-gram visualization")
|
202 |
+
# Use the dedicated n-gram visualization function
|
203 |
+
ngram_components = create_ngram_visualization(
|
204 |
+
{"analyses": {prompt: {"ngram_analysis": analyses["ngram_analysis"]}}})
|
205 |
+
components.extend(ngram_components)
|
206 |
+
|
207 |
if not components:
|
208 |
components.append(gr.Markdown("No visualization components could be created from the analysis results."))
|
209 |
+
|
210 |
print(f"Visualization complete: generated {len(components)} components")
|
211 |
return components
|
212 |
except Exception as e:
|
|
|
214 |
error_msg = f"Visualization error: {str(e)}\n{traceback.format_exc()}"
|
215 |
print(error_msg)
|
216 |
return [gr.Markdown(f"**Error during visualization:**\n\n```\n{error_msg}\n```")]
|
217 |
+
|
visualization/ngram_visualizer.py
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import json
|
3 |
+
import pandas as pd
|
4 |
+
import plotly.express as px
|
5 |
+
import plotly.graph_objects as go
|
6 |
+
from plotly.subplots import make_subplots
|
7 |
+
|
8 |
+
|
9 |
+
def _build_top_ngrams_figure(ngrams, model_name, size_name):
    """Build the bar chart of one model's most frequent n-grams."""
    df = pd.DataFrame(ngrams)

    fig = px.bar(df, x='ngram', y='count',
                 title=f"Top {size_name} Used by {model_name}",
                 labels={'ngram': 'N-gram', 'count': 'Frequency'},
                 height=400)

    fig.update_layout(
        xaxis_title="N-gram",
        yaxis_title="Frequency",
        xaxis={'categoryorder': 'total descending'}
    )
    return fig


def _build_differential_figure(diff_ngrams, ngram_matrix, model1, model2,
                               size_name, limit=15):
    """Build the grouped bar chart comparing two models, or None if no data."""
    # Plain lists instead of a DataFrame keyed by model name: a model called
    # "ngram", or two identical model names, would otherwise collide as
    # column keys.
    shown = [ngram for ngram in diff_ngrams[:limit] if ngram in ngram_matrix]
    if not shown:
        return None

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=shown,
        y=[ngram_matrix[ngram].get(model1, 0) for ngram in shown],
        name=model1,
        marker_color='indianred'
    ))
    fig.add_trace(go.Bar(
        x=shown,
        y=[ngram_matrix[ngram].get(model2, 0) for ngram in shown],
        name=model2,
        marker_color='lightsalmon'
    ))

    fig.update_layout(
        title=f"{size_name} Frequency Comparison",
        xaxis_title="N-gram",
        yaxis_title="Frequency",
        barmode='group',
        height=500
    )
    return fig


def create_ngram_visualization(analysis_results):
    """
    Create visualizations for n-gram analysis results

    Args:
        analysis_results (dict): Analysis results; expected to hold an
            "analyses" mapping of prompt -> analyses, where each analyses
            dict may contain an "ngram_analysis" entry

    Returns:
        list: List of gradio components with visualizations
    """
    output_components = []

    # Check if we have valid results
    if not analysis_results or "analyses" not in analysis_results:
        return [gr.Markdown("No analysis results found.")]

    # Process each prompt (the prompt text itself is not used here)
    for analyses in analysis_results["analyses"].values():
        if "ngram_analysis" not in analyses:
            continue

        ngram_results = analyses["ngram_analysis"]
        models = ngram_results.get("models", [])
        ngram_size = ngram_results.get("ngram_size", 2)
        size_name = "Unigrams" if ngram_size == 1 else f"{ngram_size}-grams"

        # The pairwise sections below index models[0] and models[1]
        has_pair = len(models) >= 2

        if has_pair:
            output_components.append(
                gr.Markdown(f"### {size_name} Analysis: Comparing responses from {models[0]} and {models[1]}"))

        # One bar chart per model with its most frequent n-grams
        important_ngrams = ngram_results.get("important_ngrams", {}) or {}
        for model_name, ngrams in important_ngrams.items():
            if not ngrams:
                # An empty list yields a DataFrame without the 'ngram' and
                # 'count' columns, which px.bar rejects - skip this model.
                continue
            output_components.append(
                gr.Plot(value=_build_top_ngrams_figure(ngrams, model_name, size_name)))

        # Visualize differential n-grams (n-grams with biggest frequency difference)
        diff_ngrams = ngram_results.get("differential_ngrams", [])
        ngram_matrix = ngram_results.get("ngram_count_matrix", {})

        if has_pair and diff_ngrams and ngram_matrix:
            output_components.append(gr.Markdown(f"### {size_name} with Biggest Frequency Differences"))

            fig = _build_differential_figure(
                diff_ngrams, ngram_matrix, models[0], models[1], size_name)
            if fig is not None:
                output_components.append(gr.Plot(value=fig))

        # Add similarity comparison if available
        if has_pair and "comparisons" in ngram_results:
            output_components.append(gr.Markdown("### N-gram Similarity Metrics"))
            comparison_key = f"{models[0]} vs {models[1]}"

            if comparison_key in ngram_results["comparisons"]:
                metrics = ngram_results["comparisons"][comparison_key]
                common_count = metrics.get("common_ngram_count", 0)

                # No leading indentation, so Markdown renders this as a bullet
                metrics_text = (
                    f"\n- **Common {size_name}**: {common_count} "
                    f"{size_name.lower()} appear in both responses\n"
                )

                output_components.append(gr.Markdown(metrics_text))

    # If no components were added other than header, show a message
    if len(output_components) <= 1:
        output_components.append(gr.Markdown("No detailed N-gram analysis found in results."))

    return output_components
|
131 |
+
|
132 |
+
|
133 |
+
def process_and_visualize_ngram_analysis(analysis_results):
    """
    Build the n-gram visualization components, reporting failures in the UI

    Delegates to create_ngram_visualization. Any exception raised there is
    printed together with its traceback and returned as a single Markdown
    component instead of propagating to the caller.

    Args:
        analysis_results (dict): The analysis results

    Returns:
        list: List of gradio components for visualization
    """
    try:
        print("Starting visualization of n-gram analysis results")
        components = create_ngram_visualization(analysis_results)
    except Exception as exc:
        # Imported lazily: only needed on the failure path
        import traceback
        details = traceback.format_exc()
        error_msg = f"N-gram visualization error: {exc}\n{details}"
        print(error_msg)
        return [gr.Markdown(f"**Error during n-gram visualization:**\n\n```\n{error_msg}\n```")]
    return components
|