Ryan committed on
Commit
6aa7fe7
·
1 Parent(s): 087a38a
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
.idea/workspace.xml CHANGED
@@ -53,7 +53,7 @@
53
  <option name="presentableId" value="Default" />
54
  <updated>1745170754325</updated>
55
  <workItem from="1745170755404" duration="245000" />
56
- <workItem from="1745172030020" duration="2043000" />
57
  </task>
58
  <servers />
59
  </component>
 
53
  <option name="presentableId" value="Default" />
54
  <updated>1745170754325</updated>
55
  <workItem from="1745170755404" duration="245000" />
56
+ <workItem from="1745172030020" duration="2752000" />
57
  </task>
58
  <servers />
59
  </component>
app.py CHANGED
@@ -6,7 +6,7 @@ import nltk
6
  import os
7
  import json
8
 
9
- # Download necessary NLTK data packages
10
  def download_nltk_resources():
11
  """Download required NLTK resources if not already downloaded"""
12
  try:
@@ -97,38 +97,178 @@ def create_app():
97
  # Analysis Tab
98
  with gr.Tab("Analysis"):
99
  # Use create_analysis_screen to get UI components including visualization container
100
- analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, visualization_container = create_analysis_screen()
101
 
102
- # Define a helper function to extract parameter values and call process_analysis_request
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  def run_analysis(dataset, selected_analyses, bow_top):
104
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  parameters = {
106
  "bow_top": bow_top,
107
  }
108
  print("Running analysis with parameters:", parameters)
109
 
110
  # Process the analysis request
111
- analysis_results, output_update = process_analysis_request(dataset, selected_analyses, parameters)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
- # Generate visualization components
114
- print("Generating visualization components...")
115
- # Instead of directly returning the components, we'll update the container
116
- visualization_components = process_and_visualize_analysis(analysis_results)
 
 
 
 
 
117
 
118
- # For the third return value, return a simple value that can trigger the update
119
- # The actual components will be placed inside the container
120
- return analysis_results, True, gr.update(value=visualization_components)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  except Exception as e:
122
  import traceback
123
- error_msg = f"Error in run_analysis: {str(e)}\n{traceback.format_exc()}"
124
  print(error_msg)
125
- return {"error": error_msg}, True, gr.update(value=[gr.Markdown(f"**Error:**\n\n```\n{error_msg}\n```")])
 
 
 
 
 
 
 
 
126
 
127
  # Run analysis with proper parameters
128
  run_analysis_btn.click(
129
  fn=run_analysis,
130
  inputs=[dataset_state, analysis_options, bow_top_slider],
131
- outputs=[analysis_results_state, analysis_output, visualization_container]
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  )
133
 
134
  return app
@@ -138,4 +278,4 @@ if __name__ == "__main__":
138
  download_nltk_resources()
139
 
140
  app = create_app()
141
- app.launch()
 
6
  import os
7
  import json
8
 
9
+ # Download necessary NLTK resources function remains unchanged
10
  def download_nltk_resources():
11
  """Download required NLTK resources if not already downloaded"""
12
  try:
 
97
  # Analysis Tab
98
  with gr.Tab("Analysis"):
99
  # Use create_analysis_screen to get UI components including visualization container
100
+ analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider = create_analysis_screen()
101
 
102
+ # Pre-create visualization components (initially hidden)
103
+ with gr.Column(visible=False) as visualization_area:
104
+ analysis_title = gr.Markdown("## Analysis Results")
105
+ prompt_title = gr.Markdown()
106
+ models_compared = gr.Markdown()
107
+
108
+ # Container for model 1 words
109
+ with gr.Column() as model1_words_container:
110
+ model1_title = gr.Markdown()
111
+ model1_words = gr.Markdown()
112
+
113
+ # Container for model 2 words
114
+ with gr.Column() as model2_words_container:
115
+ model2_title = gr.Markdown()
116
+ model2_words = gr.Markdown()
117
+
118
+ # Similarity metrics
119
+ similarity_metrics_title = gr.Markdown("### Similarity Metrics")
120
+ similarity_metrics = gr.Markdown()
121
+
122
+ # Status or error message area
123
+ status_message = gr.Markdown(visible=False)
124
+
125
+ # Define a helper function to extract parameter values and run the analysis
126
  def run_analysis(dataset, selected_analyses, bow_top):
127
  try:
128
+ if not dataset or "entries" not in dataset or not dataset["entries"]:
129
+ return (
130
+ {}, # analysis_results_state
131
+ False, # analysis_output visibility
132
+ False, # visualization_area visibility
133
+ "", # prompt_title
134
+ "", # models_compared
135
+ "", # model1_title
136
+ "", # model1_words
137
+ "", # model2_title
138
+ "", # model2_words
139
+ "", # similarity_metrics
140
+ True, # status_message visibility
141
+ "❌ **Error:** No dataset loaded. Please create or load a dataset first." # status_message
142
+ )
143
+
144
  parameters = {
145
  "bow_top": bow_top,
146
  }
147
  print("Running analysis with parameters:", parameters)
148
 
149
  # Process the analysis request
150
+ analysis_results, _ = process_analysis_request(dataset, selected_analyses, parameters)
151
+
152
+ # If there's an error or no results
153
+ if not analysis_results or "analyses" not in analysis_results or not analysis_results["analyses"]:
154
+ return (
155
+ analysis_results,
156
+ False,
157
+ False,
158
+ "", "", "", "", "", "", "",
159
+ True,
160
+ "❌ **No results found.** Try different analysis options."
161
+ )
162
+
163
+ # Extract information to display in components
164
+ prompt = list(analysis_results["analyses"].keys())[0]
165
+ analyses = analysis_results["analyses"][prompt]
166
+
167
+ if "bag_of_words" not in analyses:
168
+ return (
169
+ analysis_results,
170
+ False,
171
+ False,
172
+ "", "", "", "", "", "", "",
173
+ True,
174
+ "❌ **No Bag of Words analysis found.** Make sure to select it in the options."
175
+ )
176
+
177
+ bow_results = analyses["bag_of_words"]
178
+ models = bow_results.get("models", [])
179
 
180
+ if len(models) < 2:
181
+ return (
182
+ analysis_results,
183
+ False,
184
+ False,
185
+ "", "", "", "", "", "", "",
186
+ True,
187
+ "❌ **Not enough models to compare.** Please ensure you have two model responses."
188
+ )
189
 
190
+ # Extract and format information for display
191
+ model1_name = models[0]
192
+ model2_name = models[1]
193
+
194
+ # Format important words for each model
195
+ important_words = bow_results.get("important_words", {})
196
+ model1_words_text = "No important words found"
197
+ model2_words_text = "No important words found"
198
+
199
+ if model1_name in important_words:
200
+ word_list = [f"**{item['word']}** ({item['count']})" for item in important_words[model1_name][:10]]
201
+ model1_words_text = ", ".join(word_list)
202
+
203
+ if model2_name in important_words:
204
+ word_list = [f"**{item['word']}** ({item['count']})" for item in important_words[model2_name][:10]]
205
+ model2_words_text = ", ".join(word_list)
206
+
207
+ # Format similarity metrics
208
+ similarity_text = "No similarity metrics found"
209
+ comparisons = bow_results.get("comparisons", {})
210
+ comparison_key = f"{model1_name} vs {model2_name}"
211
+
212
+ if comparison_key in comparisons:
213
+ metrics = comparisons[comparison_key]
214
+ cosine = metrics.get("cosine_similarity", 0)
215
+ jaccard = metrics.get("jaccard_similarity", 0)
216
+ common_words = metrics.get("common_word_count", 0)
217
+
218
+ similarity_text = f"""
219
+ - **Cosine Similarity**: {cosine:.2f} (higher means more similar word frequency patterns)
220
+ - **Jaccard Similarity**: {jaccard:.2f} (higher means more word overlap)
221
+ - **Common Words**: {common_words} words appear in both responses
222
+ """
223
+
224
+ # Return all updated component values
225
+ return (
226
+ analysis_results, # analysis_results_state
227
+ False, # analysis_output visibility
228
+ True, # visualization_area visibility
229
+ f"## Analysis of Prompt: \"{prompt[:100]}...\"", # prompt_title
230
+ f"### Comparing responses from {model1_name} and {model2_name}", # models_compared
231
+ f"#### Top Words Used by {model1_name}", # model1_title
232
+ model1_words_text, # model1_words
233
+ f"#### Top Words Used by {model2_name}", # model2_title
234
+ model2_words_text, # model2_words
235
+ similarity_text, # similarity_metrics
236
+ False, # status_message visibility
237
+ "" # status_message
238
+ )
239
+
240
  except Exception as e:
241
  import traceback
242
+ error_msg = f"Error in analysis: {str(e)}\n{traceback.format_exc()}"
243
  print(error_msg)
244
+
245
+ return (
246
+ {"error": error_msg}, # analysis_results_state
247
+ True, # analysis_output visibility (show raw JSON for debugging)
248
+ False, # visualization_area visibility
249
+ "", "", "", "", "", "", "",
250
+ True, # status_message visibility
251
+ f"❌ **Error during analysis:**\n\n```\n{str(e)}\n```" # status_message
252
+ )
253
 
254
  # Run analysis with proper parameters
255
  run_analysis_btn.click(
256
  fn=run_analysis,
257
  inputs=[dataset_state, analysis_options, bow_top_slider],
258
+ outputs=[
259
+ analysis_results_state,
260
+ analysis_output,
261
+ visualization_area,
262
+ prompt_title,
263
+ models_compared,
264
+ model1_title,
265
+ model1_words,
266
+ model2_title,
267
+ model2_words,
268
+ similarity_metrics,
269
+ status_message,
270
+ status_message
271
+ ]
272
  )
273
 
274
  return app
 
278
  download_nltk_resources()
279
 
280
  app = create_app()
281
+ app.launch()
ui/analysis_screen.py CHANGED
@@ -102,12 +102,10 @@ def create_analysis_screen():
102
 
103
  # Analysis output area - hidden JSON component to store raw results
104
  analysis_output = gr.JSON(label="Analysis Results", visible=False)
105
-
106
- # Visualization components container
107
- visualization_container = gr.Column(visible=False)
108
 
109
  # Return the bow_top_slider directly so app.py can access it
110
- return analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, visualization_container
 
111
 
112
  def process_analysis_request(dataset, selected_analyses, parameters):
113
  """
 
102
 
103
  # Analysis output area - hidden JSON component to store raw results
104
  analysis_output = gr.JSON(label="Analysis Results", visible=False)
 
 
 
105
 
106
  # Return the bow_top_slider directly so app.py can access it
107
+ # Note: Removed the visualization_container from return values since we'll pre-create it
108
+ return analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider
109
 
110
  def process_analysis_request(dataset, selected_analyses, parameters):
111
  """
visualization/bow_visualizer.py CHANGED
@@ -146,9 +146,6 @@ def create_bow_visualization(analysis_results):
146
 
147
  return output_components
148
 
149
- import gradio as gr
150
- import traceback
151
-
152
  def process_and_visualize_analysis(analysis_results):
153
  """
154
  Process the analysis results and create visualization components
@@ -205,6 +202,31 @@ def process_and_visualize_analysis(analysis_results):
205
  f"- Cosine similarity: {cosine:.2f}\n"
206
  f"- Jaccard similarity: {jaccard:.2f}"
207
  ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209
  if not components:
210
  components.append(gr.Markdown("No visualization components could be created from the analysis results."))
@@ -212,6 +234,7 @@ def process_and_visualize_analysis(analysis_results):
212
  print(f"Visualization complete: generated {len(components)} components")
213
  return components
214
  except Exception as e:
 
215
  error_msg = f"Visualization error: {str(e)}\n{traceback.format_exc()}"
216
  print(error_msg)
217
- return [gr.Markdown(f"**Error during visualization:**\n\n```\n{error_msg}\n```")]
 
146
 
147
  return output_components
148
 
 
 
 
149
  def process_and_visualize_analysis(analysis_results):
150
  """
151
  Process the analysis results and create visualization components
 
202
  f"- Cosine similarity: {cosine:.2f}\n"
203
  f"- Jaccard similarity: {jaccard:.2f}"
204
  ))
205
+
206
+ # Add visualizations for word frequency differences
207
+ if "differential_words" in bow_results and "word_count_matrix" in bow_results and len(bow_results["models"]) >= 2:
208
+ diff_words = bow_results["differential_words"]
209
+ word_matrix = bow_results["word_count_matrix"]
210
+ models = bow_results["models"]
211
+
212
+ if diff_words and word_matrix and len(diff_words) > 0:
213
+ components.append(gr.Markdown("### Words with Biggest Frequency Differences"))
214
+
215
+ # Create dataframe for plotting
216
+ model1, model2 = models[0], models[1]
217
+ diff_data = []
218
+
219
+ for word in diff_words[:10]: # Limit to top 10 for readability
220
+ if word in word_matrix:
221
+ counts = word_matrix[word]
222
+ model1_count = counts.get(model1, 0)
223
+ model2_count = counts.get(model2, 0)
224
+
225
+ # Only include if there's a meaningful difference
226
+ if abs(model1_count - model2_count) > 0:
227
+ components.append(gr.Markdown(
228
+ f"- **{word}**: {model1}: {model1_count}, {model2}: {model2_count}"
229
+ ))
230
 
231
  if not components:
232
  components.append(gr.Markdown("No visualization components could be created from the analysis results."))
 
234
  print(f"Visualization complete: generated {len(components)} components")
235
  return components
236
  except Exception as e:
237
+ import traceback
238
  error_msg = f"Visualization error: {str(e)}\n{traceback.format_exc()}"
239
  print(error_msg)
240
+ return [gr.Markdown(f"**Error during visualization:**\n\n```\n{error_msg}\n```")]