Ryan committed on
Commit
d164098
·
1 Parent(s): 3ae6198
Files changed (1) hide show
  1. visualization/bow_visualizer.py +81 -144
visualization/bow_visualizer.py CHANGED
@@ -171,159 +171,96 @@ def process_and_visualize_analysis(analysis_results):
171
  word_list = [f"{item['word']} ({item['count']})" for item in words[:10]]
172
  components.append(gr.Markdown(f"**{model}**: {', '.join(word_list)}"))
173
 
174
- # Create a button to show visualizations
175
- def show_bow_visualizations():
176
- visualization_components = []
177
-
178
- # Generate model-specific word frequency charts
179
- if "important_words" in bow_results:
180
- for model_name, words in bow_results.get("important_words", {}).items():
181
- df = pd.DataFrame(words)
182
-
183
- # Create bar chart for top words
184
- fig = px.bar(df.head(15), x='word', y='count',
185
- title=f"Top Words Used by {model_name}",
186
- labels={'word': 'Word', 'count': 'Frequency'},
187
- height=400)
188
-
189
- # Improve layout
190
- fig.update_layout(
191
- xaxis_title="Word",
192
- yaxis_title="Frequency",
193
- xaxis={'categoryorder':'total descending'}
194
- )
195
-
196
- visualization_components.append(gr.Plot(value=fig))
197
-
198
- # Visualize differential words
199
- models = bow_results.get("models", [])
200
- diff_words = bow_results.get("differential_words", [])
201
- word_matrix = bow_results.get("word_count_matrix", {})
202
 
203
- if len(models) >= 2 and diff_words and word_matrix:
204
- model1, model2 = models[0], models[1]
205
- diff_data = []
206
 
207
- for word in diff_words[:15]:
208
- if word in word_matrix:
209
- counts = word_matrix[word]
210
- diff_data.append({
211
- "word": word,
212
- model1: counts.get(model1, 0),
213
- model2: counts.get(model2, 0)
214
- })
215
 
216
- if diff_data:
217
- diff_df = pd.DataFrame(diff_data)
218
-
219
- # Create grouped bar chart
220
- fig = go.Figure()
221
- fig.add_trace(go.Bar(
222
- x=diff_df['word'],
223
- y=diff_df[model1],
224
- name=model1,
225
- marker_color='indianred'
226
- ))
227
- fig.add_trace(go.Bar(
228
- x=diff_df['word'],
229
- y=diff_df[model2],
230
- name=model2,
231
- marker_color='lightsalmon'
232
- ))
233
-
234
- fig.update_layout(
235
- title="Word Frequency Comparison",
236
- xaxis_title="Word",
237
- yaxis_title="Frequency",
238
- barmode='group',
239
- height=500
240
- )
241
-
242
- visualization_components.append(gr.Plot(value=fig))
243
-
244
- return visualization_components
245
-
246
- # Add a button to show visualizations
247
- show_button = gr.Button("📊 Show Word Frequency Visualizations")
248
- visualization_container = gr.Column(visible=False)
249
-
250
- def show_visualizations():
251
- return gr.Column(visible=True)
252
 
253
- show_button.click(
254
- fn=show_visualizations,
255
- outputs=visualization_container
256
- )
257
 
258
- with visualization_container:
259
- # Generate model-specific word frequency charts
260
- if "important_words" in bow_results:
261
- for model_name, words in bow_results.get("important_words", {}).items():
262
- df = pd.DataFrame(words)
263
-
264
- # Create bar chart for top words
265
- fig = px.bar(df.head(15), x='word', y='count',
266
- title=f"Top Words Used by {model_name}",
267
- labels={'word': 'Word', 'count': 'Frequency'},
268
- height=400)
269
-
270
- # Improve layout
271
- fig.update_layout(
272
- xaxis_title="Word",
273
- yaxis_title="Frequency",
274
- xaxis={'categoryorder':'total descending'}
275
- )
276
-
277
- gr.Plot(value=fig)
278
 
279
- # Visualize differential words
280
- models = bow_results.get("models", [])
281
- diff_words = bow_results.get("differential_words", [])
282
- word_matrix = bow_results.get("word_count_matrix", {})
 
 
 
 
283
 
284
- if len(models) >= 2 and diff_words and word_matrix:
285
- model1, model2 = models[0], models[1]
286
- diff_data = []
287
 
288
- for word in diff_words[:15]:
289
- if word in word_matrix:
290
- counts = word_matrix[word]
291
- diff_data.append({
292
- "word": word,
293
- model1: counts.get(model1, 0),
294
- model2: counts.get(model2, 0)
295
- })
 
 
 
 
 
 
296
 
297
- if diff_data:
298
- diff_df = pd.DataFrame(diff_data)
299
-
300
- # Create grouped bar chart
301
- fig = go.Figure()
302
- fig.add_trace(go.Bar(
303
- x=diff_df['word'],
304
- y=diff_df[model1],
305
- name=model1,
306
- marker_color='indianred'
307
- ))
308
- fig.add_trace(go.Bar(
309
- x=diff_df['word'],
310
- y=diff_df[model2],
311
- name=model2,
312
- marker_color='lightsalmon'
313
- ))
314
-
315
- fig.update_layout(
316
- title="Word Frequency Comparison",
317
- xaxis_title="Word",
318
- yaxis_title="Frequency",
319
- barmode='group',
320
- height=500
321
- )
322
-
323
- gr.Plot(value=fig)
324
-
325
- components.append(show_button)
326
- components.append(visualization_container)
 
327
 
328
  if not components:
329
  components.append(gr.Markdown("No visualization components could be created from the analysis results."))
 
171
  word_list = [f"{item['word']} ({item['count']})" for item in words[:10]]
172
  components.append(gr.Markdown(f"**{model}**: {', '.join(word_list)}"))
173
 
174
+ # Generate and display visualizations directly
175
+ if "important_words" in bow_results:
176
+ components.append(gr.Markdown("### Word Frequency Visualizations"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
+ for model_name, words in bow_results.get("important_words", {}).items():
179
+ print(f"Creating visualization for {model_name}")
180
+ df = pd.DataFrame(words)
181
 
182
+ # Create bar chart for top words
183
+ fig = px.bar(df.head(15), x='word', y='count',
184
+ title=f"Top Words Used by {model_name}",
185
+ labels={'word': 'Word', 'count': 'Frequency'},
186
+ height=400)
 
 
 
187
 
188
+ # Improve layout
189
+ fig.update_layout(
190
+ xaxis_title="Word",
191
+ yaxis_title="Frequency",
192
+ xaxis={'categoryorder':'total descending'}
193
+ )
194
+
195
+ components.append(gr.Plot(value=fig))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
197
+ # Visualize differential words
198
+ models = bow_results.get("models", [])
199
+ diff_words = bow_results.get("differential_words", [])
200
+ word_matrix = bow_results.get("word_count_matrix", {})
201
 
202
+ if len(models) >= 2 and diff_words and word_matrix:
203
+ model1, model2 = models[0], models[1]
204
+ diff_data = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
+ for word in diff_words[:15]:
207
+ if word in word_matrix:
208
+ counts = word_matrix[word]
209
+ diff_data.append({
210
+ "word": word,
211
+ model1: counts.get(model1, 0),
212
+ model2: counts.get(model2, 0)
213
+ })
214
 
215
+ if diff_data:
216
+ diff_df = pd.DataFrame(diff_data)
 
217
 
218
+ # Create grouped bar chart
219
+ fig = go.Figure()
220
+ fig.add_trace(go.Bar(
221
+ x=diff_df['word'],
222
+ y=diff_df[model1],
223
+ name=model1,
224
+ marker_color='indianred'
225
+ ))
226
+ fig.add_trace(go.Bar(
227
+ x=diff_df['word'],
228
+ y=diff_df[model2],
229
+ name=model2,
230
+ marker_color='lightsalmon'
231
+ ))
232
 
233
+ fig.update_layout(
234
+ title="Word Frequency Comparison Between Models",
235
+ xaxis_title="Word",
236
+ yaxis_title="Frequency",
237
+ barmode='group',
238
+ height=500
239
+ )
240
+
241
+ components.append(gr.Plot(value=fig))
242
+
243
+ # Check for N-gram analysis
244
+ if "ngram_analysis" in analyses:
245
+ print("Processing N-gram visualization")
246
+ components.append(gr.Markdown("### N-gram Analysis"))
247
+ ngram_components = create_ngram_visualization(
248
+ {"analyses": {prompt: {"ngram_analysis": analyses["ngram_analysis"]}}}
249
+ )
250
+ # Skip the first component if it's a duplicate header
251
+ if len(ngram_components) > 1:
252
+ components.extend(ngram_components[1:])
253
+
254
+ # Check for Topic Modeling analysis
255
+ if "topic_modeling" in analyses:
256
+ print("Processing Topic Modeling visualization")
257
+ components.append(gr.Markdown("### Topic Modeling Analysis"))
258
+ topic_components = process_and_visualize_topic_analysis(
259
+ {"analyses": {prompt: {"topic_modeling": analyses["topic_modeling"]}}}
260
+ )
261
+ # Skip the first component if it's a duplicate header
262
+ if len(topic_components) > 1:
263
+ components.extend(topic_components[1:])
264
 
265
  if not components:
266
  components.append(gr.Markdown("No visualization components could be created from the analysis results."))