Spaces:
Sleeping
Sleeping
Ryan
committed on
Commit
·
3ae6198
1
Parent(s):
0ea5f5d
update
Browse files: visualization/bow_visualizer.py +91 -40
visualization/bow_visualizer.py
CHANGED
@@ -171,8 +171,10 @@ def process_and_visualize_analysis(analysis_results):
|
|
171 |
word_list = [f"{item['word']} ({item['count']})" for item in words[:10]]
|
172 |
components.append(gr.Markdown(f"**{model}**: {', '.join(word_list)}"))
|
173 |
|
174 |
-
# Create
|
175 |
-
|
|
|
|
|
176 |
# Generate model-specific word frequency charts
|
177 |
if "important_words" in bow_results:
|
178 |
for model_name, words in bow_results.get("important_words", {}).items():
|
@@ -180,9 +182,9 @@ def process_and_visualize_analysis(analysis_results):
|
|
180 |
|
181 |
# Create bar chart for top words
|
182 |
fig = px.bar(df.head(15), x='word', y='count',
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
|
187 |
# Improve layout
|
188 |
fig.update_layout(
|
@@ -191,7 +193,7 @@ def process_and_visualize_analysis(analysis_results):
|
|
191 |
xaxis={'categoryorder':'total descending'}
|
192 |
)
|
193 |
|
194 |
-
|
195 |
|
196 |
# Visualize differential words
|
197 |
models = bow_results.get("models", [])
|
@@ -237,41 +239,91 @@ def process_and_visualize_analysis(analysis_results):
|
|
237 |
height=500
|
238 |
)
|
239 |
|
240 |
-
|
|
|
|
|
241 |
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
with gr.Accordion("📊 View N-gram Analysis Visualizations", open=False) as ngram_accordion:
|
249 |
-
ngram_components = create_ngram_visualization(
|
250 |
-
{"analyses": {prompt: {"ngram_analysis": analyses["ngram_analysis"]}}})
|
251 |
-
components.extend(ngram_components)
|
252 |
-
components.append(ngram_accordion)
|
253 |
-
|
254 |
-
# Check for Topic Modeling analysis
|
255 |
-
if "topic_modeling" in analyses:
|
256 |
-
print("Processing Topic Modeling visualization")
|
257 |
-
# Use the dedicated topic visualization function
|
258 |
-
with gr.Accordion("📊 View Topic Modeling Visualizations", open=False) as topic_accordion:
|
259 |
-
topic_components = process_and_visualize_topic_analysis(
|
260 |
-
{"analyses": {prompt: {"topic_modeling": analyses["topic_modeling"]}}})
|
261 |
-
components.extend(topic_components)
|
262 |
-
components.append(topic_accordion)
|
263 |
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
from visualization.bias_visualizer import process_and_visualize_bias_analysis
|
269 |
|
270 |
-
with
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
275 |
|
276 |
if not components:
|
277 |
components.append(gr.Markdown("No visualization components could be created from the analysis results."))
|
@@ -282,5 +334,4 @@ def process_and_visualize_analysis(analysis_results):
|
|
282 |
import traceback
|
283 |
error_msg = f"Visualization error: {str(e)}\n{traceback.format_exc()}"
|
284 |
print(error_msg)
|
285 |
-
return [gr.Markdown(f"**Error during visualization:**\n\n```\n{error_msg}\n```")]
|
286 |
-
|
|
|
171 |
word_list = [f"{item['word']} ({item['count']})" for item in words[:10]]
|
172 |
components.append(gr.Markdown(f"**{model}**: {', '.join(word_list)}"))
|
173 |
|
174 |
+
# Create a button to show visualizations
|
175 |
+
def show_bow_visualizations():
|
176 |
+
visualization_components = []
|
177 |
+
|
178 |
# Generate model-specific word frequency charts
|
179 |
if "important_words" in bow_results:
|
180 |
for model_name, words in bow_results.get("important_words", {}).items():
|
|
|
182 |
|
183 |
# Create bar chart for top words
|
184 |
fig = px.bar(df.head(15), x='word', y='count',
|
185 |
+
title=f"Top Words Used by {model_name}",
|
186 |
+
labels={'word': 'Word', 'count': 'Frequency'},
|
187 |
+
height=400)
|
188 |
|
189 |
# Improve layout
|
190 |
fig.update_layout(
|
|
|
193 |
xaxis={'categoryorder':'total descending'}
|
194 |
)
|
195 |
|
196 |
+
visualization_components.append(gr.Plot(value=fig))
|
197 |
|
198 |
# Visualize differential words
|
199 |
models = bow_results.get("models", [])
|
|
|
239 |
height=500
|
240 |
)
|
241 |
|
242 |
+
visualization_components.append(gr.Plot(value=fig))
|
243 |
+
|
244 |
+
return visualization_components
|
245 |
|
246 |
+
# Add a button to show visualizations
|
247 |
+
show_button = gr.Button("📊 Show Word Frequency Visualizations")
|
248 |
+
visualization_container = gr.Column(visible=False)
|
249 |
+
|
250 |
+
def show_visualizations():
|
251 |
+
return gr.Column(visible=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
|
253 |
+
show_button.click(
|
254 |
+
fn=show_visualizations,
|
255 |
+
outputs=visualization_container
|
256 |
+
)
|
|
|
257 |
|
258 |
+
with visualization_container:
|
259 |
+
# Generate model-specific word frequency charts
|
260 |
+
if "important_words" in bow_results:
|
261 |
+
for model_name, words in bow_results.get("important_words", {}).items():
|
262 |
+
df = pd.DataFrame(words)
|
263 |
+
|
264 |
+
# Create bar chart for top words
|
265 |
+
fig = px.bar(df.head(15), x='word', y='count',
|
266 |
+
title=f"Top Words Used by {model_name}",
|
267 |
+
labels={'word': 'Word', 'count': 'Frequency'},
|
268 |
+
height=400)
|
269 |
+
|
270 |
+
# Improve layout
|
271 |
+
fig.update_layout(
|
272 |
+
xaxis_title="Word",
|
273 |
+
yaxis_title="Frequency",
|
274 |
+
xaxis={'categoryorder':'total descending'}
|
275 |
+
)
|
276 |
+
|
277 |
+
gr.Plot(value=fig)
|
278 |
+
|
279 |
+
# Visualize differential words
|
280 |
+
models = bow_results.get("models", [])
|
281 |
+
diff_words = bow_results.get("differential_words", [])
|
282 |
+
word_matrix = bow_results.get("word_count_matrix", {})
|
283 |
+
|
284 |
+
if len(models) >= 2 and diff_words and word_matrix:
|
285 |
+
model1, model2 = models[0], models[1]
|
286 |
+
diff_data = []
|
287 |
+
|
288 |
+
for word in diff_words[:15]:
|
289 |
+
if word in word_matrix:
|
290 |
+
counts = word_matrix[word]
|
291 |
+
diff_data.append({
|
292 |
+
"word": word,
|
293 |
+
model1: counts.get(model1, 0),
|
294 |
+
model2: counts.get(model2, 0)
|
295 |
+
})
|
296 |
+
|
297 |
+
if diff_data:
|
298 |
+
diff_df = pd.DataFrame(diff_data)
|
299 |
+
|
300 |
+
# Create grouped bar chart
|
301 |
+
fig = go.Figure()
|
302 |
+
fig.add_trace(go.Bar(
|
303 |
+
x=diff_df['word'],
|
304 |
+
y=diff_df[model1],
|
305 |
+
name=model1,
|
306 |
+
marker_color='indianred'
|
307 |
+
))
|
308 |
+
fig.add_trace(go.Bar(
|
309 |
+
x=diff_df['word'],
|
310 |
+
y=diff_df[model2],
|
311 |
+
name=model2,
|
312 |
+
marker_color='lightsalmon'
|
313 |
+
))
|
314 |
+
|
315 |
+
fig.update_layout(
|
316 |
+
title="Word Frequency Comparison",
|
317 |
+
xaxis_title="Word",
|
318 |
+
yaxis_title="Frequency",
|
319 |
+
barmode='group',
|
320 |
+
height=500
|
321 |
+
)
|
322 |
+
|
323 |
+
gr.Plot(value=fig)
|
324 |
+
|
325 |
+
components.append(show_button)
|
326 |
+
components.append(visualization_container)
|
327 |
|
328 |
if not components:
|
329 |
components.append(gr.Markdown("No visualization components could be created from the analysis results."))
|
|
|
334 |
import traceback
|
335 |
error_msg = f"Visualization error: {str(e)}\n{traceback.format_exc()}"
|
336 |
print(error_msg)
|
337 |
+
return [gr.Markdown(f"**Error during visualization:**\n\n```\n{error_msg}\n```")]
|
|