Spaces:
Sleeping
Sleeping
Ryan
committed on
Commit
·
d164098
1
Parent(s):
3ae6198
update
Browse files
- visualization/bow_visualizer.py +81 -144
visualization/bow_visualizer.py
CHANGED
@@ -171,159 +171,96 @@ def process_and_visualize_analysis(analysis_results):
|
|
171 |
word_list = [f"{item['word']} ({item['count']})" for item in words[:10]]
|
172 |
components.append(gr.Markdown(f"**{model}**: {', '.join(word_list)}"))
|
173 |
|
174 |
-
#
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
# Generate model-specific word frequency charts
|
179 |
-
if "important_words" in bow_results:
|
180 |
-
for model_name, words in bow_results.get("important_words", {}).items():
|
181 |
-
df = pd.DataFrame(words)
|
182 |
-
|
183 |
-
# Create bar chart for top words
|
184 |
-
fig = px.bar(df.head(15), x='word', y='count',
|
185 |
-
title=f"Top Words Used by {model_name}",
|
186 |
-
labels={'word': 'Word', 'count': 'Frequency'},
|
187 |
-
height=400)
|
188 |
-
|
189 |
-
# Improve layout
|
190 |
-
fig.update_layout(
|
191 |
-
xaxis_title="Word",
|
192 |
-
yaxis_title="Frequency",
|
193 |
-
xaxis={'categoryorder':'total descending'}
|
194 |
-
)
|
195 |
-
|
196 |
-
visualization_components.append(gr.Plot(value=fig))
|
197 |
-
|
198 |
-
# Visualize differential words
|
199 |
-
models = bow_results.get("models", [])
|
200 |
-
diff_words = bow_results.get("differential_words", [])
|
201 |
-
word_matrix = bow_results.get("word_count_matrix", {})
|
202 |
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
|
207 |
-
for
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
model1: counts.get(model1, 0),
|
213 |
-
model2: counts.get(model2, 0)
|
214 |
-
})
|
215 |
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
name=model1,
|
225 |
-
marker_color='indianred'
|
226 |
-
))
|
227 |
-
fig.add_trace(go.Bar(
|
228 |
-
x=diff_df['word'],
|
229 |
-
y=diff_df[model2],
|
230 |
-
name=model2,
|
231 |
-
marker_color='lightsalmon'
|
232 |
-
))
|
233 |
-
|
234 |
-
fig.update_layout(
|
235 |
-
title="Word Frequency Comparison",
|
236 |
-
xaxis_title="Word",
|
237 |
-
yaxis_title="Frequency",
|
238 |
-
barmode='group',
|
239 |
-
height=500
|
240 |
-
)
|
241 |
-
|
242 |
-
visualization_components.append(gr.Plot(value=fig))
|
243 |
-
|
244 |
-
return visualization_components
|
245 |
-
|
246 |
-
# Add a button to show visualizations
|
247 |
-
show_button = gr.Button("📊 Show Word Frequency Visualizations")
|
248 |
-
visualization_container = gr.Column(visible=False)
|
249 |
-
|
250 |
-
def show_visualizations():
|
251 |
-
return gr.Column(visible=True)
|
252 |
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
)
|
257 |
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
for model_name, words in bow_results.get("important_words", {}).items():
|
262 |
-
df = pd.DataFrame(words)
|
263 |
-
|
264 |
-
# Create bar chart for top words
|
265 |
-
fig = px.bar(df.head(15), x='word', y='count',
|
266 |
-
title=f"Top Words Used by {model_name}",
|
267 |
-
labels={'word': 'Word', 'count': 'Frequency'},
|
268 |
-
height=400)
|
269 |
-
|
270 |
-
# Improve layout
|
271 |
-
fig.update_layout(
|
272 |
-
xaxis_title="Word",
|
273 |
-
yaxis_title="Frequency",
|
274 |
-
xaxis={'categoryorder':'total descending'}
|
275 |
-
)
|
276 |
-
|
277 |
-
gr.Plot(value=fig)
|
278 |
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
|
|
|
|
|
|
|
|
283 |
|
284 |
-
if
|
285 |
-
|
286 |
-
diff_data = []
|
287 |
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
296 |
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
|
|
327 |
|
328 |
if not components:
|
329 |
components.append(gr.Markdown("No visualization components could be created from the analysis results."))
|
|
|
171 |
word_list = [f"{item['word']} ({item['count']})" for item in words[:10]]
|
172 |
components.append(gr.Markdown(f"**{model}**: {', '.join(word_list)}"))
|
173 |
|
174 |
+
# Generate and display visualizations directly
|
175 |
+
if "important_words" in bow_results:
|
176 |
+
components.append(gr.Markdown("### Word Frequency Visualizations"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
|
178 |
+
for model_name, words in bow_results.get("important_words", {}).items():
|
179 |
+
print(f"Creating visualization for {model_name}")
|
180 |
+
df = pd.DataFrame(words)
|
181 |
|
182 |
+
# Create bar chart for top words
|
183 |
+
fig = px.bar(df.head(15), x='word', y='count',
|
184 |
+
title=f"Top Words Used by {model_name}",
|
185 |
+
labels={'word': 'Word', 'count': 'Frequency'},
|
186 |
+
height=400)
|
|
|
|
|
|
|
187 |
|
188 |
+
# Improve layout
|
189 |
+
fig.update_layout(
|
190 |
+
xaxis_title="Word",
|
191 |
+
yaxis_title="Frequency",
|
192 |
+
xaxis={'categoryorder':'total descending'}
|
193 |
+
)
|
194 |
+
|
195 |
+
components.append(gr.Plot(value=fig))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
|
197 |
+
# Visualize differential words
|
198 |
+
models = bow_results.get("models", [])
|
199 |
+
diff_words = bow_results.get("differential_words", [])
|
200 |
+
word_matrix = bow_results.get("word_count_matrix", {})
|
201 |
|
202 |
+
if len(models) >= 2 and diff_words and word_matrix:
|
203 |
+
model1, model2 = models[0], models[1]
|
204 |
+
diff_data = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
205 |
|
206 |
+
for word in diff_words[:15]:
|
207 |
+
if word in word_matrix:
|
208 |
+
counts = word_matrix[word]
|
209 |
+
diff_data.append({
|
210 |
+
"word": word,
|
211 |
+
model1: counts.get(model1, 0),
|
212 |
+
model2: counts.get(model2, 0)
|
213 |
+
})
|
214 |
|
215 |
+
if diff_data:
|
216 |
+
diff_df = pd.DataFrame(diff_data)
|
|
|
217 |
|
218 |
+
# Create grouped bar chart
|
219 |
+
fig = go.Figure()
|
220 |
+
fig.add_trace(go.Bar(
|
221 |
+
x=diff_df['word'],
|
222 |
+
y=diff_df[model1],
|
223 |
+
name=model1,
|
224 |
+
marker_color='indianred'
|
225 |
+
))
|
226 |
+
fig.add_trace(go.Bar(
|
227 |
+
x=diff_df['word'],
|
228 |
+
y=diff_df[model2],
|
229 |
+
name=model2,
|
230 |
+
marker_color='lightsalmon'
|
231 |
+
))
|
232 |
|
233 |
+
fig.update_layout(
|
234 |
+
title="Word Frequency Comparison Between Models",
|
235 |
+
xaxis_title="Word",
|
236 |
+
yaxis_title="Frequency",
|
237 |
+
barmode='group',
|
238 |
+
height=500
|
239 |
+
)
|
240 |
+
|
241 |
+
components.append(gr.Plot(value=fig))
|
242 |
+
|
243 |
+
# Check for N-gram analysis
|
244 |
+
if "ngram_analysis" in analyses:
|
245 |
+
print("Processing N-gram visualization")
|
246 |
+
components.append(gr.Markdown("### N-gram Analysis"))
|
247 |
+
ngram_components = create_ngram_visualization(
|
248 |
+
{"analyses": {prompt: {"ngram_analysis": analyses["ngram_analysis"]}}}
|
249 |
+
)
|
250 |
+
# Skip the first component if it's a duplicate header
|
251 |
+
if len(ngram_components) > 1:
|
252 |
+
components.extend(ngram_components[1:])
|
253 |
+
|
254 |
+
# Check for Topic Modeling analysis
|
255 |
+
if "topic_modeling" in analyses:
|
256 |
+
print("Processing Topic Modeling visualization")
|
257 |
+
components.append(gr.Markdown("### Topic Modeling Analysis"))
|
258 |
+
topic_components = process_and_visualize_topic_analysis(
|
259 |
+
{"analyses": {prompt: {"topic_modeling": analyses["topic_modeling"]}}}
|
260 |
+
)
|
261 |
+
# Skip the first component if it's a duplicate header
|
262 |
+
if len(topic_components) > 1:
|
263 |
+
components.extend(topic_components[1:])
|
264 |
|
265 |
if not components:
|
266 |
components.append(gr.Markdown("No visualization components could be created from the analysis results."))
|