# GradioApp/visualization/bow_visualizer.py
# Author: Ryan — update (commit 2c58f4e)
import gradio as gr
import json
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import pandas as pd
from difflib import SequenceMatcher
from visualization.ngram_visualizer import create_ngram_visualization
from visualization.topic_visualizer import process_and_visualize_topic_analysis # Added import
def create_bow_visualization(analysis_results):
    """
    Create visualizations for bag-of-words analysis results.

    Args:
        analysis_results (dict | str): Analysis results from the bow analysis,
            either as a dict or a JSON-encoded string.

    Returns:
        list: List of gradio components with visualizations.
    """
    # Parse analysis results if it's a string
    if isinstance(analysis_results, str):
        try:
            results = json.loads(analysis_results)
        except json.JSONDecodeError:
            return [gr.Markdown("Error parsing analysis results.")]
    else:
        results = analysis_results

    output_components = []

    # Check if we have valid results
    if not results or "analyses" not in results:
        return [gr.Markdown("No analysis results found.")]

    # Process each prompt
    for prompt, analyses in results["analyses"].items():
        output_components.append(gr.Markdown(f"## Analysis of Prompt: \"{prompt}\""))

        # Process Bag of Words analysis if available
        if "bag_of_words" not in analyses:
            continue
        bow_results = analyses["bag_of_words"]

        # Show models being compared
        models = bow_results.get("models", [])
        if len(models) >= 2:
            output_components.append(
                gr.Markdown(f"### Comparing responses from {models[0]} and {models[1]}"))

        # Bar chart of the top words for each model
        important_words = bow_results.get("important_words", {})
        for model_name, words in important_words.items():
            if not words:
                # An empty word list would yield a DataFrame without the
                # 'word'/'count' columns and crash px.bar — skip it.
                continue
            df = pd.DataFrame(words)
            fig = px.bar(df, x='word', y='count',
                         title=f"Top Words Used by {model_name}",
                         labels={'word': 'Word', 'count': 'Frequency'},
                         height=400)
            # Improve layout: order bars by total frequency, descending
            fig.update_layout(
                xaxis_title="Word",
                yaxis_title="Frequency",
                xaxis={'categoryorder': 'total descending'}
            )
            output_components.append(gr.Plot(value=fig))

        # Visualize differential words (words with biggest frequency difference).
        # BUGFIX: the original indexed models[0]/models[1] here without checking
        # that two models exist (the len(models) >= 2 guard above only gated a
        # markdown line), raising IndexError for single-model results.
        diff_words = bow_results.get("differential_words", [])
        word_matrix = bow_results.get("word_count_matrix", {})
        if diff_words and word_matrix and len(models) >= 2:
            output_components.append(gr.Markdown("### Words with Biggest Frequency Differences"))
            # Create dataframe for plotting
            model1, model2 = models[0], models[1]
            diff_data = []
            for word in diff_words[:15]:  # Limit to top 15 for readability
                if word in word_matrix:
                    counts = word_matrix[word]
                    diff_data.append({
                        "word": word,
                        model1: counts.get(model1, 0),
                        model2: counts.get(model2, 0)
                    })
            if diff_data:
                diff_df = pd.DataFrame(diff_data)
                # Create grouped bar chart comparing both models per word
                fig = go.Figure()
                fig.add_trace(go.Bar(
                    x=diff_df['word'],
                    y=diff_df[model1],
                    name=model1,
                    marker_color='indianred'
                ))
                fig.add_trace(go.Bar(
                    x=diff_df['word'],
                    y=diff_df[model2],
                    name=model2,
                    marker_color='lightsalmon'
                ))
                fig.update_layout(
                    title="Word Frequency Comparison",
                    xaxis_title="Word",
                    yaxis_title="Frequency",
                    barmode='group',
                    height=500
                )
                output_components.append(gr.Plot(value=fig))

    # If nothing beyond the first prompt header was added, show a message
    if len(output_components) <= 1:
        output_components.append(gr.Markdown("No detailed Bag of Words analysis found in results."))

    return output_components
# Top-level entry point: walks the analysis results and dispatches each
# analysis type (bag-of-words, n-gram, topic modeling) to its visualizer.
def process_and_visualize_analysis(analysis_results):
    """
    Process the analysis results and create visualization components.

    Args:
        analysis_results (dict): The analysis results; expected to contain an
            "analyses" mapping of prompt -> per-analysis-type results.

    Returns:
        list: List of gradio components for visualization. On any unexpected
            error, a single Markdown component describing the failure.
    """
    try:
        print(f"Starting visualization of analysis results: {type(analysis_results)}")
        components = []

        if not analysis_results or "analyses" not in analysis_results:
            print("Warning: Empty or invalid analysis results")
            components.append(gr.Markdown("No analysis results to visualize."))
            return components

        # For each prompt in the analysis results
        for prompt, analyses in analysis_results.get("analyses", {}).items():
            print(f"Visualizing results for prompt: {prompt[:30]}...")
            components.append(gr.Markdown(f"## Analysis for Prompt:\n\"{prompt}\""))

            # Check for Bag of Words analysis
            if "bag_of_words" in analyses:
                print("Processing Bag of Words visualization")
                components.append(gr.Markdown("### Bag of Words Analysis"))
                bow_results = analyses["bag_of_words"]

                # BUGFIX: read "models" defensively once; the original indexed
                # bow_results["models"] inside a condition and raised KeyError
                # (aborting the whole visualization) when the key was absent.
                models = bow_results.get("models", [])

                # Display models compared
                if models:
                    components.append(gr.Markdown(f"**Models compared**: {', '.join(models)}"))

                # Display important words for each model
                if "important_words" in bow_results:
                    components.append(gr.Markdown("#### Most Common Words by Model"))
                    for model, words in bow_results["important_words"].items():
                        print(f"Creating word list for model {model}")
                        word_list = [f"{item['word']} ({item['count']})" for item in words[:10]]
                        components.append(gr.Markdown(f"**{model}**: {', '.join(word_list)}"))

                # Add visualizations for word frequency differences
                if ("differential_words" in bow_results
                        and "word_count_matrix" in bow_results
                        and len(models) >= 2):
                    diff_words = bow_results["differential_words"]
                    word_matrix = bow_results["word_count_matrix"]
                    if diff_words and word_matrix:
                        components.append(gr.Markdown("### Words with Biggest Frequency Differences"))
                        model1, model2 = models[0], models[1]
                        for word in diff_words[:10]:  # Limit to top 10 for readability
                            if word in word_matrix:
                                counts = word_matrix[word]
                                model1_count = counts.get(model1, 0)
                                model2_count = counts.get(model2, 0)
                                # Only include if there's a meaningful difference
                                if abs(model1_count - model2_count) > 0:
                                    components.append(gr.Markdown(
                                        f"- **{word}**: {model1}: {model1_count}, {model2}: {model2_count}"
                                    ))

            # Check for N-gram analysis — delegate to the dedicated visualizer
            if "ngram_analysis" in analyses:
                print("Processing N-gram visualization")
                ngram_components = create_ngram_visualization(
                    {"analyses": {prompt: {"ngram_analysis": analyses["ngram_analysis"]}}})
                components.extend(ngram_components)

            # Check for Topic Modeling analysis — delegate likewise
            if "topic_modeling" in analyses:
                print("Processing Topic Modeling visualization")
                topic_components = process_and_visualize_topic_analysis(
                    {"analyses": {prompt: {"topic_modeling": analyses["topic_modeling"]}}})
                components.extend(topic_components)

        if not components:
            components.append(gr.Markdown("No visualization components could be created from the analysis results."))

        print(f"Visualization complete: generated {len(components)} components")
        return components
    except Exception as e:
        # Broad catch is deliberate at this UI boundary: surface the traceback
        # to the user instead of crashing the Gradio app.
        import traceback
        error_msg = f"Visualization error: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        return [gr.Markdown(f"**Error during visualization:**\n\n```\n{error_msg}\n```")]