Spaces:
Sleeping
Sleeping
File size: 7,589 Bytes
14bac19 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
"""
Visualization for topic modeling analysis results
"""
from visualization.ngram_visualizer import create_ngram_visualization
import gradio as gr
import json
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
def create_topic_visualization(analysis_results):
    """
    Create visualizations for topic modeling analysis results

    For every entry under ``analysis_results["analyses"]`` that contains a
    ``"topic_modeling"`` result, this emits (in order): a heading with the
    method and topic count, the pair of models being compared, one bar chart
    per discovered topic, a grouped bar chart of per-model topic
    distributions, and one grouped bar chart per model comparison.

    Args:
        analysis_results (dict): Analysis results from the topic modeling
            analysis. Must contain an ``"analyses"`` mapping; each value may
            carry a ``"topic_modeling"`` dict with the optional keys
            ``method``, ``n_topics``, ``models``, ``topics``,
            ``model_topics`` and ``comparisons``.

    Returns:
        list: List of gradio components with visualizations
    """
    # Check if we have valid results
    if not analysis_results or "analyses" not in analysis_results:
        return [gr.Markdown("No analysis results found.")]

    output_components = []

    # Only the per-prompt analyses are needed, not the prompt text itself
    for analyses in analysis_results["analyses"].values():
        if "topic_modeling" not in analyses:
            continue
        topic_results = analyses["topic_modeling"]

        # Show method and number of topics
        method = topic_results.get("method", "lda").upper()
        n_topics = topic_results.get("n_topics", 3)
        output_components.append(
            gr.Markdown(f"## Topic Modeling Analysis ({method}, {n_topics} topics)")
        )

        # Show models being compared
        models = topic_results.get("models", [])
        if len(models) >= 2:
            output_components.append(
                gr.Markdown(f"### Comparing responses from {models[0]} and {models[1]}")
            )

        # Visualize topics
        topics = topic_results.get("topics", [])
        if topics:
            output_components.append(gr.Markdown("### Discovered Topics"))
            output_components.extend(_topic_word_components(topics))

        # Visualize topic distributions for each model.
        # Require a non-empty model list: all() over an empty list is
        # vacuously true and would otherwise yield a chart with no bars.
        model_topics = topic_results.get("model_topics", {})
        if models and model_topics and all(model in model_topics for model in models):
            output_components.append(gr.Markdown("### Topic Distribution by Model"))
            output_components.append(_topic_distribution_component(models, model_topics))

        # Visualize topic differences
        comparisons = topic_results.get("comparisons", {})
        if comparisons:
            output_components.append(gr.Markdown("### Topic Distribution Differences"))
            output_components.extend(_comparison_components(comparisons, models))

    # Zero components, or a lone heading, means nothing detailed was rendered
    if len(output_components) <= 1:
        output_components.append(
            gr.Markdown("No detailed Topic Modeling analysis found in results.")
        )

    return output_components


def _topic_word_components(topics):
    """Build one word-weight bar chart (as a gr.Plot) per plottable topic."""
    components = []
    for topic in topics:
        topic_id = topic.get("id", 0)
        words = topic.get("words", [])
        weights = topic.get("weights", [])

        # A chart needs every word paired with exactly one weight
        if not (words and weights and len(words) == len(weights)):
            continue

        # Highest-weighted words first
        df = pd.DataFrame({'word': words, 'weight': weights})
        df = df.sort_values('weight', ascending=False)

        fig = px.bar(
            df, x='word', y='weight',
            title=f"Topic {topic_id+1} Top Words",  # ids are shown 1-based
            labels={'word': 'Word', 'weight': 'Weight'},
            height=300
        )
        components.append(gr.Plot(value=fig))
    return components


def _topic_distribution_component(models, model_topics):
    """Build the grouped bar chart comparing each model's topic distribution.

    The caller guarantees every entry of ``models`` is a key of
    ``model_topics``.
    """
    fig = go.Figure()
    for model in models:
        distribution = model_topics[model]
        fig.add_trace(go.Bar(
            x=[f"Topic {number}" for number in range(1, len(distribution) + 1)],
            y=distribution,
            name=model
        ))
    fig.update_layout(
        title="Topic Distributions Comparison",
        xaxis_title="Topic",
        yaxis_title="Weight",
        barmode='group',
        height=400
    )
    return gr.Plot(value=fig)


def _comparison_labels(comparison_key, models):
    """Derive the two series labels for a comparison keyed like "A vs B".

    Falls back to the first two entries of ``models``, then to generic
    labels, so an unexpectedly formatted key never aborts the rendering.
    """
    first, separator, second = comparison_key.partition(" vs ")
    if separator and first and second:
        return first, second
    if len(models) >= 2:
        return models[0], models[1]
    return "Model 1", "Model 2"


def _comparison_components(comparisons, models):
    """Build the divergence summary and weight chart for each comparison."""
    components = []
    for comparison_key, comparison_data in comparisons.items():
        js_divergence = comparison_data.get("js_divergence", 0)
        topic_differences = comparison_data.get("topic_differences", [])

        components.append(gr.Markdown(
            f"**{comparison_key}** - Jensen-Shannon Divergence: {js_divergence:.4f}"
        ))

        if not topic_differences:
            continue

        model1, model2 = _comparison_labels(comparison_key, models)
        # Both series share the same x axis (topic ids shown 1-based)
        topic_labels = [f"Topic {d['topic_id']+1}" for d in topic_differences]

        fig = go.Figure()
        fig.add_trace(go.Bar(
            x=topic_labels,
            y=[d["model1_weight"] for d in topic_differences],
            name=model1
        ))
        fig.add_trace(go.Bar(
            x=topic_labels,
            y=[d["model2_weight"] for d in topic_differences],
            name=model2
        ))
        fig.update_layout(
            title="Topic Weight Comparison",
            xaxis_title="Topic",
            yaxis_title="Weight",
            barmode='group',
            height=400
        )
        components.append(gr.Plot(value=fig))
    return components
def process_and_visualize_topic_analysis(analysis_results):
    """
    Build the topic modeling visualization, reporting failures in the UI

    Delegates to create_topic_visualization. Any exception raised while
    building the components is printed together with its traceback and
    returned as a single Markdown component instead of propagating.

    Args:
        analysis_results (dict): The analysis results

    Returns:
        list: List of gradio components for visualization, or a one-element
            list holding the formatted error message on failure
    """
    try:
        print("Starting visualization of topic modeling analysis results")
        components = create_topic_visualization(analysis_results)
    except Exception as exc:
        # Imported lazily: only needed on the failure path
        import traceback

        details = f"Topic modeling visualization error: {str(exc)}\n{traceback.format_exc()}"
        print(details)
        failure_notice = gr.Markdown(
            f"**Error during topic modeling visualization:**\n\n```\n{details}\n```"
        )
        return [failure_notice]
    else:
        return components
|