File size: 5,980 Bytes
e66f533
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import gradio as gr
import logging

# Set up logging
logger = logging.getLogger('gradio_app.processors.topic')


def process_topic_modeling(analysis_results, prompt, analyses):
    """
    Process Topic Modeling analysis and return UI updates.

    Args:
        analysis_results (dict): Complete analysis results
        prompt (str): The prompt being analyzed
        analyses (dict): Analysis data for the prompt; must contain a
            "topic_modeling" entry.

    Returns:
        tuple: 15 UI component updates, in order: analysis_results state,
            raw-JSON visibility, visualization-area visibility, analysis
            title, prompt title, models-compared header, model1 title,
            model1 words, model2 title, model2 words, similarity title,
            similarity metrics, status-message visibility, status message,
            and bias visualizations (always hidden for topic modeling).
    """
    topic_results = analyses["topic_modeling"]

    # Error path: hide every visualization component and surface the error
    # text in the status message instead.
    if "error" in topic_results:
        return (
            analysis_results,
            False,  # Don't show raw JSON
            False,  # Don't show visualization area
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            True,  # Show status message
            gr.update(visible=True, value=f"❌ **Topic modeling error:** {topic_results['error']}"),
            gr.update(visible=False)  # bias_visualizations
        )

    models = topic_results.get("models", [])
    method = topic_results.get("method", "lda").upper()
    n_topics = topic_results.get("n_topics", 3)

    # A comparison needs at least two models; otherwise fall back to the
    # shared "no visualization" response.
    if len(models) < 2:
        from analysis_runner import default_no_visualization
        return default_no_visualization(analysis_results)

    # Hoisted out of the model_topics branch: both the distribution display
    # and the comparisons section below need these names. Previously they
    # were only bound inside `if model_topics:`, so a result containing
    # "comparisons" but no "model_topics" raised NameError at the
    # comparison_key f-string.
    model1_name = models[0]
    model2_name = models[1]

    prompt_title_visible = True
    prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""

    models_compared_visible = True
    models_compared_value = f"### Topic Modeling Analysis ({method}, {n_topics} topics)"

    # Initialize component visibility and values
    model1_title_visible = False
    model1_title_value = ""
    model1_words_visible = False
    model1_words_value = ""
    model2_title_visible = False
    model2_title_value = ""
    model2_words_visible = False
    model2_words_value = ""
    similarity_title_visible = False
    similarity_metrics_visible = False
    similarity_metrics_value = ""

    # Extract and format topic information
    topics = topic_results.get("topics", [])

    if topics:
        # Format topic info for display (first 5 topics, top 5 words each)
        topic_info = []
        for topic in topics[:5]:
            topic_id = topic.get("id", 0)
            words = topic.get("words", [])[:5]

            if words:
                topic_info.append(f"**Topic {topic_id + 1}**: {', '.join(words)}")

        if topic_info:
            model1_title_visible = True
            model1_title_value = "#### Discovered Topics"
            model1_words_visible = True
            model1_words_value = "\n".join(topic_info)

    # Get per-model topic distributions, if the analysis produced them
    model_topics = topic_results.get("model_topics", {})

    if model_topics and model1_name in model_topics and model2_name in model_topics:
        model2_title_visible = True
        model2_title_value = "#### Topic Distribution"
        model2_words_visible = True

        # Simple distribution display: first 5 topic weights per model
        dist1 = model_topics[model1_name]
        dist2 = model_topics[model2_name]

        model2_words_value = f"""
            **{model1_name}**: {', '.join([f"Topic {i + 1}: {v:.2f}" for i, v in enumerate(dist1[:5])])}

            **{model2_name}**: {', '.join([f"Topic {i + 1}: {v:.2f}" for i, v in enumerate(dist2[:5])])}
            """

    # Add similarity metrics if available
    comparisons = topic_results.get("comparisons", {})
    if comparisons:
        comparison_key = f"{model1_name} vs {model2_name}"

        if comparison_key in comparisons:
            metrics = comparisons[comparison_key]
            js_div = metrics.get("js_divergence", 0)

            # Map the Jensen-Shannon divergence onto a human-readable label
            if js_div < 0.2:
                similarity_text = "very similar"
            elif js_div < 0.4:
                similarity_text = "somewhat similar"
            elif js_div < 0.6:
                similarity_text = "moderately different"
            else:
                similarity_text = "very different"

            similarity_title_visible = True
            similarity_metrics_visible = True
            similarity_metrics_value = f"""
            - **Topic Distribution Divergence**: {js_div:.4f}
            - The topic distributions between models are **{similarity_text}**
            - *Lower divergence values indicate more similar topic distributions*
            """

    return (
        analysis_results,  # analysis_results_state
        False,  # analysis_output visibility
        True,  # visualization_area_visible
        gr.update(visible=True),  # analysis_title
        gr.update(visible=prompt_title_visible, value=prompt_title_value),  # prompt_title
        gr.update(visible=models_compared_visible, value=models_compared_value),  # models_compared
        gr.update(visible=model1_title_visible, value=model1_title_value),  # model1_title
        gr.update(visible=model1_words_visible, value=model1_words_value),  # model1_words
        gr.update(visible=model2_title_visible, value=model2_title_value),  # model2_title
        gr.update(visible=model2_words_visible, value=model2_words_value),  # model2_words
        gr.update(visible=similarity_title_visible),  # similarity_metrics_title
        gr.update(visible=similarity_metrics_visible, value=similarity_metrics_value),  # similarity_metrics
        False,  # status_message_visible
        gr.update(visible=False),  # status_message
        gr.update(visible=False)  # bias_visualizations - Not visible for Topic Modeling
    )