Spaces:

RyanS974
/

525GradioApp

Sleeping

App Files Files Community

Ryan committed on Apr 22

Commit

4e77dc0

1 Parent(s): f26268c

update

Browse files

Files changed (8) hide show

.DS_Store +0 -0
app.py +6 -80
processors/bias_detection.py +0 -274
processors/topic_modeling.py +44 -319
ui/analysis_screen.py +119 -144
visualization/__init__.py +2 -4
visualization/bias_visualizer.py +0 -233
visualization/topic_visualizer.py +55 -238

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

app.py CHANGED Viewed

@@ -97,7 +97,7 @@ def create_app():
         # Analysis Tab
         with gr.Tab("Analysis"):
             # Use create_analysis_screen to get UI components including visualization container
-            analysis_options, analysis_params, run_analysis_btn, analysis_output, ngram_n, topic_count = create_analysis_screen()
             # Pre-create visualization components (initially hidden)
             visualization_area_visible = gr.Checkbox(value=False, visible=False, label="Visualization Visible")
@@ -122,7 +122,7 @@ def create_app():
             status_message = gr.Markdown(visible=False)
             # Define a helper function to extract parameter values and run the analysis
-            def run_analysis(dataset, selected_analysis, ngram_n, topic_count):
                 try:
                     if not dataset or "entries" not in dataset or not dataset["entries"]:
                         return (
@@ -143,11 +143,10 @@ def create_app():
                         )
                     parameters = {
-                        "bow_top": 25,  # Default fixed value for Bag of Words
                         "ngram_n": ngram_n,
-                        "ngram_top": 10,  # Default fixed value for N-gram analysis
-                        "topic_count": topic_count,
-                        "bias_methods": ["partisan"]  # Default to partisan leaning only
                     }
                     print(f"Running analysis with selected type: {selected_analysis}")
                     print("Parameters:", parameters)
@@ -448,79 +447,6 @@ def create_app():
                                     f"- **{category}**: {diff}"
                                     for category, diff in differences.items()
                                 ])
-                    # Check for Bias Detection analysis
-                    elif selected_analysis == "Bias Detection" and "bias_detection" in analyses:
-                        visualization_area_visible = True
-                        bias_results = analyses["bias_detection"]
-                        models = bias_results.get("models", [])
-                        if len(models) >= 2:
-                            prompt_title_visible = True
-                            prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
-                            models_compared_visible = True
-                            models_compared_value = f"### Bias Analysis: Comparing responses from {models[0]} and {models[1]}"
-                            # Display comparative bias results
-                            model1_name = models[0]
-                            model2_name = models[1]
-                            if "comparative" in bias_results:
-                                comparative = bias_results["comparative"]
-                                # Format summary for display
-                                model1_title_visible = True
-                                model1_title_value = "#### Bias Detection Summary"
-                                model1_words_visible = True
-                                summary_parts = []
-                                # Add partisan comparison (focus on partisan leaning)
-                                if "partisan" in comparative:
-                                    part = comparative["partisan"]
-                                    is_significant = part.get("significant", False)
-                                    summary_parts.append(
-                                        f"**Partisan Leaning**: {model1_name} appears {part.get(model1_name, 'N/A')}, " +
-                                        f"while {model2_name} appears {part.get(model2_name, 'N/A')}. " +
-                                        f"({'Significant' if is_significant else 'Minor'} difference)"
-                                    )
-                                # Add overall assessment
-                                if "overall" in comparative:
-                                    overall = comparative["overall"]
-                                    significant = overall.get("significant_bias_difference", False)
-                                    summary_parts.append(
-                                        f"**Overall Assessment**: " +
-                                        f"Analysis shows a {overall.get('difference', 0):.2f}/1.0 difference in bias patterns. " +
-                                        f"({'Significant' if significant else 'Minor'} overall bias difference)"
-                                    )
-                                # Combine all parts
-                                model1_words_value = "\n\n".join(summary_parts)
-                                # Format detailed term analysis
-                                if (model1_name in bias_results and "partisan" in bias_results[model1_name] and
-                                    model2_name in bias_results and "partisan" in bias_results[model2_name]):
-                                    model2_title_visible = True
-                                    model2_title_value = "#### Partisan Term Analysis"
-                                    model2_words_visible = True
-                                    m1_lib = bias_results[model1_name]["partisan"].get("liberal_terms", [])
-                                    m1_con = bias_results[model1_name]["partisan"].get("conservative_terms", [])
-                                    m2_lib = bias_results[model2_name]["partisan"].get("liberal_terms", [])
-                                    m2_con = bias_results[model2_name]["partisan"].get("conservative_terms", [])
-                                    model2_words_value = f"""
-                                    **{model1_name}**:
-                                    - Liberal terms: {', '.join(m1_lib) if m1_lib else 'None detected'}
-                                    - Conservative terms: {', '.join(m1_con) if m1_con else 'None detected'}
-                                    **{model2_name}**:
-                                    - Liberal terms: {', '.join(m2_lib) if m2_lib else 'None detected'}
-                                    - Conservative terms: {', '.join(m2_con) if m2_con else 'None detected'}
-                                    """
                     # If we don't have visualization data from any analysis
                     if not visualization_area_visible:
@@ -636,7 +562,7 @@ def create_app():
         # Run analysis with proper parameters
         run_analysis_btn.click(
             fn=run_analysis,
-            inputs=[dataset_state, analysis_options, ngram_n, topic_count],
             outputs=[
                 analysis_results_state,
                 analysis_output,

         # Analysis Tab
         with gr.Tab("Analysis"):
             # Use create_analysis_screen to get UI components including visualization container
+            analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top, topic_count = create_analysis_screen()
             # Pre-create visualization components (initially hidden)
             visualization_area_visible = gr.Checkbox(value=False, visible=False, label="Visualization Visible")
             status_message = gr.Markdown(visible=False)
             # Define a helper function to extract parameter values and run the analysis
+            def run_analysis(dataset, selected_analysis, bow_top, ngram_n, ngram_top, topic_count):
                 try:
                     if not dataset or "entries" not in dataset or not dataset["entries"]:
                         return (
                         )
                     parameters = {
+                        "bow_top": bow_top,
                         "ngram_n": ngram_n,
+                        "ngram_top": ngram_top,
+                        "topic_count": topic_count
                     }
                     print(f"Running analysis with selected type: {selected_analysis}")
                     print("Parameters:", parameters)
                                     f"- **{category}**: {diff}"
                                     for category, diff in differences.items()
                                 ])
                     # If we don't have visualization data from any analysis
                     if not visualization_area_visible:
         # Run analysis with proper parameters
         run_analysis_btn.click(
             fn=run_analysis,
+            inputs=[dataset_state, analysis_options, bow_top_slider, ngram_n, ngram_top, topic_count],
             outputs=[
                 analysis_results_state,
                 analysis_output,

processors/bias_detection.py DELETED Viewed

@@ -1,274 +0,0 @@
-"""
-Bias detection processor for analyzing political bias in text responses
-"""
-import nltk
-from nltk.sentiment import SentimentIntensityAnalyzer
-from sklearn.feature_extraction.text import CountVectorizer
-import re
-import json
-import os
-import numpy as np
-# Ensure NLTK resources are available
-def download_nltk_resources():
-    """Download required NLTK resources if not already downloaded"""
-    try:
-        nltk.download('vader_lexicon', quiet=True)
-    except:
-        pass
-download_nltk_resources()
-# Dictionary of partisan-leaning words
-# These are simplified examples; a real implementation would use a more comprehensive lexicon
-PARTISAN_WORDS = {
-    "liberal": [
-        "progressive", "equity", "climate", "reform", "collective",
-        "diversity", "inclusive", "sustainable", "justice", "regulation"
-    ],
-    "conservative": [
-        "traditional", "freedom", "liberty", "individual", "faith",
-        "values", "efficient", "deregulation", "patriot", "security"
-    ]
-}
-# Dictionary of framing patterns
-FRAMING_PATTERNS = {
-    "economic": [
-        r"econom(y|ic|ics)", r"tax(es|ation)", r"budget", r"spend(ing)",
-        r"jobs?", r"wage", r"growth", r"inflation", r"invest(ment)?"
-    ],
-    "moral": [
-        r"values?", r"ethic(s|al)", r"moral(s|ity)", r"right(s|eous)",
-        r"wrong", r"good", r"bad", r"faith", r"belief", r"tradition(al)?"
-    ],
-    "security": [
-        r"secur(e|ity)", r"defense", r"protect(ion)?", r"threat",
-        r"danger(ous)?", r"safe(ty)?", r"nation(al)?", r"terror(ism|ist)"
-    ],
-    "social_welfare": [
-        r"health(care)?", r"education", r"welfare", r"benefit", r"program",
-        r"help", r"assist(ance)?", r"support", r"service", r"care"
-    ]
-}
-def detect_sentiment_bias(text):
-    """
-    Analyze the sentiment of a text to identify potential bias
-    Args:
-        text (str): The text to analyze
-    Returns:
-        dict: Sentiment analysis results
-    """
-    sia = SentimentIntensityAnalyzer()
-    sentiment = sia.polarity_scores(text)
-    # Determine if sentiment indicates bias
-    if sentiment['compound'] >= 0.25:
-        bias_direction = "positive"
-        bias_strength = min(1.0, sentiment['compound'] * 2)  # Scale to 0-1
-    elif sentiment['compound'] <= -0.25:
-        bias_direction = "negative"
-        bias_strength = min(1.0, abs(sentiment['compound'] * 2))  # Scale to 0-1
-    else:
-        bias_direction = "neutral"
-        bias_strength = 0.0
-    return {
-        "sentiment_scores": sentiment,
-        "bias_direction": bias_direction,
-        "bias_strength": bias_strength
-    }
-def detect_partisan_leaning(text):
-    """
-    Analyze text for partisan-leaning language
-    Args:
-        text (str): The text to analyze
-    Returns:
-        dict: Partisan leaning analysis results
-    """
-    text_lower = text.lower()
-    # Count partisan words
-    liberal_count = 0
-    conservative_count = 0
-    liberal_matches = []
-    conservative_matches = []
-    # Search for partisan words in text
-    for word in PARTISAN_WORDS["liberal"]:
-        matches = re.findall(r'\b' + word + r'\b', text_lower)
-        if matches:
-            liberal_count += len(matches)
-            liberal_matches.extend(matches)
-    for word in PARTISAN_WORDS["conservative"]:
-        matches = re.findall(r'\b' + word + r'\b', text_lower)
-        if matches:
-            conservative_count += len(matches)
-            conservative_matches.extend(matches)
-    # Calculate partisan lean score (-1 to 1, negative = liberal, positive = conservative)
-    total_count = liberal_count + conservative_count
-    if total_count > 0:
-        lean_score = (conservative_count - liberal_count) / total_count
-    else:
-        lean_score = 0
-    # Determine leaning based on score
-    if lean_score <= -0.2:
-        leaning = "liberal"
-        strength = min(1.0, abs(lean_score * 2))
-    elif lean_score >= 0.2:
-        leaning = "conservative"
-        strength = min(1.0, lean_score * 2)
-    else:
-        leaning = "balanced"
-        strength = 0.0
-    return {
-        "liberal_count": liberal_count,
-        "conservative_count": conservative_count,
-        "liberal_terms": liberal_matches,
-        "conservative_terms": conservative_matches,
-        "lean_score": lean_score,
-        "leaning": leaning,
-        "strength": strength
-    }
-def detect_framing_bias(text):
-    """
-    Analyze how the text frames issues
-    Args:
-        text (str): The text to analyze
-    Returns:
-        dict: Framing analysis results
-    """
-    text_lower = text.lower()
-    framing_counts = {}
-    framing_examples = {}
-    # Count framing patterns
-    for frame, patterns in FRAMING_PATTERNS.items():
-        framing_counts[frame] = 0
-        framing_examples[frame] = []
-        for pattern in patterns:
-            matches = re.findall(pattern, text_lower)
-            if matches:
-                framing_counts[frame] += len(matches)
-                # Store up to 5 examples of each frame
-                unique_matches = set(matches)
-                framing_examples[frame].extend(list(unique_matches)[:5])
-    # Calculate dominant frame
-    total_framing = sum(framing_counts.values())
-    framing_distribution = {}
-    if total_framing > 0:
-        for frame, count in framing_counts.items():
-            framing_distribution[frame] = count / total_framing
-        dominant_frame = max(framing_counts.items(), key=lambda x: x[1])[0]
-        frame_bias_strength = max(0.0, framing_distribution[dominant_frame] - 0.25)
-    else:
-        dominant_frame = "none"
-        frame_bias_strength = 0.0
-        framing_distribution = {frame: 0.0 for frame in FRAMING_PATTERNS.keys()}
-    return {
-        "framing_counts": framing_counts,
-        "framing_examples": framing_examples,
-        "framing_distribution": framing_distribution,
-        "dominant_frame": dominant_frame,
-        "frame_bias_strength": frame_bias_strength
-    }
-def compare_bias(text1, text2, model_names=None):
-    """
-    Compare potential bias in two texts
-    Args:
-        text1 (str): First text to analyze
-        text2 (str): Second text to analyze
-        model_names (list): Optional names of models being compared
-    Returns:
-        dict: Comparative bias analysis
-    """
-    # Set default model names if not provided
-    if model_names is None or len(model_names) < 2:
-        model_names = ["Model 1", "Model 2"]
-    model1_name, model2_name = model_names[0], model_names[1]
-    # Analyze each text
-    sentiment_results1 = detect_sentiment_bias(text1)
-    sentiment_results2 = detect_sentiment_bias(text2)
-    partisan_results1 = detect_partisan_leaning(text1)
-    partisan_results2 = detect_partisan_leaning(text2)
-    framing_results1 = detect_framing_bias(text1)
-    framing_results2 = detect_framing_bias(text2)
-    # Determine if there's a significant difference in bias
-    sentiment_difference = abs(sentiment_results1["bias_strength"] - sentiment_results2["bias_strength"])
-    # For partisan leaning, compare the scores (negative is liberal, positive is conservative)
-    partisan_difference = abs(partisan_results1["lean_score"] - partisan_results2["lean_score"])
-    # Calculate overall bias difference
-    overall_difference = (sentiment_difference + partisan_difference) / 2
-    # Compare dominant frames
-    frame_difference = framing_results1["dominant_frame"] != framing_results2["dominant_frame"] and \
-                      (framing_results1["frame_bias_strength"] > 0.1 or framing_results2["frame_bias_strength"] > 0.1)
-    # Create comparative analysis
-    comparative = {
-        "sentiment": {
-            model1_name: sentiment_results1["bias_direction"],
-            model2_name: sentiment_results2["bias_direction"],
-            "difference": sentiment_difference,
-            "significant": sentiment_difference > 0.3
-        },
-        "partisan": {
-            model1_name: partisan_results1["leaning"],
-            model2_name: partisan_results2["leaning"],
-            "difference": partisan_difference,
-            "significant": partisan_difference > 0.4
-        },
-        "framing": {
-            model1_name: framing_results1["dominant_frame"],
-            model2_name: framing_results2["dominant_frame"],
-            "different_frames": frame_difference
-        },
-        "overall": {
-            "difference": overall_difference,
-            "significant_bias_difference": overall_difference > 0.35
-        }
-    }
-    return {
-        "models": model_names,
-        model1_name: {
-            "sentiment": sentiment_results1,
-            "partisan": partisan_results1,
-            "framing": framing_results1
-        },
-        model2_name: {
-            "sentiment": sentiment_results2,
-            "partisan": partisan_results2,
-            "framing": framing_results2
-        },
-        "comparative": comparative
-    }

processors/topic_modeling.py CHANGED Viewed

@@ -1,30 +1,16 @@
 """
-Enhanced topic modeling processor for comparing text responses
 """
 from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
 from sklearn.decomposition import LatentDirichletAllocation, NMF
 import numpy as np
 import nltk
 from nltk.corpus import stopwords
-from nltk.stem import WordNetLemmatizer
 import re
-from scipy.spatial import distance
-def download_nltk_resources():
-    """Download required NLTK resources if not already downloaded"""
-    try:
-        nltk.download('stopwords', quiet=True)
-        nltk.download('wordnet', quiet=True)
-        nltk.download('punkt', quiet=True)
-    except:
-        pass
-# Ensure NLTK resources are available
-download_nltk_resources()
 def preprocess_text(text):
     """
-    Preprocess text for topic modeling with improved tokenization and lemmatization
     Args:
         text (str): Text to preprocess
@@ -43,74 +29,13 @@ def preprocess_text(text):
     # Remove stopwords
     stop_words = set(stopwords.words('english'))
-    # Add custom stopwords (common in political discourse but low information)
-    custom_stopwords = {'the', 'and', 'of', 'to', 'in', 'a', 'is', 'that', 'for', 'on',
-                        'with', 'as', 'by', 'at', 'an', 'this', 'these', 'those', 'from',
-                        'or', 'not', 'be', 'are', 'it', 'was', 'were', 'been', 'being',
-                        'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing',
-                        'would', 'should', 'could', 'might', 'will', 'shall', 'can', 'may',
-                        'political', 'generally', 'policy', 'policies', 'also'}
-    stop_words.update(custom_stopwords)
-    # Lemmatize tokens
-    lemmatizer = WordNetLemmatizer()
-    tokens = [lemmatizer.lemmatize(token) for token in tokens
-              if token not in stop_words and len(token) > 3]
     return ' '.join(tokens)
-def get_coherence_score(model, feature_names, doc_term_matrix):
-    """
-    Calculate topic coherence score (approximation of UMass coherence)
-    Args:
-        model: Topic model (LDA or NMF)
-        feature_names: Feature names (words)
-        doc_term_matrix: Document-term matrix
-    Returns:
-        float: Coherence score
-    """
-    coherence_scores = []
-    for topic_idx, topic in enumerate(model.components_):
-        top_words_idx = topic.argsort()[:-11:-1]  # Top 10 words
-        top_words = [feature_names[i] for i in top_words_idx]
-        # Calculate co-occurrence for all word pairs
-        word_pairs_scores = []
-        for i in range(len(top_words)):
-            for j in range(i+1, len(top_words)):
-                word_i = top_words[i]
-                word_j = top_words[j]
-                # Get indices of these words in feature_names
-                try:
-                    word_i_idx = list(feature_names).index(word_i)
-                    word_j_idx = list(feature_names).index(word_j)
-                    # Calculate co-occurrence (approximation)
-                    doc_i = doc_term_matrix[:, word_i_idx].toarray().flatten()
-                    doc_j = doc_term_matrix[:, word_j_idx].toarray().flatten()
-                    co_occur = sum(1 for x, y in zip(doc_i, doc_j) if x > 0 and y > 0)
-                    word_pairs_scores.append(co_occur)
-                except:
-                    continue
-        if word_pairs_scores:
-            coherence_scores.append(sum(word_pairs_scores) / len(word_pairs_scores))
-    # Average coherence across all topics
-    if coherence_scores:
-        return sum(coherence_scores) / len(coherence_scores)
-    return 0.0
 def get_top_words_per_topic(model, feature_names, n_top_words=10):
     """
-    Get the top words for each topic in the model with improved word selection
     Args:
         model: Topic model (LDA or NMF)
@@ -124,61 +49,17 @@ def get_top_words_per_topic(model, feature_names, n_top_words=10):
     for topic_idx, topic in enumerate(model.components_):
         top_words_idx = topic.argsort()[:-n_top_words - 1:-1]
         top_words = [feature_names[i] for i in top_words_idx]
-        top_weights = topic[top_words_idx].tolist()
-        # Normalize weights for better visualization
-        total_weight = sum(top_weights)
-        if total_weight > 0:
-            normalized_weights = [w/total_weight for w in top_weights]
-        else:
-            normalized_weights = top_weights
         topic_dict = {
             "id": topic_idx,
             "words": top_words,
-            "weights": normalized_weights,
-            "raw_weights": top_weights
         }
         topics.append(topic_dict)
     return topics
-def calculate_topic_diversity(topics):
-    """
-    Calculate topic diversity based on word overlap
-    Args:
-        topics (list): List of topics with their words
-    Returns:
-        float: Topic diversity score (0-1, higher is more diverse)
-    """
-    if not topics or len(topics) < 2:
-        return 1.0  # Maximum diversity for a single topic
-    # Calculate Jaccard distance between all topic pairs
-    jaccard_distances = []
-    for i in range(len(topics)):
-        for j in range(i+1, len(topics)):
-            words_i = set(topics[i]["words"])
-            words_j = set(topics[j]["words"])
-            # Jaccard distance = 1 - Jaccard similarity
-            # Jaccard similarity = |intersection| / |union|
-            intersection = len(words_i.intersection(words_j))
-            union = len(words_i.union(words_j))
-            if union > 0:
-                jaccard_distance = 1 - (intersection / union)
-                jaccard_distances.append(jaccard_distance)
-    # Average Jaccard distance as diversity measure
-    if jaccard_distances:
-        return sum(jaccard_distances) / len(jaccard_distances)
-    return 0.0
 def extract_topics(texts, n_topics=3, n_top_words=10, method="lda"):
     """
-    Extract topics from a list of texts with enhanced preprocessing and metrics
     Args:
         texts (list): List of text documents
@@ -196,124 +77,49 @@ def extract_topics(texts, n_topics=3, n_top_words=10, method="lda"):
         "document_topics": []
     }
-    # Handle empty input
-    if not texts or all(not text.strip() for text in texts):
-        result["error"] = "No text content to analyze"
-        return result
     # Preprocess texts
     preprocessed_texts = [preprocess_text(text) for text in texts]
-    # Check if we have enough content after preprocessing
-    if all(not text.strip() for text in preprocessed_texts):
-        result["error"] = "No meaningful content after preprocessing"
-        return result
-    try:
-        # Create document-term matrix
-        if method == "nmf":
-            # For NMF, use TF-IDF vectorization
-            # Adjust min_df and max_df for small document sets
-            vectorizer = TfidfVectorizer(max_features=1000, min_df=1, max_df=0.95)
-        else:
-            # For LDA, use CountVectorizer
-            # Adjust min_df and max_df for small document sets
-            vectorizer = CountVectorizer(max_features=1000, min_df=1, max_df=0.95)
-        X = vectorizer.fit_transform(preprocessed_texts)
-        # Check if we have enough features
-        feature_names = vectorizer.get_feature_names_out()
-        if len(feature_names) < n_topics * 2:
-            # Adjust n_topics if we don't have enough features
-            original_n_topics = n_topics
-            n_topics = max(2, len(feature_names) // 2)
-            result["adjusted_n_topics"] = n_topics
-            result["original_n_topics"] = original_n_topics
-        # Apply topic modeling
-        if method == "nmf":
-            # Non-negative Matrix Factorization
-            model = NMF(n_components=n_topics, random_state=42, max_iter=500,
-                        alpha=0.1, l1_ratio=0.5)
-        else:
-            # Latent Dirichlet Allocation with better hyperparameters
-            model = LatentDirichletAllocation(
-                n_components=n_topics,
-                random_state=42,
-                max_iter=30,
-                learning_method='online',
-                learning_offset=50.0,
-                doc_topic_prior=0.1,
-                topic_word_prior=0.01
-            )
-        topic_distribution = model.fit_transform(X)
-        # Get top words for each topic
-        result["topics"] = get_top_words_per_topic(model, feature_names, n_top_words)
-        # Get topic distribution for each document
-        for i, dist in enumerate(topic_distribution):
-            # Normalize for easier comparison
-            normalized_dist = dist / np.sum(dist) if np.sum(dist) > 0 else dist
-            result["document_topics"].append({
-                "document_id": i,
-                "distribution": normalized_dist.tolist()
-            })
-        # Calculate coherence score
-        result["coherence_score"] = get_coherence_score(model, feature_names, X)
-        # Calculate topic diversity
-        result["diversity_score"] = calculate_topic_diversity(result["topics"])
-        return result
-    except Exception as e:
-        import traceback
-        result["error"] = str(e)
-        result["traceback"] = traceback.format_exc()
-        return result
-def calculate_js_divergence(p, q):
-    """
-    Calculate Jensen-Shannon divergence between two distributions
-    Args:
-        p (list): First probability distribution
-        q (list): Second probability distribution
-    Returns:
-        float: JS divergence (0-1, lower means more similar)
-    """
-    # Convert to numpy arrays
-    p = np.array(p)
-    q = np.array(q)
-    # Convert to proper probability distributions
-    p = p / np.sum(p) if np.sum(p) > 0 else p
-    q = q / np.sum(q) if np.sum(q) > 0 else q
-    # Calculate JS divergence
-    m = (p + q) / 2
-    # Handle potential errors
-    kl_pm = 0
-    for pi, mi in zip(p, m):
-        if pi > 0 and mi > 0:
-            kl_pm += pi * np.log2(pi / mi)
-    kl_qm = 0
-    for qi, mi in zip(q, m):
-        if qi > 0 and mi > 0:
-            kl_qm += qi * np.log2(qi / mi)
-    js_divergence = (kl_pm + kl_qm) / 2
-    return js_divergence
 def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method="lda", model_names=None):
     """
-    Compare topics between two sets of texts with enhanced metrics
     Args:
         texts_set_1 (list): First list of text documents
@@ -330,35 +136,10 @@ def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method=
     if model_names is None:
         model_names = ["Model 1", "Model 2"]
-    # Handle case where both sets are the same (e.g., comparing same document against itself)
-    if texts_set_1 == texts_set_2:
-        texts_set_2 = texts_set_2.copy()  # Create a copy to avoid reference issues
-    # Extract topics for each set individually
     topics_set_1 = extract_topics(texts_set_1, n_topics, n_top_words, method)
     topics_set_2 = extract_topics(texts_set_2, n_topics, n_top_words, method)
-    # Extract topics for combined set (for a common topic space)
-    combined_texts = texts_set_1 + texts_set_2
-    combined_topics = extract_topics(combined_texts, n_topics, n_top_words, method)
-    # Check for errors
-    if "error" in topics_set_1 or "error" in topics_set_2 or "error" in combined_topics:
-        errors = []
-        if "error" in topics_set_1:
-            errors.append(f"Error in set 1: {topics_set_1['error']}")
-        if "error" in topics_set_2:
-            errors.append(f"Error in set 2: {topics_set_2['error']}")
-        if "error" in combined_topics:
-            errors.append(f"Error in combined set: {combined_topics['error']}")
-        return {
-            "error": " | ".join(errors),
-            "method": method,
-            "n_topics": n_topics,
-            "models": model_names
-        }
     # Calculate similarity between topics
     similarity_matrix = []
     for topic1 in topics_set_1["topics"]:
@@ -385,72 +166,16 @@ def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method=
             "similarity": similarities[best_match_idx]
         })
-    # Calculate topic distribution differences
-    topic_differences = []
-    if (len(topics_set_1["document_topics"]) > 0 and
-        len(topics_set_2["document_topics"]) > 0):
-        # Get average topic distribution for each set
-        dist1 = np.mean([doc["distribution"] for doc in topics_set_1["document_topics"]], axis=0)
-        dist2 = np.mean([doc["distribution"] for doc in topics_set_2["document_topics"]], axis=0)
-        for i in range(min(len(dist1), len(dist2))):
-            topic_differences.append({
-                "topic_id": i,
-                "model1_weight": float(dist1[i]),
-                "model2_weight": float(dist2[i]),
-                "difference": float(abs(dist1[i] - dist2[i]))
-            })
-    # Calculate Jensen-Shannon Divergence
-    js_divergence = 0
-    if (len(topics_set_1["document_topics"]) > 0 and
-        len(topics_set_2["document_topics"]) > 0):
-        # Get topic distributions
-        dist1 = topics_set_1["document_topics"][0]["distribution"]
-        dist2 = topics_set_2["document_topics"][0]["distribution"]
-        # Calculate JS divergence
-        js_divergence = calculate_js_divergence(dist1, dist2)
     # Construct result
     result = {
         "method": method,
         "n_topics": n_topics,
-        "models": model_names,
         "set1_topics": topics_set_1["topics"],
         "set2_topics": topics_set_2["topics"],
-        "combined_topics": combined_topics["topics"],
         "similarity_matrix": similarity_matrix,
         "matched_topics": matched_topics,
         "average_similarity": np.mean([match["similarity"] for match in matched_topics]),
-        "topic_differences": topic_differences,
-        "js_divergence": js_divergence,
-        "model_topics": {
-            model_names[0]: topics_set_1["document_topics"][0]["distribution"] if topics_set_1["document_topics"] else [],
-            model_names[1]: topics_set_2["document_topics"][0]["distribution"] if topics_set_2["document_topics"] else []
-        },
-        "comparisons": {
-            f"{model_names[0]} vs {model_names[1]}": {
-                "js_divergence": js_divergence,
-                "topic_differences": topic_differences,
-                "average_topic_similarity": np.mean([match["similarity"] for match in matched_topics])
-            }
-        }
-    }
-    # Add coherence and diversity scores
-    result["coherence_scores"] = {
-        model_names[0]: topics_set_1.get("coherence_score", 0),
-        model_names[1]: topics_set_2.get("coherence_score", 0),
-        "combined": combined_topics.get("coherence_score", 0)
-    }
-    result["diversity_scores"] = {
-        model_names[0]: topics_set_1.get("diversity_score", 0),
-        model_names[1]: topics_set_2.get("diversity_score", 0),
-        "combined": combined_topics.get("diversity_score", 0)
     }
     return result

 """
+Topic modeling processor for comparing text responses
 """
 from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
 from sklearn.decomposition import LatentDirichletAllocation, NMF
 import numpy as np
 import nltk
 from nltk.corpus import stopwords
 import re
 def preprocess_text(text):
     """
+    Preprocess text for topic modeling
     Args:
         text (str): Text to preprocess
     # Remove stopwords
     stop_words = set(stopwords.words('english'))
+    tokens = [token for token in tokens if token not in stop_words and len(token) > 3]
     return ' '.join(tokens)
 def get_top_words_per_topic(model, feature_names, n_top_words=10):
     """
+    Get the top words for each topic in the model
     Args:
         model: Topic model (LDA or NMF)
     for topic_idx, topic in enumerate(model.components_):
         top_words_idx = topic.argsort()[:-n_top_words - 1:-1]
         top_words = [feature_names[i] for i in top_words_idx]
         topic_dict = {
             "id": topic_idx,
             "words": top_words,
+            "weights": topic[top_words_idx].tolist()
         }
         topics.append(topic_dict)
     return topics
 def extract_topics(texts, n_topics=3, n_top_words=10, method="lda"):
     """
+    Extract topics from a list of texts
     Args:
         texts (list): List of text documents
         "document_topics": []
     }
     # Preprocess texts
     preprocessed_texts = [preprocess_text(text) for text in texts]
+    # Create document-term matrix
+    if method == "nmf":
+        # For NMF, use TF-IDF vectorization
+        # Adjust min_df and max_df for small document sets
+        vectorizer = TfidfVectorizer(max_features=1000, min_df=1, max_df=1.0)
+    else:
+        # For LDA, use CountVectorizer
+        # Adjust min_df and max_df for small document sets
+        vectorizer = CountVectorizer(max_features=1000, min_df=1, max_df=1.0)
+    X = vectorizer.fit_transform(preprocessed_texts)
+    feature_names = vectorizer.get_feature_names_out()
+    # Apply topic modeling
+    if method == "nmf":
+        # Non-negative Matrix Factorization
+        model = NMF(n_components=n_topics, random_state=42, max_iter=1000)
+    else:
+        # Latent Dirichlet Allocation
+        model = LatentDirichletAllocation(n_components=n_topics, random_state=42, max_iter=20)
+    topic_distribution = model.fit_transform(X)
+    # Get top words for each topic
+    result["topics"] = get_top_words_per_topic(model, feature_names, n_top_words)
+    # Get topic distribution for each document
+    for i, dist in enumerate(topic_distribution):
+        # Normalize for easier comparison
+        normalized_dist = dist / np.sum(dist) if np.sum(dist) > 0 else dist
+        result["document_topics"].append({
+            "document_id": i,
+            "distribution": normalized_dist.tolist()
+        })
+    return result
 def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method="lda", model_names=None):
     """
+    Compare topics between two sets of texts
     Args:
         texts_set_1 (list): First list of text documents
     if model_names is None:
         model_names = ["Model 1", "Model 2"]
+    # Extract topics for each set
     topics_set_1 = extract_topics(texts_set_1, n_topics, n_top_words, method)
     topics_set_2 = extract_topics(texts_set_2, n_topics, n_top_words, method)
     # Calculate similarity between topics
     similarity_matrix = []
     for topic1 in topics_set_1["topics"]:
             "similarity": similarities[best_match_idx]
         })
     # Construct result
     result = {
         "method": method,
         "n_topics": n_topics,
         "set1_topics": topics_set_1["topics"],
         "set2_topics": topics_set_2["topics"],
         "similarity_matrix": similarity_matrix,
         "matched_topics": matched_topics,
         "average_similarity": np.mean([match["similarity"] for match in matched_topics]),
+        "models": model_names  # Add model names to result
     }
     return result

ui/analysis_screen.py CHANGED Viewed

@@ -7,7 +7,123 @@ from processors.topic_modeling import compare_topics
 from processors.ngram_analysis import compare_ngrams
 from processors.bow_analysis import compare_bow
 from processors.text_classifiers import classify_formality, classify_sentiment, classify_complexity, compare_classifications
-from processors.bias_detection import compare_bias
 # Add the implementation of these helper functions
 def extract_important_words(text, top_n=20):
@@ -207,6 +323,7 @@ def perform_topic_modeling(texts, model_names, n_topics=3):
     return result
 def process_analysis_request(dataset, selected_analysis, parameters):
     """
     Process the analysis request based on the selected options.
@@ -250,7 +367,6 @@ def process_analysis_request(dataset, selected_analysis, parameters):
         print(f"Using top_n value: {top_n}")  # Debug print
         # Perform Bag of Words analysis using the processor
-        from processors.bow_analysis import compare_bow
         bow_results = compare_bow(
             [model1_response, model2_response],
             [model1_name, model2_name],
@@ -264,7 +380,7 @@ def process_analysis_request(dataset, selected_analysis, parameters):
         if isinstance(ngram_size, str):
             ngram_size = int(ngram_size)
-        top_n = parameters.get("ngram_top", 10)  # Using default 10
         if isinstance(top_n, str):
             top_n = int(top_n)
@@ -285,9 +401,6 @@ def process_analysis_request(dataset, selected_analysis, parameters):
             topic_count = int(topic_count)
         try:
-            # Import the enhanced topic modeling function
-            from processors.topic_modeling import compare_topics
             topic_results = compare_topics(
                 texts_set_1=[model1_response],
                 texts_set_2=[model2_response],
@@ -295,13 +408,6 @@ def process_analysis_request(dataset, selected_analysis, parameters):
                 model_names=[model1_name, model2_name])
             results["analyses"][prompt_text]["topic_modeling"] = topic_results
-            # Add helpful message if text is very short
-            if (len(model1_response.split()) < 50 or len(model2_response.split()) < 50):
-                if "error" not in topic_results:
-                    # Add a warning message about short text
-                    results["analyses"][prompt_text]["topic_modeling"]["warning"] = "One or both texts are relatively short. Topic modeling works best with longer texts."
         except Exception as e:
             import traceback
             print(f"Topic modeling error: {str(e)}\n{traceback.format_exc()}")
@@ -313,8 +419,6 @@ def process_analysis_request(dataset, selected_analysis, parameters):
     elif selected_analysis == "Classifier":
         # Perform classifier analysis
-        from processors.text_classifiers import classify_formality, classify_sentiment, classify_complexity, compare_classifications
         results["analyses"][prompt_text]["classifier"] = {
             "models": [model1_name, model2_name],
             "classifications": {
@@ -331,28 +435,6 @@ def process_analysis_request(dataset, selected_analysis, parameters):
             },
             "differences": compare_classifications(model1_response, model2_response)
         }
-    elif selected_analysis == "Bias Detection":
-        try:
-            # Perform bias detection analysis, always focusing on partisan leaning
-            from processors.bias_detection import compare_bias
-            bias_results = compare_bias(
-                model1_response,
-                model2_response,
-                model_names=[model1_name, model2_name]
-            )
-            results["analyses"][prompt_text]["bias_detection"] = bias_results
-        except Exception as e:
-            import traceback
-            print(f"Bias detection error: {str(e)}\n{traceback.format_exc()}")
-            results["analyses"][prompt_text]["bias_detection"] = {
-                "models": [model1_name, model2_name],
-                "error": str(e),
-                "message": "Bias detection failed. Try with different parameters."
-            }
     else:
         # Unknown analysis type
@@ -360,110 +442,3 @@ def process_analysis_request(dataset, selected_analysis, parameters):
     # Return both the analysis results and a placeholder for visualization data
     return results, None
-def create_analysis_screen():
-    """
-    Create the analysis options screen with enhanced topic modeling options
-    Returns:
-        tuple: (analysis_options, analysis_params, run_analysis_btn, analysis_output, ngram_n, topic_count)
-    """
-    import gradio as gr
-    with gr.Column() as analysis_screen:
-        gr.Markdown("## Analysis Options")
-        gr.Markdown("Select which analysis you want to run on the LLM responses.")
-        # Change from CheckboxGroup to Radio for analysis selection
-        with gr.Group():
-            analysis_options = gr.Radio(
-                choices=[
-                    "Bag of Words",
-                    "N-gram Analysis",
-                    "Topic Modeling",
-                    "Bias Detection",
-                    "Classifier"
-                ],
-                value="Bag of Words",  # Default selection
-                label="Select Analysis Type"
-            )
-        # Create N-gram parameters accessible at top level
-        ngram_n = gr.Radio(
-            choices=["1", "2", "3"], value="2",
-            label="N-gram Size",
-            visible=False
-        )
-        # Create enhanced topic modeling parameter accessible at top level
-        topic_count = gr.Slider(
-            minimum=2, maximum=10, value=3, step=1,
-            label="Number of Topics",
-            info="Choose fewer topics for shorter texts, more topics for longer texts",
-            visible=False
-        )
-        # Parameters for each analysis type
-        with gr.Group() as analysis_params:
-            # Topic modeling parameters with enhanced options
-            with gr.Group(visible=False) as topic_params:
-                gr.Markdown("### Topic Modeling Parameters")
-                gr.Markdown("""
-                Topic modeling extracts thematic patterns from text.
-                For best results:
-                - Use longer text samples (100+ words)
-                - Adjust topic count based on text length
-                - For political content, 3-5 topics usually works well
-                """)
-                # We're already using topic_count defined above
-            # N-gram parameters group (using external ngram_n)
-            with gr.Group(visible=False) as ngram_params:
-                gr.Markdown("### N-gram Parameters")
-                # We're already using ngram_n defined above
-            # Bias detection parameters
-            with gr.Group(visible=False) as bias_params:
-                gr.Markdown("### Bias Detection Parameters")
-                gr.Markdown("Analysis will focus on detecting partisan leaning.")
-            # Classifier parameters
-            with gr.Group(visible=False) as classifier_params:
-                gr.Markdown("### Classifier Parameters")
-                gr.Markdown("Classifies responses based on formality, sentiment, and complexity")
-            # Function to update parameter visibility based on selected analysis
-            def update_params_visibility(selected):
-                return {
-                    topic_params: gr.update(visible=selected == "Topic Modeling"),
-                    ngram_params: gr.update(visible=selected == "N-gram Analysis"),
-                    bias_params: gr.update(visible=selected == "Bias Detection"),
-                    classifier_params: gr.update(visible=selected == "Classifier"),
-                    ngram_n: gr.update(visible=selected == "N-gram Analysis"),
-                    topic_count: gr.update(visible=selected == "Topic Modeling")
-                }
-            # Set up event handler for analysis selection
-            analysis_options.change(
-                fn=update_params_visibility,
-                inputs=[analysis_options],
-                outputs=[
-                    topic_params,
-                    ngram_params,
-                    bias_params,
-                    classifier_params,
-                    ngram_n,
-                    topic_count
-                ]
-            )
-        # Run analysis button
-        run_analysis_btn = gr.Button("Run Analysis", variant="primary", size="large")
-        # Analysis output area - hidden JSON component to store raw results
-        analysis_output = gr.JSON(label="Analysis Results", visible=False)
-    # Return the components needed by app.py
-    return analysis_options, analysis_params, run_analysis_btn, analysis_output, ngram_n, topic_count

 from processors.ngram_analysis import compare_ngrams
 from processors.bow_analysis import compare_bow
 from processors.text_classifiers import classify_formality, classify_sentiment, classify_complexity, compare_classifications
+def create_analysis_screen():
+    """
+    Build the "Analysis Options" panel and wire up parameter visibility.
+    The per-analysis parameter widgets (Bag of Words slider, N-gram size and
+    count, topic count) are created directly in the outer column rather than
+    inside the per-analysis groups, so they can be handed back to the caller.
+    Only the widgets belonging to the currently selected analysis are shown.
+    Returns:
+        tuple: (analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top, topic_count)
+    """
+    with gr.Column():
+        gr.Markdown("## Analysis Options")
+        gr.Markdown("Select which analysis you want to run on the LLM responses.")
+        # Radio (not CheckboxGroup): exactly one analysis runs at a time
+        with gr.Group():
+            # NOTE(review): "Bias Detection" is still offered here, but the
+            # bias_detection processor import and its handler branch are removed
+            # in this same change - confirm it should remain selectable.
+            analysis_options = gr.Radio(
+                choices=[
+                    "Bag of Words",
+                    "N-gram Analysis",
+                    "Topic Modeling",
+                    "Bias Detection",
+                    "Classifier"
+                ],
+                value="Bag of Words",  # Default selection
+                label="Select Analysis Type"
+            )
+        # Bag of Words is the default selection, so its heading and slider start
+        # visible. The heading is bound to a name so it can be hidden together
+        # with the slider when another analysis is selected.
+        bow_header = gr.Markdown("### Bag of Words Parameters")
+        bow_top_slider = gr.Slider(
+            minimum=10, maximum=100, value=25, step=5,
+            label="Top Words to Compare",
+            elem_id="bow_top_slider"
+        )
+        # N-gram parameters, hidden until "N-gram Analysis" is selected
+        ngram_n = gr.Radio(
+            choices=["1", "2", "3"], value="2",
+            label="N-gram Size",
+            visible=False
+        )
+        ngram_top = gr.Slider(
+            minimum=5, maximum=30, value=10, step=1,
+            label="Top N-grams to Display",
+            visible=False
+        )
+        # Topic modeling parameter, hidden until "Topic Modeling" is selected
+        topic_count = gr.Slider(
+            minimum=2, maximum=10, value=3, step=1,
+            label="Number of Topics",
+            visible=False
+        )
+        # Per-analysis groups: headings plus any controls not returned to the caller
+        with gr.Group() as analysis_params:
+            # Topic modeling: heading only, the control is topic_count above
+            with gr.Group(visible=False) as topic_params:
+                gr.Markdown("### Topic Modeling Parameters")
+            # N-gram: heading only, the controls are ngram_n and ngram_top above
+            with gr.Group(visible=False) as ngram_params:
+                gr.Markdown("### N-gram Parameters")
+            # Bias detection: this checkbox group is displayed but is not part
+            # of the returned tuple, so its value is not available to the caller
+            with gr.Group(visible=False) as bias_params:
+                gr.Markdown("### Bias Detection Parameters")
+                gr.CheckboxGroup(
+                    choices=["Sentiment Analysis", "Partisan Leaning", "Framing Analysis"],
+                    value=["Sentiment Analysis", "Partisan Leaning"],
+                    label="Bias Detection Methods"
+                )
+            # Classifier: informational text only
+            with gr.Group(visible=False) as classifier_params:
+                gr.Markdown("### Classifier Parameters")
+                gr.Markdown("Classifies responses based on formality, sentiment, and complexity")
+            def update_params_visibility(selected):
+                """Map the selected analysis name to a visibility update per widget."""
+                show_bow = selected == "Bag of Words"
+                show_ngram = selected == "N-gram Analysis"
+                show_topic = selected == "Topic Modeling"
+                return {
+                    topic_params: gr.update(visible=show_topic),
+                    ngram_params: gr.update(visible=show_ngram),
+                    bias_params: gr.update(visible=selected == "Bias Detection"),
+                    classifier_params: gr.update(visible=selected == "Classifier"),
+                    ngram_n: gr.update(visible=show_ngram),
+                    ngram_top: gr.update(visible=show_ngram),
+                    topic_count: gr.update(visible=show_topic),
+                    bow_header: gr.update(visible=show_bow),
+                    bow_top_slider: gr.update(visible=show_bow)
+                }
+            # Every component keyed in the dict above must also be listed as an output
+            analysis_options.change(
+                fn=update_params_visibility,
+                inputs=[analysis_options],
+                outputs=[
+                    topic_params,
+                    ngram_params,
+                    bias_params,
+                    classifier_params,
+                    ngram_n,
+                    ngram_top,
+                    topic_count,
+                    bow_header,
+                    bow_top_slider
+                ]
+            )
+        # Run analysis button
+        run_analysis_btn = gr.Button("Run Analysis", variant="primary", size="large")
+        # Hidden JSON component that stores the raw analysis results
+        analysis_output = gr.JSON(label="Analysis Results", visible=False)
+    # Return the components needed by app.py
+    return analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top, topic_count
 # Add the implementation of these helper functions
 def extract_important_words(text, top_n=20):
     return result
+# Process analysis request function
 def process_analysis_request(dataset, selected_analysis, parameters):
     """
     Process the analysis request based on the selected options.
         print(f"Using top_n value: {top_n}")  # Debug print
         # Perform Bag of Words analysis using the processor
         bow_results = compare_bow(
             [model1_response, model2_response],
             [model1_name, model2_name],
         if isinstance(ngram_size, str):
             ngram_size = int(ngram_size)
+        top_n = parameters.get("ngram_top", 15)
         if isinstance(top_n, str):
             top_n = int(top_n)
             topic_count = int(topic_count)
         try:
             topic_results = compare_topics(
                 texts_set_1=[model1_response],
                 texts_set_2=[model2_response],
                 model_names=[model1_name, model2_name])
             results["analyses"][prompt_text]["topic_modeling"] = topic_results
         except Exception as e:
             import traceback
             print(f"Topic modeling error: {str(e)}\n{traceback.format_exc()}")
     elif selected_analysis == "Classifier":
         # Perform classifier analysis
         results["analyses"][prompt_text]["classifier"] = {
             "models": [model1_name, model2_name],
             "classifications": {
             },
             "differences": compare_classifications(model1_response, model2_response)
         }
     else:
         # Unknown analysis type
     # Return both the analysis results and a placeholder for visualization data
     return results, None

visualization/__init__.py CHANGED Viewed

@@ -5,11 +5,9 @@ Visualization components for LLM Response Comparator
 from .bow_visualizer import process_and_visualize_analysis
 from .topic_visualizer import process_and_visualize_topic_analysis
 from .ngram_visualizer import process_and_visualize_ngram_analysis
-from .bias_visualizer import process_and_visualize_bias_analysis
 __all__ = [
     'process_and_visualize_analysis',
     'process_and_visualize_topic_analysis',
-    'process_and_visualize_ngram_analysis',
-    'process_and_visualize_bias_analysis'
-]

 from .bow_visualizer import process_and_visualize_analysis
 from .topic_visualizer import process_and_visualize_topic_analysis
 from .ngram_visualizer import process_and_visualize_ngram_analysis
 __all__ = [
     'process_and_visualize_analysis',
     'process_and_visualize_topic_analysis',
+    'process_and_visualize_ngram_analysis'
+]

visualization/bias_visualizer.py DELETED Viewed

@@ -1,233 +0,0 @@
-import gradio as gr
-import plotly.graph_objects as go
-import plotly.express as px
-import pandas as pd
-def create_bias_visualization(analysis_results):
-    """
-    Create visualizations for bias detection analysis results
-    Args:
-        analysis_results (dict): Analysis results from the bias detection
-    Returns:
-        list: List of gradio components with visualizations
-    """
-    output_components = []
-    # Check if we have valid results
-    if not analysis_results or "analyses" not in analysis_results:
-        return [gr.Markdown("No analysis results found.")]
-    # Process each prompt
-    for prompt, analyses in analysis_results["analyses"].items():
-        # Process Bias Detection analysis if available
-        if "bias_detection" in analyses:
-            bias_results = analyses["bias_detection"]
-            # Show models being compared
-            models = bias_results.get("models", [])
-            if len(models) >= 2:
-                output_components.append(gr.Markdown(f"### Bias Analysis: Comparing responses from {models[0]} and {models[1]}"))
-                # Check if there's an error
-                if "error" in bias_results:
-                    output_components.append(gr.Markdown(f"**Error in bias detection:** {bias_results['error']}"))
-                    continue
-                model1_name, model2_name = models[0], models[1]
-                # Comparative results
-                if "comparative" in bias_results:
-                    comparative = bias_results["comparative"]
-                    output_components.append(gr.Markdown("#### Comparative Bias Analysis"))
-                    # Create summary table
-                    summary_html = f"""
-                    <table style="width:100%; border-collapse: collapse; margin-bottom: 20px;">
-                    <tr>
-                        <th style="border: 1px solid #ddd; padding: 8px; text-align: left; background-color: #f2f2f2;">Bias Category</th>
-                        <th style="border: 1px solid #ddd; padding: 8px; text-align: left; background-color: #f2f2f2;">{model1_name}</th>
-                        <th style="border: 1px solid #ddd; padding: 8px; text-align: left; background-color: #f2f2f2;">{model2_name}</th>
-                        <th style="border: 1px solid #ddd; padding: 8px; text-align: left; background-color: #f2f2f2;">Significant Difference?</th>
-                    </tr>
-                    """
-                    # Sentiment row
-                    if "sentiment" in comparative:
-                        sent_sig = comparative["sentiment"].get("significant", False)
-                        summary_html += f"""
-                        <tr>
-                            <td style="border: 1px solid #ddd; padding: 8px;">Sentiment Bias</td>
-                            <td style="border: 1px solid #ddd; padding: 8px;">{comparative["sentiment"].get(model1_name, "N/A").title()}</td>
-                            <td style="border: 1px solid #ddd; padding: 8px;">{comparative["sentiment"].get(model2_name, "N/A").title()}</td>
-                            <td style="border: 1px solid #ddd; padding: 8px; font-weight: bold; color: {'red' if sent_sig else 'green'}">{"Yes" if sent_sig else "No"}</td>
-                        </tr>
-                        """
-                    # Partisan row
-                    if "partisan" in comparative:
-                        part_sig = comparative["partisan"].get("significant", False)
-                        summary_html += f"""
-                        <tr>
-                            <td style="border: 1px solid #ddd; padding: 8px;">Partisan Leaning</td>
-                            <td style="border: 1px solid #ddd; padding: 8px;">{comparative["partisan"].get(model1_name, "N/A").title()}</td>
-                            <td style="border: 1px solid #ddd; padding: 8px;">{comparative["partisan"].get(model2_name, "N/A").title()}</td>
-                            <td style="border: 1px solid #ddd; padding: 8px; font-weight: bold; color: {'red' if part_sig else 'green'}">{"Yes" if part_sig else "No"}</td>
-                        </tr>
-                        """
-                    # Framing row
-                    if "framing" in comparative:
-                        frame_diff = comparative["framing"].get("different_frames", False)
-                        summary_html += f"""
-                        <tr>
-                            <td style="border: 1px solid #ddd; padding: 8px;">Dominant Frame</td>
-                            <td style="border: 1px solid #ddd; padding: 8px;">{comparative["framing"].get(model1_name, "N/A").title().replace('_', ' ')}</td>
-                            <td style="border: 1px solid #ddd; padding: 8px;">{comparative["framing"].get(model2_name, "N/A").title().replace('_', ' ')}</td>
-                            <td style="border: 1px solid #ddd; padding: 8px; font-weight: bold; color: {'red' if frame_diff else 'green'}">{"Yes" if frame_diff else "No"}</td>
-                        </tr>
-                        """
-                    # Overall row
-                    if "overall" in comparative:
-                        overall_sig = comparative["overall"].get("significant_bias_difference", False)
-                        summary_html += f"""
-                        <tr>
-                            <td style="border: 1px solid #ddd; padding: 8px; font-weight: bold;">Overall Bias Difference</td>
-                            <td colspan="2" style="border: 1px solid #ddd; padding: 8px; text-align: center;">{comparative["overall"].get("difference", 0):.2f} / 1.0</td>
-                            <td style="border: 1px solid #ddd; padding: 8px; font-weight: bold; color: {'red' if overall_sig else 'green'}">{"Yes" if overall_sig else "No"}</td>
-                        </tr>
-                        """
-                    summary_html += "</table>"
-                    # Add the HTML table to the components
-                    output_components.append(gr.HTML(summary_html))
-                # Create detailed visualizations for each model if available
-                for model_name in [model1_name, model2_name]:
-                    if model_name in bias_results:
-                        model_data = bias_results[model_name]
-                        # Sentiment visualization
-                        if "sentiment" in model_data:
-                            sentiment = model_data["sentiment"]
-                            if "sentiment_scores" in sentiment:
-                                # Create sentiment score chart
-                                sentiment_df = pd.DataFrame({
-                                    'Score': [
-                                        sentiment["sentiment_scores"]["pos"],
-                                        sentiment["sentiment_scores"]["neg"],
-                                        sentiment["sentiment_scores"]["neu"]
-                                    ],
-                                    'Category': ['Positive', 'Negative', 'Neutral']
-                                })
-                                fig = px.bar(
-                                    sentiment_df,
-                                    x='Category',
-                                    y='Score',
-                                    title=f"Sentiment Analysis for {model_name}",
-                                    height=300
-                                )
-                                output_components.append(gr.Plot(value=fig))
-                        # Partisan leaning visualization
-                        if "partisan" in model_data:
-                            partisan = model_data["partisan"]
-                            if "liberal_count" in partisan and "conservative_count" in partisan:
-                                # Create partisan terms chart
-                                partisan_df = pd.DataFrame({
-                                    'Count': [partisan["liberal_count"], partisan["conservative_count"]],
-                                    'Category': ['Liberal Terms', 'Conservative Terms']
-                                })
-                                fig = px.bar(
-                                    partisan_df,
-                                    x='Category',
-                                    y='Count',
-                                    title=f"Partisan Term Usage for {model_name}",
-                                    color='Category',
-                                    color_discrete_map={
-                                        'Liberal Terms': 'blue',
-                                        'Conservative Terms': 'red'
-                                    },
-                                    height=300
-                                )
-                                output_components.append(gr.Plot(value=fig))
-                            # Show example partisan terms
-                            if "liberal_terms" in partisan or "conservative_terms" in partisan:
-                                lib_terms = ", ".join(partisan.get("liberal_terms", []))
-                                con_terms = ", ".join(partisan.get("conservative_terms", []))
-                                if lib_terms or con_terms:
-                                    terms_md = f"**Partisan Terms Used by {model_name}**\n\n"
-                                    if lib_terms:
-                                        terms_md += f"- Liberal terms: {lib_terms}\n"
-                                    if con_terms:
-                                        terms_md += f"- Conservative terms: {con_terms}\n"
-                                    output_components.append(gr.Markdown(terms_md))
-                        # Framing visualization
-                        if "framing" in model_data:
-                            framing = model_data["framing"]
-                            if "framing_distribution" in framing:
-                                # Create framing distribution chart
-                                frame_items = []
-                                for frame, value in framing["framing_distribution"].items():
-                                    frame_items.append({
-                                        'Frame': frame.replace('_', ' ').title(),
-                                        'Proportion': value
-                                    })
-                                frame_df = pd.DataFrame(frame_items)
-                                fig = px.pie(
-                                    frame_df,
-                                    values='Proportion',
-                                    names='Frame',
-                                    title=f"Issue Framing Distribution for {model_name}",
-                                    height=400
-                                )
-                                output_components.append(gr.Plot(value=fig))
-                            # Show example framing terms
-                            if "framing_examples" in framing:
-                                examples_md = f"**Example Framing Terms Used by {model_name}**\n\n"
-                                for frame, examples in framing["framing_examples"].items():
-                                    if examples:
-                                        examples_md += f"- {frame.replace('_', ' ').title()}: {', '.join(examples)}\n"
-                                output_components.append(gr.Markdown(examples_md))
-    # If no components were added, show a message
-    if len(output_components) <= 1:
-        output_components.append(gr.Markdown("No detailed bias detection analysis found in results."))
-    return output_components
-def process_and_visualize_bias_analysis(analysis_results):
-    """
-    Process the bias detection analysis results and create visualization components
-    Args:
-        analysis_results (dict): The analysis results
-    Returns:
-        list: List of gradio components for visualization
-    """
-    try:
-        print(f"Starting visualization of bias detection analysis results")
-        return create_bias_visualization(analysis_results)
-    except Exception as e:
-        import traceback
-        error_msg = f"Bias detection visualization error: {str(e)}\n{traceback.format_exc()}"
-        print(error_msg)
-        return [gr.Markdown(f"**Error during bias detection visualization:**\n\n```\n{error_msg}\n```")]

visualization/topic_visualizer.py CHANGED Viewed

@@ -1,16 +1,18 @@
 """
-Enhanced visualization for topic modeling analysis results
 """
 import gradio as gr
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
-import numpy as np
 def create_topic_visualization(analysis_results):
     """
-    Create enhanced visualizations for topic modeling analysis results
     Args:
         analysis_results (dict): Analysis results from the topic modeling analysis
@@ -31,127 +33,27 @@ def create_topic_visualization(analysis_results):
         if "topic_modeling" in analyses:
             topic_results = analyses["topic_modeling"]
-            # Check for errors first
-            if "error" in topic_results:
-                output_components.append(gr.Markdown(f"## ⚠️ Topic Modeling Error"))
-                output_components.append(gr.Markdown(f"Error: {topic_results['error']}"))
-                output_components.append(gr.Markdown("Try adjusting the number of topics or using longer text samples."))
-                continue
             # Show method and number of topics
             method = topic_results.get("method", "lda").upper()
             n_topics = topic_results.get("n_topics", 3)
-            # Check if n_topics was adjusted
-            if "adjusted_n_topics" in topic_results and topic_results["adjusted_n_topics"] != topic_results.get("original_n_topics", n_topics):
-                output_components.append(gr.Markdown(
-                    f"## Topic Modeling Analysis ({method}, {topic_results['adjusted_n_topics']} topics) " +
-                    f"*Adjusted from {topic_results['original_n_topics']} due to limited text content*"
-                ))
-                n_topics = topic_results["adjusted_n_topics"]
-            else:
-                output_components.append(gr.Markdown(f"## Topic Modeling Analysis ({method}, {n_topics} topics)"))
             # Show models being compared
             models = topic_results.get("models", [])
             if len(models) >= 2:
                 output_components.append(gr.Markdown(f"### Comparing responses from {models[0]} and {models[1]}"))
-                # Show topic quality metrics if available
-                if "coherence_scores" in topic_results:
-                    coherence_html = f"""
-                    <div style="margin: 20px 0; padding: 15px; background-color: #f8f9fa; border-radius: 5px;">
-                    <h4 style="margin-top: 0;">Topic Quality Metrics</h4>
-                    <table style="width: 100%; border-collapse: collapse;">
-                    <tr>
-                        <th style="text-align: left; padding: 8px; border-bottom: 1px solid #ddd;">Metric</th>
-                        <th style="text-align: center; padding: 8px; border-bottom: 1px solid #ddd;">{models[0]}</th>
-                        <th style="text-align: center; padding: 8px; border-bottom: 1px solid #ddd;">{models[1]}</th>
-                        <th style="text-align: center; padding: 8px; border-bottom: 1px solid #ddd;">Combined</th>
-                    </tr>
-                    <tr>
-                        <td style="padding: 8px; border-bottom: 1px solid #ddd;">Topic Coherence</td>
-                        <td style="text-align: center; padding: 8px; border-bottom: 1px solid #ddd;">
-                            {topic_results["coherence_scores"].get(models[0], 0):.2f}
-                        </td>
-                        <td style="text-align: center; padding: 8px; border-bottom: 1px solid #ddd;">
-                            {topic_results["coherence_scores"].get(models[1], 0):.2f}
-                        </td>
-                        <td style="text-align: center; padding: 8px; border-bottom: 1px solid #ddd;">
-                            {topic_results["coherence_scores"].get("combined", 0):.2f}
-                        </td>
-                    </tr>
-                    <tr>
-                        <td style="padding: 8px;">Topic Diversity</td>
-                        <td style="text-align: center; padding: 8px;">
-                            {topic_results["diversity_scores"].get(models[0], 0):.2f}
-                        </td>
-                        <td style="text-align: center; padding: 8px;">
-                            {topic_results["diversity_scores"].get(models[1], 0):.2f}
-                        </td>
-                        <td style="text-align: center; padding: 8px;">
-                            {topic_results["diversity_scores"].get("combined", 0):.2f}
-                        </td>
-                    </tr>
-                    </table>
-                    <p style="margin-bottom: 0; font-size: 0.9em; color: #666;">
-                        Higher coherence scores indicate more semantically coherent topics.<br>
-                        Higher diversity scores indicate less overlap between topics.
-                    </p>
-                    </div>
-                    """
-                    output_components.append(gr.HTML(coherence_html))
                 # Visualize topics
                 topics = topic_results.get("topics", [])
                 if topics:
                     output_components.append(gr.Markdown("### Discovered Topics"))
-                    # Create a topic word cloud using HTML/CSS for better visibility
                     for topic in topics:
                         topic_id = topic.get("id", 0)
                         words = topic.get("words", [])
                         weights = topic.get("weights", [])
-                        if words and weights and len(words) == len(weights):
-                            # Generate a word cloud-like div using HTML/CSS
-                            word_cloud_html = f"""
-                            <div style="margin-bottom: 25px;">
-                                <h4 style="margin-bottom: 10px;">Topic {topic_id+1}</h4>
-                                <div style="display: flex; flex-wrap: wrap; gap: 10px; background: #f9f9f9; padding: 15px; border-radius: 5px;">
-                            """
-                            # Sort words by weight for better visualization
-                            word_weight_pairs = sorted(zip(words, weights), key=lambda x: x[1], reverse=True)
-                            # Add each word with size based on weight
-                            for word, weight in word_weight_pairs:
-                                # Scale weight to a reasonable font size (min 14px, max 28px)
-                                font_size = 14 + min(14, round(weight * 30))
-                                # Color based on weight (darker = higher weight)
-                                color_intensity = max(0, min(90, int(100 - weight * 100)))
-                                color = f"hsl(210, 70%, {color_intensity}%)"
-                                word_cloud_html += f"""
-                                <span style="font-size: {font_size}px; color: {color}; margin: 3px;
-                                padding: 5px; border-radius: 3px; background: rgba(0,0,0,0.03);">
-                                {word}
-                                </span>
-                                """
-                            word_cloud_html += """
-                                </div>
-                            </div>
-                            """
-                            output_components.append(gr.HTML(word_cloud_html))
-                    # Add a proper bar chart visualization for topic words
-                    for topic in topics[:min(3, len(topics))]:  # Show charts for max 3 topics to avoid clutter
-                        topic_id = topic.get("id", 0)
-                        words = topic.get("words", [])
-                        weights = topic.get("weights", [])
                         if words and weights and len(words) == len(weights):
                             # Create dataframe for plotting
                             df = pd.DataFrame({
@@ -162,22 +64,12 @@ def create_topic_visualization(analysis_results):
                             # Sort by weight
                             df = df.sort_values('weight', ascending=False)
-                            # Limit to top N words for clarity
-                            df = df.head(10)
                             # Create bar chart
                             fig = px.bar(
-                                df, x='weight', y='word',
                                 title=f"Topic {topic_id+1} Top Words",
                                 labels={'word': 'Word', 'weight': 'Weight'},
-                                height=300,
-                                orientation='h'  # Horizontal bars
-                            )
-                            # Improve layout
-                            fig.update_layout(
-                                margin=dict(l=10, r=10, t=40, b=10),
-                                yaxis={'categoryorder': 'total ascending'}
                             )
                             output_components.append(gr.Plot(value=fig))
@@ -188,135 +80,66 @@ def create_topic_visualization(analysis_results):
                     output_components.append(gr.Markdown("### Topic Distribution by Model"))
                     # Create multi-model topic distribution comparison
-                    distribution_data = []
                     for model in models:
                         if model in model_topics:
                             distribution = model_topics[model]
-                            for i, weight in enumerate(distribution):
-                                distribution_data.append({
-                                    'Model': model,
-                                    'Topic': f"Topic {i+1}",
-                                    'Weight': weight
-                                })
-                    if distribution_data:
-                        df = pd.DataFrame(distribution_data)
-                        # Create grouped bar chart
-                        fig = px.bar(
-                            df, x='Topic', y='Weight', color='Model',
-                            barmode='group',
-                            title="Topic Distribution Comparison",
-                            height=400
-                        )
-                        output_components.append(gr.Plot(value=fig))
-                # Visualize topic differences as a heatmap
                 comparisons = topic_results.get("comparisons", {})
                 if comparisons:
-                    comparison_key = f"{models[0]} vs {models[1]}"
-                    if comparison_key in comparisons:
-                        output_components.append(gr.Markdown("### Topic Similarity Analysis"))
-                        # Get JS divergence
-                        js_divergence = comparisons[comparison_key].get("js_divergence", 0)
-                        # Create a divergence meter
-                        divergence_html = f"""
-                        <div style="margin: 20px 0; padding: 20px; background-color: #f8f9fa; border-radius: 5px; text-align: center;">
-                            <h4 style="margin-top: 0;">Topic Distribution Divergence</h4>
-                            <div style="display: flex; align-items: center; justify-content: center;">
-                                <div style="width: 300px; height: 40px; background: linear-gradient(to right, #1a9850, #ffffbf, #d73027); border-radius: 5px; position: relative; margin: 10px 0;">
-                                    <div style="position: absolute; height: 40px; width: 2px; background-color: #000; left: {min(300, max(0, js_divergence * 300))}px;"></div>
-                                </div>
-                            </div>
-                            <div style="display: flex; justify-content: space-between; width: 300px; margin: 0 auto;">
-                                <span>Similar (0.0)</span>
-                                <span>Different (1.0)</span>
-                            </div>
-                            <p style="margin-top: 10px; font-weight: bold;">Score: {js_divergence:.3f}</p>
-                            <p style="margin-bottom: 0; font-size: 0.9em; color: #666;">
-                                Jensen-Shannon Divergence measures the similarity between topic distributions.<br>
-                                Lower values indicate more similar topic distributions between models.
-                            </p>
-                        </div>
-                        """
-                        output_components.append(gr.HTML(divergence_html))
-                        # Create similarity matrix heatmap if available
-                        similarity_matrix = topic_results.get("similarity_matrix", [])
-                        if similarity_matrix and len(similarity_matrix) > 0:
-                            # Convert to format for heatmap
-                            z_data = similarity_matrix
-                            # Create heatmap
-                            fig = go.Figure(data=go.Heatmap(
-                                z=z_data,
-                                x=[f"{models[1]} Topic {i+1}" for i in range(len(similarity_matrix[0]))],
-                                y=[f"{models[0]} Topic {i+1}" for i in range(len(similarity_matrix))],
-                                colorscale='Viridis',
-                                showscale=True,
-                                colorbar=dict(title="Similarity")
                             ))
                             fig.update_layout(
-                                title="Topic Similarity Matrix",
-                                height=400,
-                                margin=dict(l=50, r=50, t=50, b=50)
                             )
                             output_components.append(gr.Plot(value=fig))
-                # Show best matching topics
-                matched_topics = topic_results.get("matched_topics", [])
-                if matched_topics:
-                    output_components.append(gr.Markdown("### Most Similar Topic Pairs"))
-                    # Create HTML table for matched topics
-                    matched_topics_html = """
-                    <div style="margin: 20px 0;">
-                    <table style="width: 100%; border-collapse: collapse;">
-                    <tr>
-                        <th style="padding: 8px; border-bottom: 2px solid #ddd; text-align: left;">Topic Pair</th>
-                        <th style="padding: 8px; border-bottom: 2px solid #ddd; text-align: left;">Top Words in Model 1</th>
-                        <th style="padding: 8px; border-bottom: 2px solid #ddd; text-align: left;">Top Words in Model 2</th>
-                        <th style="padding: 8px; border-bottom: 2px solid #ddd; text-align: center;">Similarity</th>
-                    </tr>
-                    """
-                    # Sort by similarity, highest first
-                    sorted_matches = sorted(matched_topics, key=lambda x: x['similarity'], reverse=True)
-                    for match in sorted_matches:
-                        # Format words with commas
-                        words1 = ", ".join(match["set1_topic_words"][:5])  # Show top 5 words
-                        words2 = ", ".join(match["set2_topic_words"][:5])  # Show top 5 words
-                        # Calculate color based on similarity (green for high, red for low)
-                        similarity = match["similarity"]
-                        color = f"hsl({int(120 * similarity)}, 70%, 50%)"
-                        matched_topics_html += f"""
-                        <tr>
-                            <td style="padding: 8px; border-bottom: 1px solid #ddd;">
-                                {models[0]} Topic {match['set1_topic_id']+1} ↔ {models[1]} Topic {match['set2_topic_id']+1}
-                            </td>
-                            <td style="padding: 8px; border-bottom: 1px solid #ddd;">{words1}</td>
-                            <td style="padding: 8px; border-bottom: 1px solid #ddd;">{words2}</td>
-                            <td style="padding: 8px; border-bottom: 1px solid #ddd; text-align: center; font-weight: bold; color: {color};">
-                                {similarity:.2f}
-                            </td>
-                        </tr>
-                        """
-                    matched_topics_html += """
-                    </table>
-                    </div>
-                    """
-                    output_components.append(gr.HTML(matched_topics_html))
     # If no components were added, show a message
     if len(output_components) <= 1:
@@ -337,15 +160,9 @@ def process_and_visualize_topic_analysis(analysis_results):
     """
     try:
         print(f"Starting visualization of topic modeling analysis results")
-        components = create_topic_visualization(analysis_results)
-        print(f"Completed topic modeling visualization with {len(components)} components")
-        return components
     except Exception as e:
         import traceback
         error_msg = f"Topic modeling visualization error: {str(e)}\n{traceback.format_exc()}"
         print(error_msg)
-        return [
-            gr.Markdown(f"**Error during topic modeling visualization:**"),
-            gr.Markdown(f"```\n{str(e)}\n```"),
-            gr.Markdown("Try adjusting the number of topics or using longer text inputs.")
-        ]

 """
+Visualization for topic modeling analysis results
 """
+from visualization.ngram_visualizer import create_ngram_visualization
 import gradio as gr
+import json
+import numpy as np
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 def create_topic_visualization(analysis_results):
     """
+    Create visualizations for topic modeling analysis results
     Args:
         analysis_results (dict): Analysis results from the topic modeling analysis
         if "topic_modeling" in analyses:
             topic_results = analyses["topic_modeling"]
             # Show method and number of topics
             method = topic_results.get("method", "lda").upper()
             n_topics = topic_results.get("n_topics", 3)
+            output_components.append(gr.Markdown(f"## Topic Modeling Analysis ({method}, {n_topics} topics)"))
             # Show models being compared
             models = topic_results.get("models", [])
             if len(models) >= 2:
                 output_components.append(gr.Markdown(f"### Comparing responses from {models[0]} and {models[1]}"))
                 # Visualize topics
                 topics = topic_results.get("topics", [])
                 if topics:
                     output_components.append(gr.Markdown("### Discovered Topics"))
                     for topic in topics:
                         topic_id = topic.get("id", 0)
                         words = topic.get("words", [])
                         weights = topic.get("weights", [])
+                        # Create topic word bar chart
                         if words and weights and len(words) == len(weights):
                             # Create dataframe for plotting
                             df = pd.DataFrame({
                             # Sort by weight
                             df = df.sort_values('weight', ascending=False)
                             # Create bar chart
                             fig = px.bar(
+                                df, x='word', y='weight',
                                 title=f"Topic {topic_id+1} Top Words",
                                 labels={'word': 'Word', 'weight': 'Weight'},
+                                height=300
                             )
                             output_components.append(gr.Plot(value=fig))
                     output_components.append(gr.Markdown("### Topic Distribution by Model"))
                     # Create multi-model topic distribution comparison
+                    fig = go.Figure()
                     for model in models:
                         if model in model_topics:
                             distribution = model_topics[model]
+                            fig.add_trace(go.Bar(
+                                x=[f"Topic {i+1}" for i in range(len(distribution))],
+                                y=distribution,
+                                name=model
+                            ))
+                    fig.update_layout(
+                        title="Topic Distributions Comparison",
+                        xaxis_title="Topic",
+                        yaxis_title="Weight",
+                        barmode='group',
+                        height=400
+                    )
+                    output_components.append(gr.Plot(value=fig))
+                # Visualize topic differences
                 comparisons = topic_results.get("comparisons", {})
                 if comparisons:
+                    output_components.append(gr.Markdown("### Topic Distribution Differences"))
+                    for comparison_key, comparison_data in comparisons.items():
+                        js_divergence = comparison_data.get("js_divergence", 0)
+                        topic_differences = comparison_data.get("topic_differences", [])
+                        output_components.append(gr.Markdown(
+                            f"**{comparison_key}** - Jensen-Shannon Divergence: {js_divergence:.4f}"
+                        ))
+                        if topic_differences:
+                            # Create DataFrame for plotting
+                            model1, model2 = comparison_key.split(" vs ")
+                            df_diff = pd.DataFrame(topic_differences)
+                            # Create bar chart for topic differences
+                            fig = go.Figure()
+                            fig.add_trace(go.Bar(
+                                x=[f"Topic {d['topic_id']+1}" for d in topic_differences],
+                                y=[d["model1_weight"] for d in topic_differences],
+                                name=model1
+                            ))
+                            fig.add_trace(go.Bar(
+                                x=[f"Topic {d['topic_id']+1}" for d in topic_differences],
+                                y=[d["model2_weight"] for d in topic_differences],
+                                name=model2
                             ))
                             fig.update_layout(
+                                title="Topic Weight Comparison",
+                                xaxis_title="Topic",
+                                yaxis_title="Weight",
+                                barmode='group',
+                                height=400
                             )
                             output_components.append(gr.Plot(value=fig))
     # If no components were added, show a message
     if len(output_components) <= 1:
     """
     try:
         print(f"Starting visualization of topic modeling analysis results")
+        return create_topic_visualization(analysis_results)
     except Exception as e:
         import traceback
         error_msg = f"Topic modeling visualization error: {str(e)}\n{traceback.format_exc()}"
         print(error_msg)
+        return [gr.Markdown(f"**Error during topic modeling visualization:**\n\n```\n{error_msg}\n```")]