Spaces:

RyanS974
/

525GradioApp

Sleeping

App Files Files Community

Ryan committed on Apr 23

Commit

7731b47

1 Parent(s): 6334788

update

Browse files

Files changed (7) hide show

app.py +79 -4
bert_classifier_function.py +45 -0
processors/roberta_processor.py +246 -0
processors/text_classifiers.py +81 -0
requirements.txt +2 -0
visualization/__init__.py +3 -1
visualization/roberta_visualizer.py +240 -0

app.py CHANGED Viewed

@@ -1,7 +1,9 @@
 import gradio as gr
 from ui.dataset_input import create_dataset_input, load_example_dataset
 from ui.analysis_screen import create_analysis_screen, process_analysis_request
 from visualization.bow_visualizer import process_and_visualize_analysis
 import nltk
 import os
 import json
@@ -51,7 +53,7 @@ def download_nltk_resources():
 def create_app():
     """
-    Create a streamlined Gradio app for dataset input and Bag of Words analysis.
     Returns:
         gr.Blocks: The Gradio application
@@ -60,6 +62,7 @@ def create_app():
         # Application state to share data between tabs
         dataset_state = gr.State({})
         analysis_results_state = gr.State({})
         # Dataset Input Tab
         with gr.Tab("Dataset Input"):
@@ -218,7 +221,7 @@ def create_app():
                             gr.update(visible=False),
                             gr.update(visible=False),
                             True,
-                            gr.update(visible=True, value=f"ℹ️ **{analyses['message']}**")
                         )
                     # Process based on the selected analysis type
@@ -539,8 +542,7 @@ def create_app():
                             gr.update(visible=False),
                             gr.update(visible=False),
                             gr.update(visible=False),
-                            gr.update(visible=False),
-                            True,
                             gr.update(visible=True, value="❌ **No visualization data found.** Make sure to select a valid analysis option.")
                         )
@@ -583,7 +585,80 @@ def create_app():
                         True,  # status_message_visible
                         gr.update(visible=True, value=f"❌ **Error during analysis:**\n\n```\n{str(e)}\n```")  # status_message
                     )
         # Add a Summary tab
         with gr.Tab("Summary"):
             gr.Markdown("## Analysis Summaries")

 import gradio as gr
 from ui.dataset_input import create_dataset_input, load_example_dataset
 from ui.analysis_screen import create_analysis_screen, process_analysis_request
+from ui.roberta_screen import create_roberta_screen, process_roberta_request
 from visualization.bow_visualizer import process_and_visualize_analysis
+from visualization.roberta_visualizer import process_and_visualize_sentiment_analysis
 import nltk
 import os
 import json
 def create_app():
     """
+    Create a streamlined Gradio app for dataset input and analysis.
     Returns:
         gr.Blocks: The Gradio application
         # Application state to share data between tabs
         dataset_state = gr.State({})
         analysis_results_state = gr.State({})
+        roberta_results_state = gr.State({})
         # Dataset Input Tab
         with gr.Tab("Dataset Input"):
                             gr.update(visible=False),
                             gr.update(visible=False),
                             True,
+                            gr.update(visible=True, value=f"ℹ️ **{analyses['message']}**")  # status_message
                         )
                     # Process based on the selected analysis type
                             gr.update(visible=False),
                             gr.update(visible=False),
                             gr.update(visible=False),
+                            True,  # status_message_visible
                             gr.update(visible=True, value="❌ **No visualization data found.** Make sure to select a valid analysis option.")
                         )
                         True,  # status_message_visible
                         gr.update(visible=True, value=f"❌ **Error during analysis:**\n\n```\n{str(e)}\n```")  # status_message
                     )
+        # RoBERTa Sentiment Analysis Tab (NEW)
+        with gr.Tab("RoBERTa Sentiment"):
+            # Create the RoBERTa analysis UI components
+            run_roberta_btn, roberta_output, sentence_level, visualization_style, visualization_container, roberta_status = create_roberta_screen()
+            # Container for visualization results
+            with gr.Column() as roberta_viz_container:
+                roberta_viz_components = []
+            # Function to run RoBERTa sentiment analysis
+            def run_roberta_analysis(dataset, sentence_level, visualization_style):
+                try:
+                    if not dataset or "entries" not in dataset or not dataset["entries"]:
+                        return (
+                            {},  # roberta_results_state
+                            True,  # status_message_visible
+                            gr.update(visible=True, value="❌ **Error:** No dataset loaded. Please create or load a dataset first."),  # status_message
+                            False,  # roberta_output visibility
+                            []  # empty visualization components
+                        )
+                    print(f"Running RoBERTa sentiment analysis with sentence-level={sentence_level}, style={visualization_style}")
+                    # Process the analysis request
+                    roberta_results = process_roberta_request(dataset, sentence_level, visualization_style)
+                    # Check if we have results
+                    if "error" in roberta_results:
+                        return (
+                            roberta_results,  # Store in state anyway for debugging
+                            True,  # status_message_visible
+                            gr.update(visible=True, value=f"❌ **Error:** {roberta_results['error']}"),  # status_message
+                            False,  # Hide raw output
+                            []  # empty visualization components
+                        )
+                    # Create visualization components
+                    viz_components = process_and_visualize_sentiment_analysis(roberta_results)
+                    return (
+                        roberta_results,  # roberta_results_state
+                        False,  # status_message_visible
+                        gr.update(visible=False),  # status_message
+                        False,  # roberta_output visibility (hide raw output)
+                        viz_components  # visualization components
+                    )
+                except Exception as e:
+                    import traceback
+                    error_msg = f"Error in RoBERTa analysis: {str(e)}\n{traceback.format_exc()}"
+                    print(error_msg)
+                    return (
+                        {"error": error_msg},  # roberta_results_state
+                        True,  # status_message_visible
+                        gr.update(visible=True, value=f"❌ **Error during RoBERTa analysis:**\n\n```\n{str(e)}\n```"),  # status_message
+                        False,  # Hide raw output
+                        []  # empty visualization components
+                    )
+            # Connect the run button to the analysis function
+            run_roberta_btn.click(
+                fn=run_roberta_analysis,
+                inputs=[dataset_state, sentence_level, visualization_style],
+                outputs=[
+                    roberta_results_state,
+                    gr.Checkbox(visible=False, value=False),  # Hidden checkbox for status visibility
+                    roberta_status,
+                    roberta_output,
+                    roberta_viz_container
+                ]
+            )
         # Add a Summary tab
         with gr.Tab("Summary"):
             gr.Markdown("## Analysis Summaries")

bert_classifier_function.py ADDED Viewed

	@@ -0,0 +1,45 @@

def classify_with_transformer(text, task="sentiment", model_name="distilbert-base-uncased"):
    """
    Classify text using a pre-trained transformer model (BERT, RoBERTa, etc.)

    Args:
        text (str): Text to analyze
        task (str): Classification task. 'sentiment', 'emotion' and 'toxicity' are
            aliases handled here; any other value is passed to transformers.pipeline
            unchanged as the pipeline task name.
        model_name (str): Name of the pre-trained model to use. When left at its
            default and the task is one of the aliases, a task-specific model is
            used instead.

    Returns:
        dict | list: The single result dict when the pipeline returns a one-element
            list, otherwise the pipeline output as-is. On failure, a dict with an
            "error" key.
    """
    default_model = "distilbert-base-uncased"

    # Task aliases and the fine-tuned model used for each when none is specified
    task_model_map = {
        "sentiment": "distilbert-base-uncased-finetuned-sst-2-english",
        "emotion": "j-hartmann/emotion-english-distilroberta-base",
        "toxicity": "unitary/toxic-bert"
    }

    try:
        from transformers import pipeline

        is_alias = task in task_model_map

        # Use mapped model if using default and task is in the map
        if model_name == default_model and is_alias:
            model_to_use = task_model_map[task]
        else:
            model_to_use = model_name

        # The aliases are not pipeline task identifiers; all three are sequence
        # classification problems, so they run through "text-classification".
        pipeline_task = "text-classification" if is_alias else task

        classifier = pipeline(pipeline_task, model=model_to_use)
        results = classifier(text)

        # Unwrap the common single-input case so callers get the dict directly
        if isinstance(results, list) and len(results) == 1:
            return results[0]
        return results
    except ImportError:
        return {"error": "Required packages not installed. Please install transformers and torch."}
    except Exception as e:
        return {"error": f"Classification failed: {str(e)}"}

processors/roberta_processor.py ADDED Viewed

	@@ -0,0 +1,246 @@

+"""
+RoBERTa-based sentiment analysis for comparing LLM responses
+"""
+import torch
+import numpy as np
+from transformers import RobertaTokenizer, RobertaForSequenceClassification
+import nltk
+from nltk.tokenize import sent_tokenize
+# Global variables to store models once loaded
+ROBERTA_TOKENIZER = None
+ROBERTA_MODEL = None
def ensure_nltk_resources():
    """
    Make sure the NLTK sentence-tokenizer data is downloaded

    Both 'punkt' and 'punkt_tab' are checked: older NLTK releases read the former,
    newer ones read the latter, and the project does not pin an upper NLTK version.
    Each resource is downloaded only when nltk.data.find cannot locate it.
    """
    for resource in ('punkt', 'punkt_tab'):
        try:
            nltk.data.find(f'tokenizers/{resource}')
        except LookupError:
            nltk.download(resource, quiet=True)
def load_roberta_model():
    """
    Load the RoBERTa model and tokenizer for sentiment analysis

    The loaded pair is cached in the module-level ROBERTA_TOKENIZER and
    ROBERTA_MODEL globals, so the expensive load happens at most once per process.
    A failed load leaves the cache untouched, so the next call retries.

    Returns:
        tuple: (tokenizer, model) for RoBERTa sentiment analysis, or (None, None)
            if loading failed
    """
    global ROBERTA_TOKENIZER, ROBERTA_MODEL

    # Return cached model if already loaded
    if ROBERTA_TOKENIZER is not None and ROBERTA_MODEL is not None:
        return ROBERTA_TOKENIZER, ROBERTA_MODEL

    # Tokenizer and weights come from the same checkpoint so they cannot drift apart
    checkpoint = 'roberta-large-mnli'

    print("Loading RoBERTa model and tokenizer...")
    try:
        tokenizer = RobertaTokenizer.from_pretrained(checkpoint)
        model = RobertaForSequenceClassification.from_pretrained(checkpoint)
    except Exception as e:
        print(f"Error loading RoBERTa model: {str(e)}")
        # Return None values if loading fails
        return None, None

    # Publish to the cache only once both pieces loaded successfully
    ROBERTA_TOKENIZER, ROBERTA_MODEL = tokenizer, model
    return ROBERTA_TOKENIZER, ROBERTA_MODEL
def _neutral_sentiment_result(error=None):
    """Build the fallback 'neutral' result, optionally tagged with an error message."""
    result = {}
    if error is not None:
        result["error"] = error
    result.update({
        "label": "neutral",
        "scores": {
            "contradiction": 0.33,
            "neutral": 0.34,
            "entailment": 0.33
        },
        "sentiment_score": 0.0,
        "sentence_scores": []
    })
    return result


def _sentiment_label(score):
    """Map a sentiment score in [-2, 2] to 'positive', 'negative' or 'neutral'."""
    if score > 0.5:
        return "positive"
    if score < -0.5:
        return "negative"
    return "neutral"


def _score_with_roberta(text, tokenizer, model, device):
    """Run one forward pass over text and return its label, class scores and sentiment score."""
    encoded = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}
    with torch.no_grad():
        outputs = model(**encoded)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

    # assumes class indices 0/1/2 are contradiction/neutral/entailment — TODO confirm
    # against the loaded model's label mapping
    contradiction = predictions[0, 0].item()
    neutral = predictions[0, 1].item()
    entailment = predictions[0, 2].item()

    # contradiction = negative, entailment = positive; scaled to the range -2..2
    sentiment_score = (entailment - contradiction) * 2
    return {
        "label": _sentiment_label(sentiment_score),
        "scores": {
            "contradiction": contradiction,
            "neutral": neutral,
            "entailment": entailment
        },
        "sentiment_score": sentiment_score
    }


def analyze_sentiment_roberta(text):
    """
    Analyze sentiment using RoBERTa model

    The whole text is scored once. When the text has more than one sentence and is
    longer than 100 characters, every sentence of at least three words is scored
    individually as well.

    Args:
        text (str): Text to analyze

    Returns:
        dict: Sentiment analysis results with "label", "scores", "sentiment_score"
            and "sentence_scores". If the model cannot be loaded or analysis fails,
            a neutral result with an additional "error" key is returned.
    """
    # Handle empty text first: it needs neither tokenizer data nor the model
    if not text or not text.strip():
        return _neutral_sentiment_result()

    ensure_nltk_resources()

    # Load model
    tokenizer, model = load_roberta_model()
    if tokenizer is None or model is None:
        return _neutral_sentiment_result(error="Failed to load RoBERTa model")

    try:
        # Set device
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model.to(device)

        # NOTE(review): the scores are read as contradiction/neutral/entailment and
        # mapped onto negative/neutral/positive sentiment — confirm that this
        # mapping is meaningful for single-text input.
        overall = _score_with_roberta(text, tokenizer, model, device)

        # Only process sentences if there are more than one and text is substantial
        sentences = sent_tokenize(text)
        sentence_scores = []
        if len(sentences) > 1 and len(text) > 100:
            for sentence in sentences:
                if len(sentence.split()) < 3:  # Only analyze meaningful sentences
                    continue
                scored = _score_with_roberta(sentence, tokenizer, model, device)
                sentence_scores.append({
                    "text": sentence,
                    "score": scored["sentiment_score"],
                    "label": scored["label"],
                    "scores": scored["scores"]
                })

        return {
            "label": overall["label"],
            "scores": overall["scores"],
            "sentiment_score": overall["sentiment_score"],
            "sentence_scores": sentence_scores
        }
    except Exception as e:
        import traceback
        print(f"Error analyzing sentiment with RoBERTa: {str(e)}")
        print(traceback.format_exc())
        return _neutral_sentiment_result(error=str(e))
def compare_sentiment_roberta(texts, model_names=None):
    """
    Compare sentiment between two texts using RoBERTa

    Every text is analyzed; the "comparison" section always compares the first two.

    Args:
        texts (list): List of texts to compare (None is treated as empty)
        model_names (list): Names of models corresponding to texts. If missing or
            shorter than two entries, "Model 1" and "Model 2" are used. Texts
            beyond the supplied names are labelled "Model N" by position.

    Returns:
        dict: Comparative sentiment analysis results, or a dict with an "error"
            key when fewer than two texts are given
    """
    if texts is None:
        texts = []

    # Set default model names if not provided
    if model_names is None or len(model_names) < 2:
        model_names = ["Model 1", "Model 2"]

    # One name per text; pad by position so results for extra texts are not dropped
    names = model_names[:len(texts)]
    if len(names) < len(texts):
        names = list(names) + [f"Model {position + 1}" for position in range(len(names), len(texts))]

    # Handle case with fewer than 2 texts
    if len(texts) < 2:
        return {
            "error": "Need at least 2 texts to compare",
            "models": names
        }

    # Get sentiment analysis for each text
    sentiment_results = [analyze_sentiment_roberta(text) for text in texts]

    result = {
        "models": names,
        "sentiment_analysis": dict(zip(names, sentiment_results))
    }

    # Compare sentiment scores of the first two texts
    model1_name, model2_name = names[0], names[1]
    score1 = sentiment_results[0]["sentiment_score"]
    score2 = sentiment_results[1]["sentiment_score"]

    difference = abs(score1 - score2)
    comparison = {
        "sentiment_difference": difference,
        "significant_difference": difference > 0.5,  # Threshold for significant difference
    }

    if score1 > score2:
        comparison["more_positive"] = model1_name
        comparison["more_negative"] = model2_name
        comparison["difference_direction"] = f"{model1_name} is more positive than {model2_name}"
    elif score2 > score1:
        comparison["more_positive"] = model2_name
        comparison["more_negative"] = model1_name
        comparison["difference_direction"] = f"{model2_name} is more positive than {model1_name}"
    else:
        comparison["equal_sentiment"] = True
        comparison["difference_direction"] = f"{model1_name} and {model2_name} have similar sentiment"

    result["comparison"] = comparison
    return result

processors/text_classifiers.py CHANGED Viewed

@@ -149,4 +149,85 @@ def compare_classifications(text1, text2):
     if not results:
         results["Summary"] = "Both responses have similar writing characteristics"
     return results

     if not results:
         results["Summary"] = "Both responses have similar writing characteristics"
+    return results
# Pipelines are created once per (pipeline task, model) pair and reused, so
# classifying many texts does not rebuild the pipeline for each one.
_ROBERTA_PIPELINE_CACHE = {}


def _get_roberta_pipeline(pipeline_task, model_to_use, **pipeline_kwargs):
    """Return the cached transformers pipeline for this task/model, creating it on first use."""
    from transformers import pipeline

    cache_key = (pipeline_task, model_to_use)
    if cache_key not in _ROBERTA_PIPELINE_CACHE:
        _ROBERTA_PIPELINE_CACHE[cache_key] = pipeline(pipeline_task, model=model_to_use, **pipeline_kwargs)
    return _ROBERTA_PIPELINE_CACHE[cache_key]


def classify_with_roberta(text, task="sentiment", model_name=None):
    """
    Classify text using a pre-trained RoBERTa-family model

    Args:
        text (str): Text to analyze
        task (str): Classification task ('sentiment', 'toxicity', 'topic', 'person')
        model_name (str, optional): Specific model to use, if None will use task-appropriate model

    Returns:
        dict: Always carries "task" and "model". For task 'topic' it also has
            "labels" and "scores" from zero-shot classification over a fixed topic
            list; for other tasks it has "results" with the classifier output.
            On failure, a dict with only an "error" key.
    """
    # Map tasks to appropriate pre-trained models
    task_model_map = {
        "sentiment": "cardiffnlp/twitter-roberta-base-sentiment",
        "toxicity": "cardiffnlp/twitter-roberta-base-hate",
        "topic": "facebook/bart-large-mnli",  # Zero-shot classification for topics
        "person": "roberta-base"  # Default for person detection - could be fine-tuned
    }

    try:
        # Imported only so that a missing torch install produces the friendly
        # "packages not installed" error below instead of a pipeline failure.
        import torch  # noqa: F401

        # An explicit model wins; otherwise use the task's model, else plain roberta-base
        if model_name is not None:
            model_to_use = model_name
        else:
            model_to_use = task_model_map.get(task, "roberta-base")

        # Special handling for zero-shot topic classification
        if task == "topic":
            classifier = _get_roberta_pipeline("zero-shot-classification", model_to_use)
            topics = ["economy", "foreign policy", "healthcare", "environment", "immigration"]
            results = classifier(text, topics, multi_label=False)
            return {
                "task": task,
                "model": model_to_use,
                "labels": results["labels"],
                "scores": results["scores"]
            }

        # NOTE(review): return_all_scores is deprecated in newer transformers
        # releases in favour of top_k=None; kept because requirements.txt only
        # guarantees transformers>=4.15.0.
        classifier = _get_roberta_pipeline("text-classification", model_to_use, return_all_scores=True)
        results = classifier(text)

        # Unwrap the single-input case for consistent output
        if isinstance(results, list) and len(results) == 1:
            results = results[0]

        return {
            "task": task,
            "model": model_to_use,
            "results": results
        }
    except ImportError:
        return {"error": "Required packages not installed. Please install transformers and torch."}
    except Exception as e:
        return {"error": f"Classification failed: {str(e)}"}
def analyze_dataset_with_roberta(dataset_texts, task="topic"):
    """
    Run classify_with_roberta over every text in a collection

    Args:
        dataset_texts (dict): Maps each text identifier to its text content
        task (str): Classification task forwarded to classify_with_roberta

    Returns:
        dict: The classification result for each text, keyed by its identifier
    """
    return {
        identifier: classify_with_roberta(content, task=task)
        for identifier, content in dataset_texts.items()
    }

requirements.txt CHANGED Viewed

@@ -5,3 +5,5 @@ nltk>=3.6.0
 pandas>=1.3.0
 plotly>=5.3.0
 matplotlib>=3.4.0

 pandas>=1.3.0
 plotly>=5.3.0
 matplotlib>=3.4.0
+transformers>=4.15.0
+torch>=1.9.0

visualization/__init__.py CHANGED Viewed

@@ -6,10 +6,12 @@ from .bow_visualizer import process_and_visualize_analysis
 from .topic_visualizer import process_and_visualize_topic_analysis
 from .ngram_visualizer import process_and_visualize_ngram_analysis
 from .bias_visualizer import process_and_visualize_bias_analysis
 __all__ = [
     'process_and_visualize_analysis',
     'process_and_visualize_topic_analysis',
     'process_and_visualize_ngram_analysis',
-    'process_and_visualize_bias_analysis'
 ]

 from .topic_visualizer import process_and_visualize_topic_analysis
 from .ngram_visualizer import process_and_visualize_ngram_analysis
 from .bias_visualizer import process_and_visualize_bias_analysis
+from .roberta_visualizer import process_and_visualize_sentiment_analysis
 __all__ = [
     'process_and_visualize_analysis',
     'process_and_visualize_topic_analysis',
     'process_and_visualize_ngram_analysis',
+    'process_and_visualize_bias_analysis',
+    'process_and_visualize_sentiment_analysis'
 ]

visualization/roberta_visualizer.py ADDED Viewed

	@@ -0,0 +1,240 @@

+"""
+Visualization components for RoBERTa sentiment analysis
+"""
+import gradio as gr
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+import numpy as np
+import json
def create_sentiment_visualization(analysis_results):
    """
    Create visualizations for RoBERTa sentiment analysis results

    For each prompt under analysis_results["analyses"] whose "roberta_sentiment"
    entry lists at least two models, this adds a gauge chart of the sentiment
    scores, a bar chart of the per-class scores, an HTML comparison summary and an
    HTML sentence-by-sentence breakdown.

    Args:
        analysis_results (dict): Analysis results from the sentiment analysis

    Returns:
        list: List of gradio components with visualizations
    """
    # Response text and model names end up inside raw HTML, so they must be escaped
    import html

    output_components = []

    # Check if we have valid results
    if not analysis_results or "analyses" not in analysis_results:
        return [gr.Markdown("No analysis results found.")]

    # Process each prompt
    for prompt, analyses in analysis_results["analyses"].items():
        output_components.append(gr.Markdown(f"## Analysis of Prompt: \"{prompt[:100]}{'...' if len(prompt) > 100 else ''}\""))

        # Process RoBERTa sentiment analysis if available
        if "roberta_sentiment" in analyses:
            sentiment_results = analyses["roberta_sentiment"]

            # Check if there's an error
            if "error" in sentiment_results:
                output_components.append(gr.Markdown(f"**Error in sentiment analysis:** {sentiment_results['error']}"))
                continue

            # Show models being compared
            models = sentiment_results.get("models", [])
            if len(models) >= 2:
                output_components.append(gr.Markdown(f"### RoBERTa Sentiment Analysis: Comparing {models[0]} and {models[1]}"))

                # Create a sentiment comparison chart
                sa_data = sentiment_results.get("sentiment_analysis", {})
                if sa_data:
                    # Extract sentiment scores and labels for comparison
                    model_data = []
                    for model_name in models:
                        if model_name in sa_data:
                            model_result = sa_data[model_name]
                            class_scores = model_result.get("scores", {})
                            model_data.append({
                                "model": model_name,
                                "sentiment_score": model_result.get("sentiment_score", 0),
                                "label": model_result.get("label", "neutral"),
                                "contradiction": class_scores.get("contradiction", 0),
                                "neutral": class_scores.get("neutral", 0),
                                "entailment": class_scores.get("entailment", 0)
                            })

                    if model_data:
                        df = pd.DataFrame(model_data)

                        # Create gauge chart for sentiment scores, one gauge per model
                        fig = go.Figure()
                        for i, row in df.iterrows():
                            # Set color based on sentiment
                            color = "green" if row["sentiment_score"] > 0.5 else "red" if row["sentiment_score"] < -0.5 else "gray"
                            fig.add_trace(go.Indicator(
                                mode="gauge+number",
                                value=row["sentiment_score"],
                                title={"text": f"{row['model']}<br><span style='font-size:0.8em;color:{color}'>{row['label'].capitalize()}</span>"},
                                gauge={
                                    "axis": {"range": [-2, 2], "tickmode": "array", "tickvals": [-2, -1, 0, 1, 2],
                                             "ticktext": ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"]},
                                    "bar": {"color": color},
                                    "threshold": {
                                        "line": {"color": "black", "width": 2},
                                        "thickness": 0.75,
                                        "value": row["sentiment_score"]
                                    },
                                    # Shaded bands mirror the label thresholds at -0.5 and 0.5
                                    "steps": [
                                        {"range": [-2, -0.5], "color": "rgba(255, 0, 0, 0.2)"},
                                        {"range": [-0.5, 0.5], "color": "rgba(128, 128, 128, 0.2)"},
                                        {"range": [0.5, 2], "color": "rgba(0, 128, 0, 0.2)"}
                                    ]
                                },
                                domain={"row": 0, "column": i}
                            ))

                        # Layout adjustments
                        fig.update_layout(
                            title="Sentiment Score Comparison",
                            grid={"rows": 1, "columns": len(df), "pattern": "independent"},
                            height=300,
                            margin=dict(t=70, b=30, l=30, r=30)
                        )
                        output_components.append(gr.Plot(value=fig))

                        # Create detailed scores visualization
                        fig2 = make_subplots(rows=1, cols=len(df),
                                             subplot_titles=[f"{name} Detailed Scores" for name in df["model"]])
                        for i, row in df.iterrows():
                            fig2.add_trace(
                                go.Bar(
                                    x=["Contradiction (Negative)", "Neutral", "Entailment (Positive)"],
                                    y=[row["contradiction"], row["neutral"], row["entailment"]],
                                    marker_color=["rgba(255, 0, 0, 0.6)", "rgba(128, 128, 128, 0.6)", "rgba(0, 128, 0, 0.6)"]
                                ),
                                row=1, col=i+1
                            )
                        fig2.update_layout(
                            title="RoBERTa Classification Scores",
                            showlegend=False,
                            height=350,
                            margin=dict(t=70, b=30, l=30, r=30)
                        )
                        output_components.append(gr.Plot(value=fig2))

                # Display comparison summary
                if "comparison" in sentiment_results:
                    comparison = sentiment_results["comparison"]
                    summary_html = """
                    <div style="margin: 20px 0; padding: 15px; background-color: #f8f9fa; border-radius: 5px;">
                        <h4 style="margin-top: 0;">Sentiment Comparison Summary</h4>
                    """

                    # Add difference direction (contains model names, so escape it)
                    if "difference_direction" in comparison:
                        direction = html.escape(str(comparison["difference_direction"]))
                        summary_html += f"""
                        <p style="font-weight: 500; margin-bottom: 10px;">
                            {direction}
                        </p>
                        """

                    # Add significance info
                    if "significant_difference" in comparison:
                        color = "red" if comparison["significant_difference"] else "green"
                        significance = "Significant" if comparison["significant_difference"] else "Minor"
                        difference = comparison.get("sentiment_difference", 0)
                        summary_html += f"""
                        <p>
                            <span style="font-weight: bold; color: {color};">{significance} difference</span> in sentiment
                            (difference score: {difference:.2f})
                        </p>
                        """

                    summary_html += "</div>"
                    output_components.append(gr.HTML(summary_html))

                # Display sentence-level sentiment analysis for both responses
                model_sentences = {}
                for model_name in models:
                    if model_name in sa_data and sa_data[model_name].get("sentence_scores"):
                        model_sentences[model_name] = sa_data[model_name]["sentence_scores"]

                if model_sentences:
                    output_components.append(gr.Markdown("### Sentence-Level Sentiment Analysis"))
                    for model_name, sentences in model_sentences.items():
                        output_components.append(gr.Markdown(f"#### {model_name} Response Breakdown"))

                        # Create HTML visualization for sentences with sentiment
                        sentences_html = """
                            <div style="margin-bottom: 20px;">
                            """
                        for sentence in sentences:
                            score = sentence.get("score", 0)
                            label = sentence.get("label", "neutral")
                            text = sentence.get("text", "")

                            # Skip very short sentences or empty text
                            if len(text.split()) < 3:
                                continue

                            # Color based on sentiment; opacity grows with score magnitude
                            if label == "positive":
                                color = f"rgba(0, 128, 0, {min(1.0, abs(score) * 0.5)})"
                                border = "rgba(0, 128, 0, 0.3)"
                            elif label == "negative":
                                color = f"rgba(255, 0, 0, {min(1.0, abs(score) * 0.5)})"
                                border = "rgba(255, 0, 0, 0.3)"
                            else:
                                color = "rgba(128, 128, 128, 0.1)"
                                border = "rgba(128, 128, 128, 0.3)"

                            # The sentence is model-generated text: escape before embedding
                            safe_text = html.escape(text)
                            safe_label = html.escape(label.capitalize())
                            sentences_html += f"""
                                <div style="padding: 10px; margin-bottom: 10px; background-color: {color};
                                            border-radius: 5px; border: 1px solid {border};">
                                    <div style="display: flex; justify-content: space-between;">
                                        <span>{safe_text}</span>
                                        <span style="margin-left: 10px; font-weight: bold;">
                                            {score:.2f} ({safe_label})
                                        </span>
                                    </div>
                                </div>
                                """
                        sentences_html += "</div>"
                        output_components.append(gr.HTML(sentences_html))

    # If no components were added, show a message
    if len(output_components) <= 1:
        output_components.append(gr.Markdown("No detailed sentiment analysis found in results."))

    return output_components
def process_and_visualize_sentiment_analysis(analysis_results):
    """
    Build the visualization components for a set of sentiment analysis results

    Delegates to create_sentiment_visualization. Any exception raised there is
    printed with its traceback and replaced by a single Markdown error component,
    so this function does not raise.

    Args:
        analysis_results (dict): The analysis results

    Returns:
        list: List of gradio components for visualization
    """
    try:
        print("Starting visualization of sentiment analysis results")
        return create_sentiment_visualization(analysis_results)
    except Exception as exc:
        import traceback
        trace_text = traceback.format_exc()
        print(f"Sentiment visualization error: {str(exc)}\n{trace_text}")
        failure_note = f"**Error during sentiment visualization:**\n\n```\n{str(exc)}\n```"
        return [gr.Markdown(failure_note)]