# Source: Hugging Face Space RyanS974/525GradioApp (Space status when captured: Sleeping)
# File size when captured: 15,478 Bytes

import gradio as gr
import json
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import pandas as pd
from difflib import SequenceMatcher

from visualization.ngram_visualizer import create_ngram_visualization
from visualization.topic_visualizer import process_and_visualize_topic_analysis  # Added import

def create_bow_visualization(analysis_results):
    """
    Create visualizations for bag of words analysis results.

    For every prompt under ``results["analyses"]`` a heading is emitted. When
    the prompt has a ``"bag_of_words"`` entry that lists at least two models,
    the following are added in order:

    * a "Comparing responses" heading naming the first two models,
    * one bar chart per model in ``"important_words"`` (each entry is read
      through its ``'word'`` and ``'count'`` fields),
    * a grouped bar chart comparing the first two models on up to 15 of the
      ``"differential_words"`` that also appear in ``"word_count_matrix"``.

    Args:
        analysis_results (dict | str): Analysis results from the bow analysis,
            either as a dict or as a JSON string that decodes to one.

    Returns:
        list: List of gradio components with visualizations. If the input
        cannot be parsed or has no ``"analyses"`` key, a single Markdown
        message is returned instead.
    """
    # Parse analysis results if it's a string
    if isinstance(analysis_results, str):
        try:
            results = json.loads(analysis_results)
        except json.JSONDecodeError:
            return [gr.Markdown("Error parsing analysis results.")]
    else:
        results = analysis_results

    # A JSON payload may legally decode to a list/number/None; only a dict
    # carrying an "analyses" key is usable here.
    if not isinstance(results, dict) or "analyses" not in results:
        return [gr.Markdown("No analysis results found.")]

    output_components = []
    # Tracks whether any prompt produced bag-of-words content. Counting
    # components is not reliable because every prompt contributes a heading.
    found_bow_content = False

    for prompt, analyses in results["analyses"].items():
        output_components.append(gr.Markdown(f"## Analysis of Prompt: \"{prompt}\""))

        if "bag_of_words" not in analyses:
            continue
        bow_results = analyses["bag_of_words"]

        # The comparison charts need two models to compare.
        models = bow_results.get("models") or []
        if len(models) < 2:
            continue

        found_bow_content = True
        model1, model2 = models[0], models[1]
        output_components.append(
            gr.Markdown(f"### Comparing responses from {model1} and {model2}")
        )

        # One "top words" bar chart per model.
        important_words = bow_results.get("important_words") or {}
        for model_name, words in important_words.items():
            if not words:
                # An empty list yields a DataFrame without 'word'/'count'
                # columns, which px.bar rejects; there is nothing to plot.
                continue

            df = pd.DataFrame(words)
            fig = px.bar(df, x='word', y='count',
                         title=f"Top Words Used by {model_name}",
                         labels={'word': 'Word', 'count': 'Frequency'},
                         height=400)
            fig.update_layout(
                xaxis_title="Word",
                yaxis_title="Frequency",
                xaxis={'categoryorder': 'total descending'}
            )
            output_components.append(gr.Plot(value=fig))

        # Grouped bar chart for the words with the biggest frequency difference.
        diff_words = bow_results.get("differential_words") or []
        word_matrix = bow_results.get("word_count_matrix") or {}
        if not diff_words or not word_matrix:
            continue

        output_components.append(gr.Markdown("### Words with Biggest Frequency Differences"))

        # Limit to the top 15 words for readability; skip words with no counts.
        diff_data = [
            {
                "word": word,
                model1: word_matrix[word].get(model1, 0),
                model2: word_matrix[word].get(model2, 0),
            }
            for word in diff_words[:15]
            if word in word_matrix
        ]
        if not diff_data:
            continue

        diff_df = pd.DataFrame(diff_data)
        fig = go.Figure()
        fig.add_trace(go.Bar(
            x=diff_df['word'],
            y=diff_df[model1],
            name=model1,
            marker_color='indianred'
        ))
        fig.add_trace(go.Bar(
            x=diff_df['word'],
            y=diff_df[model2],
            name=model2,
            marker_color='lightsalmon'
        ))
        fig.update_layout(
            title="Word Frequency Comparison",
            xaxis_title="Word",
            yaxis_title="Frequency",
            barmode='group',
            height=500
        )
        output_components.append(gr.Plot(value=fig))

    # If no bag-of-words content was produced for any prompt, say so.
    if not found_bow_content:
        output_components.append(gr.Markdown("No detailed Bag of Words analysis found in results."))

    return output_components


def _build_top_words_figure(model_name, words, limit=15):
    """Return a bar chart of the first ``limit`` word counts for one model, or None if ``words`` is empty."""
    if not words:
        # An empty list yields a DataFrame without 'word'/'count' columns,
        # which px.bar rejects; signal "nothing to plot" instead of raising.
        return None

    df = pd.DataFrame(words).head(limit)
    fig = px.bar(df, x='word', y='count',
                 title=f"Top Words Used by {model_name}",
                 labels={'word': 'Word', 'count': 'Frequency'},
                 height=400)
    fig.update_layout(
        xaxis_title="Word",
        yaxis_title="Frequency",
        xaxis={'categoryorder': 'total descending'}
    )
    return fig


def _build_word_difference_figure(bow_results, limit=15):
    """Return a grouped bar chart comparing the first two models on differential words, or None if data is missing."""
    models = bow_results.get("models") or []
    diff_words = bow_results.get("differential_words") or []
    word_matrix = bow_results.get("word_count_matrix") or {}

    if len(models) < 2 or not diff_words or not word_matrix:
        return None

    model1, model2 = models[0], models[1]
    # Only words that actually have an entry in the count matrix can be plotted.
    diff_data = [
        {
            "word": word,
            model1: word_matrix[word].get(model1, 0),
            model2: word_matrix[word].get(model2, 0),
        }
        for word in diff_words[:limit]
        if word in word_matrix
    ]
    if not diff_data:
        return None

    diff_df = pd.DataFrame(diff_data)
    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=diff_df['word'],
        y=diff_df[model1],
        name=model1,
        marker_color='indianred'
    ))
    fig.add_trace(go.Bar(
        x=diff_df['word'],
        y=diff_df[model2],
        name=model2,
        marker_color='lightsalmon'
    ))
    fig.update_layout(
        title="Word Frequency Comparison",
        xaxis_title="Word",
        yaxis_title="Frequency",
        barmode='group',
        height=500
    )
    return fig


def process_and_visualize_analysis(analysis_results):
    """
    Process the analysis results and create visualization components.

    For every prompt under ``analysis_results["analyses"]`` a heading is
    emitted. When the prompt has a ``"bag_of_words"`` entry, the function adds
    a textual summary (models compared, first 10 words per model) followed by
    a button and an initially hidden column. The column holds the per-model
    word-frequency charts and the word-frequency comparison chart; clicking
    the button makes the column visible.

    Args:
        analysis_results (dict): The analysis results

    Returns:
        list: List of gradio components for visualization. Any exception
        raised while building them is caught, printed with its traceback, and
        returned as a single Markdown error component.
    """
    try:
        print(f"Starting visualization of analysis results: {type(analysis_results)}")
        components = []

        if not analysis_results or "analyses" not in analysis_results:
            print("Warning: Empty or invalid analysis results")
            components.append(gr.Markdown("No analysis results to visualize."))
            return components

        # Click handler shared by every "show" button: reveal the target column.
        def show_visualizations():
            return gr.Column(visible=True)

        # For each prompt in the analysis results
        for prompt, analyses in analysis_results.get("analyses", {}).items():
            print(f"Visualizing results for prompt: {prompt[:30]}...")
            components.append(gr.Markdown(f"## Analysis for Prompt:\n\"{prompt}\""))

            # Only Bag of Words analysis is visualized here
            if "bag_of_words" not in analyses:
                continue

            print("Processing Bag of Words visualization")
            components.append(gr.Markdown("### Bag of Words Analysis"))
            bow_results = analyses["bag_of_words"]

            # Display models compared
            if "models" in bow_results:
                models = bow_results["models"]
                components.append(gr.Markdown(f"**Models compared**: {', '.join(models)}"))

            # Display important words for each model
            if "important_words" in bow_results:
                components.append(gr.Markdown("#### Most Common Words by Model"))

                for model, words in bow_results["important_words"].items():
                    print(f"Creating word list for model {model}")
                    word_list = [f"{item['word']} ({item['count']})" for item in words[:10]]
                    components.append(gr.Markdown(f"**{model}**: {', '.join(word_list)}"))

            # Button that reveals the (initially hidden) chart container
            show_button = gr.Button("📊 Show Word Frequency Visualizations")
            visualization_container = gr.Column(visible=False)

            show_button.click(
                fn=show_visualizations,
                outputs=visualization_container
            )

            # Components created inside the `with` are placed in the column.
            with visualization_container:
                # Model-specific word frequency charts
                important_words = bow_results.get("important_words") or {}
                for model_name, words in important_words.items():
                    fig = _build_top_words_figure(model_name, words)
                    if fig is not None:
                        gr.Plot(value=fig)

                # Differential words comparison chart
                diff_fig = _build_word_difference_figure(bow_results)
                if diff_fig is not None:
                    gr.Plot(value=diff_fig)

            components.append(show_button)
            components.append(visualization_container)

        if not components:
            components.append(gr.Markdown("No visualization components could be created from the analysis results."))

        print(f"Visualization complete: generated {len(components)} components")
        return components
    except Exception as e:
        # Top-level boundary for the UI: report the failure as a component
        # instead of letting it propagate into the Gradio event loop.
        import traceback
        error_msg = f"Visualization error: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        return [gr.Markdown(f"**Error during visualization:**\n\n```\n{error_msg}\n```")]