Spaces:

RyanS974
/

525GradioApp

Sleeping

App Files Community

Ryan committed on Apr 20

Commit

6528c77

1 Parent(s): 1b8fad7

update

Browse files

Files changed (5) hide show

app.py +13 -26
requirements.txt +7 -9
ui/analysis_screen.py +7 -3
visualizers/__init__.py +5 -6
visualizers/bow_visualizer.py +180 -0

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import gradio as gr
 from ui.dataset_input import create_dataset_input, load_example_dataset
 from ui.analysis_screen import create_analysis_screen, process_analysis_request
 import nltk
 import os
 import json
@@ -95,56 +96,42 @@ def create_app():
         # Analysis Tab
         with gr.Tab("Analysis"):
-            # Use create_analysis_screen to get UI components
-            analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider = create_analysis_screen()
             # Define a helper function to extract parameter values and call process_analysis_request
             def run_analysis(dataset, selected_analyses, bow_top_value):
                 # Check if dataset exists
                 if not dataset or "entries" not in dataset or not dataset["entries"]:
-                    return {}, gr.update(visible=True, value={"error": "No dataset provided. Please create a dataset in the Dataset Input tab first."})
                 # Create parameters dictionary with the slider value
                 params = {"bow_top": bow_top_value}
                 # Call the process_analysis_request function with proper parameters
                 try:
-                    results, output = process_analysis_request(dataset, selected_analyses, params)
                     print(f"Analysis completed successfully")
-                    # Fix double-encoded JSON issue
-                    if isinstance(output, dict) and "value" in output:
-                        try:
-                            # Remove any extraneous whitespace and quotes
-                            json_str = output["value"].strip()
-                            # Check if the value is a string that looks like JSON
-                            if isinstance(json_str, str) and json_str.startswith("{") and json_str.endswith("}"):
-                                # Parse the first JSON string into a Python dictionary
-                                parsed_output = json.loads(json_str)
-                                # Return the cleaned data directly
-                                return results, gr.update(visible=True, value=parsed_output)
-                            else:
-                                return results, output
-                        except json.JSONDecodeError as e:
-                            print(f"JSON parsing error: {e}")
-                            return results, gr.update(visible=True, value={"error": f"Error parsing results: {str(e)}"})
-                    else:
-                        return results, output
                 except Exception as e:
                     import traceback
                     error_trace = traceback.format_exc()
                     print(f"Error in analysis: {e}")
                     print(f"Full traceback: {error_trace}")
-                    return {}, gr.update(visible=True, value={"error": f"Analysis error: {str(e)}"})
             # Run analysis with proper parameters
             run_analysis_btn.click(
                 fn=run_analysis,
                 inputs=[dataset_state, analysis_options, bow_top_slider],
-                outputs=[analysis_results_state, analysis_output]
             )
     return app

 import gradio as gr
 from ui.dataset_input import create_dataset_input, load_example_dataset
 from ui.analysis_screen import create_analysis_screen, process_analysis_request
+from visualization.bow_visualizer import process_and_visualize_analysis
 import nltk
 import os
 import json
         # Analysis Tab
         with gr.Tab("Analysis"):
+            # Use create_analysis_screen to get UI components including visualization container
+            analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, visualization_container = create_analysis_screen()
             # Define a helper function to extract parameter values and call process_analysis_request
             def run_analysis(dataset, selected_analyses, bow_top_value):
+                # Click handler for the "Run Analysis" button. Returns three values, one per
+                # entry of the click `outputs` list: the results state, an update for the
+                # hidden raw-JSON component, and an update for the visualization container.
                 # Check if dataset exists
                 if not dataset or "entries" not in dataset or not dataset["entries"]:
+                    # NOTE(review): visualization_container is created as gr.Column(visible=False)
+                    # in ui/analysis_screen.py; confirm that passing a list of freshly built
+                    # components as value= in a Column update actually renders them.
+                    error_components = [gr.Markdown("❌ **Error:** No dataset provided. Please create a dataset in the Dataset Input tab first.")]
+                    return {}, gr.update(visible=False), gr.update(visible=True, value=error_components)
                 # Create parameters dictionary with the slider value
                 params = {"bow_top": bow_top_value}
                 # Call the process_analysis_request function with proper parameters
                 try:
+                    # The second return value (a gr.update built by process_analysis_request)
+                    # is discarded; this handler builds its own updates below.
+                    results, _ = process_analysis_request(dataset, selected_analyses, params)
                     print(f"Analysis completed successfully")
+                    # Process and visualize the results
+                    visualization_components = process_and_visualize_analysis(results)
+                    return results, gr.update(visible=False, value=results), gr.update(visible=True, value=visualization_components)
                 except Exception as e:
+                    # Broad catch on purpose: any failure in analysis or visualization is
+                    # logged with its traceback and reported to the user instead of raised.
                     import traceback
                     error_trace = traceback.format_exc()
                     print(f"Error in analysis: {e}")
                     print(f"Full traceback: {error_trace}")
+                    error_components = [gr.Markdown(f"❌ **Error during analysis:** {str(e)}")]
+                    return {}, gr.update(visible=False), gr.update(visible=True, value=error_components)
             # Run analysis with proper parameters
             run_analysis_btn.click(
                 fn=run_analysis,
                 inputs=[dataset_state, analysis_options, bow_top_slider],
+                outputs=[analysis_results_state, analysis_output, visualization_container]
             )
     return app

requirements.txt CHANGED Viewed

@@ -1,9 +1,7 @@
-gradio>=3.50.2
-numpy>=1.24.3
-scikit-learn>=1.2.2
-matplotlib>=3.7.1
-nltk>=3.8.1
-scipy>=1.10.1
-pandas>=2.0.1
-markdown>=3.4.3
-requests>=2.31.0

+gradio>=4.0.0
+numpy>=1.20.0
+scikit-learn>=1.0.0
+nltk>=3.6.0
+pandas>=1.3.0
+plotly>=5.3.0
+matplotlib>=3.4.0

ui/analysis_screen.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import gradio as gr
 import json
 # Import analysis modules
 # Uncomment these when implemented
@@ -99,11 +100,14 @@ def create_analysis_screen():
         # Run analysis button
         run_analysis_btn = gr.Button("Run Analysis", variant="primary", size="large")
-        # Analysis output area
         analysis_output = gr.JSON(label="Analysis Results", visible=False)
     # Return the bow_top_slider directly so app.py can access it
-    return analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider
 def process_analysis_request(dataset, selected_analyses, parameters):
     """
@@ -149,4 +153,4 @@ def process_analysis_request(dataset, selected_analyses, parameters):
     print("Analysis complete - results:", analysis_results)
     # Return results and update the output component
-    return analysis_results, gr.update(visible=True, value=json.dumps(analysis_results, indent=2))

 import gradio as gr
 import json
+from visualization.bow_visualizer import process_and_visualize_analysis
 # Import analysis modules
 # Uncomment these when implemented
         # Run analysis button
         run_analysis_btn = gr.Button("Run Analysis", variant="primary", size="large")
+        # Analysis output area - hidden JSON component to store raw results
         analysis_output = gr.JSON(label="Analysis Results", visible=False)
+        # Visualization components container
+        visualization_container = gr.Column(visible=False)
     # Return the bow_top_slider directly so app.py can access it
+    return analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, visualization_container
 def process_analysis_request(dataset, selected_analyses, parameters):
     """
     print("Analysis complete - results:", analysis_results)
     # Return results and update the output component
+    return analysis_results, gr.update(visible=False, value=analysis_results)  # Hide the raw JSON

visualizers/__init__.py CHANGED Viewed

@@ -1,8 +1,7 @@
-# processors/__init__.py
-# Empty file to make the directory a Python package
-# ui/__init__.py
-# Empty file to make the directory a Python package
-# utils/__init__.py
-# Empty file to make the directory a Python package

+"""
+Visualization components for LLM Response Comparator
+"""
+from .bow_visualizer import process_and_visualize_analysis
+__all__ = ['process_and_visualize_analysis']

visualizers/bow_visualizer.py ADDED Viewed

	@@ -0,0 +1,180 @@

+import gradio as gr
+import json
+import numpy as np
+import plotly.graph_objects as go
+import plotly.express as px
+from plotly.subplots import make_subplots
+import pandas as pd
+from difflib import SequenceMatcher
+def create_bow_visualization(analysis_results):
+    """
+    Create visualizations for bag of words analysis results
+    Args:
+        analysis_results (dict): Analysis results from the bow analysis
+    Returns:
+        list: List of gradio components with visualizations
+    """
+    # NOTE: besides a dict, a JSON string is also accepted; it is decoded here and a
+    # single error message component is returned when decoding fails.
+    # Parse analysis results if it's a string
+    if isinstance(analysis_results, str):
+        try:
+            results = json.loads(analysis_results)
+        except json.JSONDecodeError:
+            return [gr.Markdown("Error parsing analysis results.")]
+    else:
+        results = analysis_results
+    output_components = []
+    # Check if we have valid results
+    if not results or "analyses" not in results:
+        return [gr.Markdown("No analysis results found.")]
+    # Process each prompt
+    # Every prompt contributes a header component, whether or not it has
+    # bag-of-words data.
+    for prompt, analyses in results["analyses"].items():
+        output_components.append(gr.Markdown(f"## Analysis of Prompt: \"{prompt}\""))
+        # Process Bag of Words analysis if available
+        if "bag_of_words" in analyses:
+            bow_results = analyses["bag_of_words"]
+            # Show models being compared
+            models = bow_results.get("models", [])
+            # All charts and metrics below are gated on at least two models being listed;
+            # only models[0] and models[1] are named in the heading and used in the
+            # frequency-comparison chart.
+            if len(models) >= 2:
+                output_components.append(gr.Markdown(f"### Comparing responses from {models[0]} and {models[1]}"))
+                # Get important words for each model
+                important_words = bow_results.get("important_words", {})
+                # Prepare data for plotting important words
+                if important_words:
+                    for model_name, words in important_words.items():
+                        # NOTE(review): px.bar below reads the 'word' and 'count' columns, so
+                        # each item of `words` is presumably a {'word': ..., 'count': ...}
+                        # record - confirm against the code producing the analysis.
+                        df = pd.DataFrame(words)
+                        # Create bar chart for top words
+                        fig = px.bar(df, x='word', y='count',
+                                     title=f"Top Words Used by {model_name}",
+                                     labels={'word': 'Word', 'count': 'Frequency'},
+                                     height=400)
+                        # Improve layout
+                        fig.update_layout(
+                            xaxis_title="Word",
+                            yaxis_title="Frequency",
+                            xaxis={'categoryorder':'total descending'}
+                        )
+                        output_components.append(gr.Plot(value=fig))
+                # Show comparison metrics
+                comparisons = bow_results.get("comparisons", {})
+                if comparisons:
+                    for comparison_key, metrics in comparisons.items():
+                        output_components.append(gr.Markdown(f"### Similarity Metrics for {comparison_key}"))
+                        # Format metrics for better display
+                        # Each metric is optional; the :.2f formatting requires the two
+                        # similarity values to be numeric.
+                        if "jaccard_similarity" in metrics:
+                            output_components.append(gr.Markdown(
+                                f"- **Jaccard Similarity**: {metrics['jaccard_similarity']:.2f} "
+                                f"(measures word overlap between responses)"
+                            ))
+                        if "cosine_similarity" in metrics:
+                            output_components.append(gr.Markdown(
+                                f"- **Cosine Similarity**: {metrics['cosine_similarity']:.2f} "
+                                f"(measures how similar the word frequency distributions are)"
+                            ))
+                        if "common_word_count" in metrics:
+                            output_components.append(gr.Markdown(
+                                f"- **Common Words**: {metrics['common_word_count']} words appear in both responses"
+                            ))
+                # Visualize differential words (words with biggest frequency difference)
+                diff_words = bow_results.get("differential_words", [])
+                word_matrix = bow_results.get("word_count_matrix", {})
+                # For a list, len(diff_words) > 0 repeats the truthiness test on diff_words.
+                if diff_words and word_matrix and len(diff_words) > 0:
+                    output_components.append(gr.Markdown("### Words with Biggest Frequency Differences"))
+                    # Create dataframe for plotting
+                    model1, model2 = models[0], models[1]
+                    diff_data = []
+                    for word in diff_words[:15]:  # Limit to top 15 for readability
+                        # Words missing from the count matrix are skipped without notice.
+                        if word in word_matrix:
+                            counts = word_matrix[word]
+                            # A model absent from this word's counts is plotted as 0.
+                            diff_data.append({
+                                "word": word,
+                                model1: counts.get(model1, 0),
+                                model2: counts.get(model2, 0)
+                            })
+                    if diff_data:
+                        diff_df = pd.DataFrame(diff_data)
+                        # Create grouped bar chart
+                        fig = go.Figure()
+                        fig.add_trace(go.Bar(
+                            x=diff_df['word'],
+                            y=diff_df[model1],
+                            name=model1,
+                            marker_color='indianred'
+                        ))
+                        fig.add_trace(go.Bar(
+                            x=diff_df['word'],
+                            y=diff_df[model2],
+                            name=model2,
+                            marker_color='lightsalmon'
+                        ))
+                        fig.update_layout(
+                            title="Word Frequency Comparison",
+                            xaxis_title="Word",
+                            yaxis_title="Frequency",
+                            barmode='group',
+                            height=500
+                        )
+                        output_components.append(gr.Plot(value=fig))
+    # If no components were added, show a message
+    # NOTE(review): the threshold counts components across all prompts, so with two or
+    # more prompts the per-prompt headers alone exceed it and this fallback is not shown
+    # even when no bag-of-words data was rendered.
+    if len(output_components) <= 1:
+        output_components.append(gr.Markdown("No detailed Bag of Words analysis found in results."))
+    return output_components
+def process_and_visualize_analysis(analysis_results):
+    """
+    Process analysis results and create visualizations
+    Args:
+        analysis_results (dict): Analysis results
+    Returns:
+        list: List of gradio components with visualizations
+    """
+    if not analysis_results:
+        return [gr.Markdown("No analysis results available. Please run an analysis first.")]
+    all_components = []
+    # Display the JSON output in a collapsible section for debugging
+    json_text = json.dumps(analysis_results, indent=2)
+    all_components.append(gr.Markdown("### Raw Analysis Results (Expandable)"))
+    all_components.append(gr.Markdown("<details><summary>Click to view raw JSON results</summary>\n\n```json\n" + json_text + "\n```\n\n</details>"))
+    # Check if bag of words analysis is present in any prompt's results
+    has_bow = False
+    for prompt_results in analysis_results.get("analyses", {}).values():
+        if "bag_of_words" in prompt_results:
+            has_bow = True
+            break
+    # Create visualizations for Bag of Words if present
+    if has_bow:
+        all_components.extend(create_bow_visualization(analysis_results))
+    return all_components