Spaces:

RyanS974
/

525GradioApp

Sleeping

App Files Community

Ryan committed on Apr 23

Commit

d164098

1 Parent(s): 3ae6198

update

Browse files

Files changed (1) hide show

visualization/bow_visualizer.py +81 -144

visualization/bow_visualizer.py CHANGED Viewed

@@ -171,159 +171,96 @@ def process_and_visualize_analysis(analysis_results):
                         word_list = [f"{item['word']} ({item['count']})" for item in words[:10]]
                         components.append(gr.Markdown(f"**{model}**: {', '.join(word_list)}"))
-                # Create a button to show visualizations
-                def show_bow_visualizations():
-                    visualization_components = []
-                    # Generate model-specific word frequency charts
-                    if "important_words" in bow_results:
-                        for model_name, words in bow_results.get("important_words", {}).items():
-                            df = pd.DataFrame(words)
-                            # Create bar chart for top words
-                            fig = px.bar(df.head(15), x='word', y='count',
-                                      title=f"Top Words Used by {model_name}",
-                                      labels={'word': 'Word', 'count': 'Frequency'},
-                                      height=400)
-                            # Improve layout
-                            fig.update_layout(
-                                xaxis_title="Word",
-                                yaxis_title="Frequency",
-                                xaxis={'categoryorder':'total descending'}
-                            )
-                            visualization_components.append(gr.Plot(value=fig))
-                    # Visualize differential words
-                    models = bow_results.get("models", [])
-                    diff_words = bow_results.get("differential_words", [])
-                    word_matrix = bow_results.get("word_count_matrix", {})
-                    if len(models) >= 2 and diff_words and word_matrix:
-                        model1, model2 = models[0], models[1]
-                        diff_data = []
-                        for word in diff_words[:15]:
-                            if word in word_matrix:
-                                counts = word_matrix[word]
-                                diff_data.append({
-                                    "word": word,
-                                    model1: counts.get(model1, 0),
-                                    model2: counts.get(model2, 0)
-                                })
-                        if diff_data:
-                            diff_df = pd.DataFrame(diff_data)
-                            # Create grouped bar chart
-                            fig = go.Figure()
-                            fig.add_trace(go.Bar(
-                                x=diff_df['word'],
-                                y=diff_df[model1],
-                                name=model1,
-                                marker_color='indianred'
-                            ))
-                            fig.add_trace(go.Bar(
-                                x=diff_df['word'],
-                                y=diff_df[model2],
-                                name=model2,
-                                marker_color='lightsalmon'
-                            ))
-                            fig.update_layout(
-                                title="Word Frequency Comparison",
-                                xaxis_title="Word",
-                                yaxis_title="Frequency",
-                                barmode='group',
-                                height=500
-                            )
-                            visualization_components.append(gr.Plot(value=fig))
-                    return visualization_components
-                # Add a button to show visualizations
-                show_button = gr.Button("📊 Show Word Frequency Visualizations")
-                visualization_container = gr.Column(visible=False)
-                def show_visualizations():
-                    return gr.Column(visible=True)
-                show_button.click(
-                    fn=show_visualizations,
-                    outputs=visualization_container
-                )
-                with visualization_container:
-                    # Generate model-specific word frequency charts
-                    if "important_words" in bow_results:
-                        for model_name, words in bow_results.get("important_words", {}).items():
-                            df = pd.DataFrame(words)
-                            # Create bar chart for top words
-                            fig = px.bar(df.head(15), x='word', y='count',
-                                      title=f"Top Words Used by {model_name}",
-                                      labels={'word': 'Word', 'count': 'Frequency'},
-                                      height=400)
-                            # Improve layout
-                            fig.update_layout(
-                                xaxis_title="Word",
-                                yaxis_title="Frequency",
-                                xaxis={'categoryorder':'total descending'}
-                            )
-                            gr.Plot(value=fig)
-                    # Visualize differential words
-                    models = bow_results.get("models", [])
-                    diff_words = bow_results.get("differential_words", [])
-                    word_matrix = bow_results.get("word_count_matrix", {})
-                    if len(models) >= 2 and diff_words and word_matrix:
-                        model1, model2 = models[0], models[1]
-                        diff_data = []
-                        for word in diff_words[:15]:
-                            if word in word_matrix:
-                                counts = word_matrix[word]
-                                diff_data.append({
-                                    "word": word,
-                                    model1: counts.get(model1, 0),
-                                    model2: counts.get(model2, 0)
-                                })
-                        if diff_data:
-                            diff_df = pd.DataFrame(diff_data)
-                            # Create grouped bar chart
-                            fig = go.Figure()
-                            fig.add_trace(go.Bar(
-                                x=diff_df['word'],
-                                y=diff_df[model1],
-                                name=model1,
-                                marker_color='indianred'
-                            ))
-                            fig.add_trace(go.Bar(
-                                x=diff_df['word'],
-                                y=diff_df[model2],
-                                name=model2,
-                                marker_color='lightsalmon'
-                            ))
-                            fig.update_layout(
-                                title="Word Frequency Comparison",
-                                xaxis_title="Word",
-                                yaxis_title="Frequency",
-                                barmode='group',
-                                height=500
-                            )
-                            gr.Plot(value=fig)
-                components.append(show_button)
-                components.append(visualization_container)
         if not components:
             components.append(gr.Markdown("No visualization components could be created from the analysis results."))

                         word_list = [f"{item['word']} ({item['count']})" for item in words[:10]]
                         components.append(gr.Markdown(f"**{model}**: {', '.join(word_list)}"))
+                # Generate and display visualizations directly
+                if "important_words" in bow_results:
+                    components.append(gr.Markdown("### Word Frequency Visualizations"))
+                    for model_name, words in bow_results.get("important_words", {}).items():
+                        print(f"Creating visualization for {model_name}")
+                        df = pd.DataFrame(words)
+                        # Create bar chart for top words
+                        fig = px.bar(df.head(15), x='word', y='count',
+                                  title=f"Top Words Used by {model_name}",
+                                  labels={'word': 'Word', 'count': 'Frequency'},
+                                  height=400)
+                        # Improve layout
+                        fig.update_layout(
+                            xaxis_title="Word",
+                            yaxis_title="Frequency",
+                            xaxis={'categoryorder':'total descending'}
+                        )
+                        components.append(gr.Plot(value=fig))
+                # Visualize differential words
+                models = bow_results.get("models", [])
+                diff_words = bow_results.get("differential_words", [])
+                word_matrix = bow_results.get("word_count_matrix", {})
+                if len(models) >= 2 and diff_words and word_matrix:
+                    model1, model2 = models[0], models[1]
+                    diff_data = []
+                    for word in diff_words[:15]:
+                        if word in word_matrix:
+                            counts = word_matrix[word]
+                            diff_data.append({
+                                "word": word,
+                                model1: counts.get(model1, 0),
+                                model2: counts.get(model2, 0)
+                            })
+                    if diff_data:
+                        diff_df = pd.DataFrame(diff_data)
+                        # Create grouped bar chart
+                        fig = go.Figure()
+                        fig.add_trace(go.Bar(
+                            x=diff_df['word'],
+                            y=diff_df[model1],
+                            name=model1,
+                            marker_color='indianred'
+                        ))
+                        fig.add_trace(go.Bar(
+                            x=diff_df['word'],
+                            y=diff_df[model2],
+                            name=model2,
+                            marker_color='lightsalmon'
+                        ))
+                        fig.update_layout(
+                            title="Word Frequency Comparison Between Models",
+                            xaxis_title="Word",
+                            yaxis_title="Frequency",
+                            barmode='group',
+                            height=500
+                        )
+                        components.append(gr.Plot(value=fig))
+            # Check for N-gram analysis
+            if "ngram_analysis" in analyses:
+                print("Processing N-gram visualization")
+                components.append(gr.Markdown("### N-gram Analysis"))
+                ngram_components = create_ngram_visualization(
+                    {"analyses": {prompt: {"ngram_analysis": analyses["ngram_analysis"]}}}
+                )
+                # Skip the first component if it's a duplicate header
+                if len(ngram_components) > 1:
+                    components.extend(ngram_components[1:])
+            # Check for Topic Modeling analysis
+            if "topic_modeling" in analyses:
+                print("Processing Topic Modeling visualization")
+                components.append(gr.Markdown("### Topic Modeling Analysis"))
+                topic_components = process_and_visualize_topic_analysis(
+                    {"analyses": {prompt: {"topic_modeling": analyses["topic_modeling"]}}}
+                )
+                # Skip the first component if it's a duplicate header
+                if len(topic_components) > 1:
+                    components.extend(topic_components[1:])
         if not components:
             components.append(gr.Markdown("No visualization components could be created from the analysis results."))