Spaces:

RyanS974
/

525GradioApp

Build error

App Files Files Community

Ryan committed on Apr 24

Commit

769095a

1 Parent(s): 0071ad3

update

Browse files

Files changed (3) hide show

README.md +17 -0
app.py +88 -43
requirements.txt +1 -0

README.md CHANGED Viewed

@@ -47,6 +47,22 @@ Once you have loaded a dataset, you now have four options:
 - Bias Detection
 - Classifier
 ## RoBERTa Sentiment
@@ -212,6 +228,7 @@ Limitations:
 ## Bias Detection
 # Contributions

 - Bias Detection
 - Classifier
+### Bag of Words
+### N-grams
+### Bias Detection
+### Classifier
 ## RoBERTa Sentiment
 ## Bias Detection
 # Contributions

app.py CHANGED Viewed

@@ -11,6 +11,7 @@ import matplotlib.pyplot as plt
 import io
 import base64
 import datetime
 # Download necessary NLTK resources function remains unchanged
 def download_nltk_resources():
@@ -135,12 +136,11 @@ def create_app():
             status_message = gr.Markdown(visible=False)
             # Define a helper function to extract parameter values and run the analysis
-            def run_analysis(dataset, selected_analysis, ngram_n, topic_count, existing_log):
                 try:
                     if not dataset or "entries" not in dataset or not dataset["entries"]:
                         return (
                             {},  # analysis_results_state
-                            existing_log,  # no changes to user_analysis_log
                             False,  # analysis_output visibility
                             False,  # visualization_area_visible
                             gr.update(visible=False),  # analysis_title
@@ -169,44 +169,10 @@ def create_app():
                     # Process the analysis request - passing selected_analysis as a string
                     analysis_results, _ = process_analysis_request(dataset, selected_analysis, parameters)
-                    # NEW: Store the results in the user_analysis_log
-                    updated_log = existing_log.copy() if existing_log else {}
-                    # Get the prompt text for identifying this analysis
-                    prompt_text = None
-                    if analysis_results and "analyses" in analysis_results:
-                        prompt_text = list(analysis_results["analyses"].keys())[0] if analysis_results["analyses"] else None
-                    if prompt_text:
-                        # Initialize this prompt in the log if it doesn't exist
-                        if prompt_text not in updated_log:
-                            updated_log[prompt_text] = {}
-                        # Store the results for this analysis type
-                        if selected_analysis in ["Bag of Words", "N-gram Analysis", "Bias Detection", "Classifier"]:
-                            # Only store if the analysis was actually performed and has results
-                            analyses = analysis_results["analyses"][prompt_text]
-                            # Map the selected analysis to its key in the analyses dict
-                            analysis_key_map = {
-                                "Bag of Words": "bag_of_words",
-                                "N-gram Analysis": "ngram_analysis",
-                                "Bias Detection": "bias_detection",
-                                "Classifier": "classifier"
-                            }
-                            if analysis_key_map[selected_analysis] in analyses:
-                                # Store the specific analysis result
-                                updated_log[prompt_text][selected_analysis] = {
-                                    "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                                    "result": analyses[analysis_key_map[selected_analysis]]
-                                }
                     # If there's an error or no results
                     if not analysis_results or "analyses" not in analysis_results or not analysis_results["analyses"]:
                         return (
                             analysis_results,
-                            updated_log,  # Return the updated log
                             False,
                             False,
                             gr.update(visible=False),
@@ -251,7 +217,6 @@ def create_app():
                     if "message" in analyses:
                         return (
                             analysis_results,
-                            updated_log,  # Return the updated log
                             False,
                             False,
                             gr.update(visible=False),
@@ -349,7 +314,13 @@ def create_app():
                                 model1_title_visible = True
                                 model1_title_value = f"#### Top {size_name} Used by {model1_name}"
-                                ngram_list = [f"**{item['ngram']}** ({item['count']})" for item in important_ngrams[model1_name][:10]]
                                 model1_words_visible = True
                                 model1_words_value = ", ".join(ngram_list)
@@ -357,7 +328,13 @@ def create_app():
                                 model2_title_visible = True
                                 model2_title_value = f"#### Top {size_name} Used by {model2_name}"
-                                ngram_list = [f"**{item['ngram']}** ({item['count']})" for item in important_ngrams[model2_name][:10]]
                                 model2_words_visible = True
                                 model2_words_value = ", ".join(ngram_list)
@@ -374,6 +351,78 @@ def create_app():
                                     similarity_metrics_value = f"""
                                     - **Common {size_name}**: {common_count} {size_name.lower()} appear in both responses
                                     """
                     # Check for Topic Modeling analysis
                     elif selected_analysis == "Topic Modeling" and "topic_modeling" in analyses:
@@ -575,7 +624,6 @@ def create_app():
                     if not visualization_area_visible:
                         return (
                             analysis_results,
-                            updated_log,  # Return the updated log
                             False,
                             False,
                             gr.update(visible=False),
@@ -586,7 +634,6 @@ def create_app():
                             gr.update(visible=False),
                             gr.update(visible=False),
                             gr.update(visible=False),
-                            gr.update(visible=False),
                             True,  # status_message_visible
                             gr.update(visible=True, value="❌ **No visualization data found.** Make sure to select a valid analysis option.")
                         )
@@ -594,7 +641,6 @@ def create_app():
                     # Return all updated component values
                     return (
                         analysis_results,  # analysis_results_state
-                        updated_log,  # Return the updated log
                         False,  # analysis_output visibility
                         True,   # visualization_area_visible
                         gr.update(visible=True),  # analysis_title
@@ -617,7 +663,6 @@ def create_app():
                     return (
                         {"error": error_msg},  # analysis_results_state
-                        existing_log,  # Return unchanged log
                         True,  # analysis_output visibility (show raw JSON for debugging)
                         False,  # visualization_area_visible
                         gr.update(visible=False),

 import io
 import base64
 import datetime
+from PIL import Image
 # Download necessary NLTK resources function remains unchanged
 def download_nltk_resources():
             status_message = gr.Markdown(visible=False)
             # Define a helper function to extract parameter values and run the analysis
+            def run_analysis(dataset, selected_analysis, ngram_n, topic_count):
                 try:
                     if not dataset or "entries" not in dataset or not dataset["entries"]:
                         return (
                             {},  # analysis_results_state
                             False,  # analysis_output visibility
                             False,  # visualization_area_visible
                             gr.update(visible=False),  # analysis_title
                     # Process the analysis request - passing selected_analysis as a string
                     analysis_results, _ = process_analysis_request(dataset, selected_analysis, parameters)
                     # If there's an error or no results
                     if not analysis_results or "analyses" not in analysis_results or not analysis_results["analyses"]:
                         return (
                             analysis_results,
                             False,
                             False,
                             gr.update(visible=False),
                     if "message" in analyses:
                         return (
                             analysis_results,
                             False,
                             False,
                             gr.update(visible=False),
                                 model1_title_visible = True
                                 model1_title_value = f"#### Top {size_name} Used by {model1_name}"
+                                # Create a better formatted list of n-grams
+                                ngram_list = []
+                                for item in important_ngrams[model1_name][:10]:
+                                    ngram_text = item['ngram']
+                                    ngram_count = item['count']
+                                    ngram_list.append(f"**{ngram_text}** ({ngram_count})")
                                 model1_words_visible = True
                                 model1_words_value = ", ".join(ngram_list)
                                 model2_title_visible = True
                                 model2_title_value = f"#### Top {size_name} Used by {model2_name}"
+                                # Create a better formatted list of n-grams
+                                ngram_list = []
+                                for item in important_ngrams[model2_name][:10]:
+                                    ngram_text = item['ngram']
+                                    ngram_count = item['count']
+                                    ngram_list.append(f"**{ngram_text}** ({ngram_count})")
                                 model2_words_visible = True
                                 model2_words_value = ", ".join(ngram_list)
                                     similarity_metrics_value = f"""
                                     - **Common {size_name}**: {common_count} {size_name.lower()} appear in both responses
                                     """
+                            # Create visualization using matplotlib (similar to Visuals tab)
+                            import matplotlib.pyplot as plt
+                            import io
+                            from PIL import Image
+                            # Create a new function to generate N-gram visualizations
+                            def generate_ngram_visualization(important_ngrams, model1_name, model2_name):
+                                """Render side-by-side bar charts of each model's top n-grams.
+
+                                Args:
+                                    important_ngrams: Container keyed by model name; each value
+                                        is a list of dicts with 'ngram' and 'count' entries.
+                                    model1_name: Model plotted in the left panel.
+                                    model2_name: Model plotted in the right panel.
+
+                                Returns:
+                                    A PIL image opened on the rendered PNG bytes.
+                                """
+                                # Keep a handle on the figure so it can be closed below;
+                                # pyplot keeps every figure alive until it is explicitly
+                                # closed, which leaks memory on each analysis run.
+                                fig = plt.figure(figsize=(12, 6))
+                                try:
+                                    panels = enumerate((model1_name, model2_name), start=1)
+                                    for position, model_name in panels:
+                                        # Collect up to ten n-gram counts for this model
+                                        model_data = {}
+                                        if model_name in important_ngrams:
+                                            for item in important_ngrams[model_name][:10]:
+                                                model_data[item['ngram']] = item['count']
+                                        plt.subplot(1, 2, position)
+                                        sorted_data = sorted(model_data.items(), key=lambda x: x[1], reverse=True)[:10]
+                                        terms, counts = zip(*sorted_data) if sorted_data else ([], [])
+                                        # Reverse so the most frequent n-gram is drawn at the top,
+                                        # and truncate long labels to keep the axis readable
+                                        labels = [t[:20] + '...' if len(t) > 20 else t for t in terms[::-1]]
+                                        plt.barh(labels, counts[::-1])
+                                        plt.xlabel('Frequency')
+                                        plt.title(f'Top {size_name} Used by {model_name}')
+                                        plt.tight_layout()
+                                    # Save the plot to a bytes buffer
+                                    buf = io.BytesIO()
+                                    fig.savefig(buf, format='png', dpi=100)
+                                finally:
+                                    # Release the figure even if plotting or saving failed
+                                    plt.close(fig)
+                                buf.seek(0)
+                                # Convert to PIL Image
+                                image = Image.open(buf)
+                                return image
+                            # Create the visualization
+                            try:
+                                viz_image = generate_ngram_visualization(important_ngrams, model1_name, model2_name)
+                                # Convert the image to a base64 string for embedding
+                                buffered = io.BytesIO()
+                                viz_image.save(buffered, format="PNG")
+                                img_str = base64.b64encode(buffered.getvalue()).decode()
+                                # Append the image to the metrics_value
+                                similarity_metrics_value += f"""
+                                <div style="margin-top: 20px;">
+                                <img src="data:image/png;base64,{img_str}" alt="N-gram visualization" style="max-width: 100%;">
+                                </div>
+                                """
+                                similarity_metrics_visible = True
+                            except Exception as viz_error:
+                                print(f"Visualization error: {viz_error}")
+                                # Handle the error gracefully - continue without the visualization
                     # Check for Topic Modeling analysis
                     elif selected_analysis == "Topic Modeling" and "topic_modeling" in analyses:
                     if not visualization_area_visible:
                         return (
                             analysis_results,
                             False,
                             False,
                             gr.update(visible=False),
                             gr.update(visible=False),
                             gr.update(visible=False),
                             gr.update(visible=False),
                             True,  # status_message_visible
                             gr.update(visible=True, value="❌ **No visualization data found.** Make sure to select a valid analysis option.")
                         )
                     # Return all updated component values
                     return (
                         analysis_results,  # analysis_results_state
                         False,  # analysis_output visibility
                         True,   # visualization_area_visible
                         gr.update(visible=True),  # analysis_title
                     return (
                         {"error": error_msg},  # analysis_results_state
                         True,  # analysis_output visibility (show raw JSON for debugging)
                         False,  # visualization_area_visible
                         gr.update(visible=False),

requirements.txt CHANGED Viewed

@@ -7,3 +7,4 @@ plotly>=5.3.0
 matplotlib>=3.4.0
 transformers>=4.15.0
 torch>=1.9.0

 matplotlib>=3.4.0
 transformers>=4.15.0
 torch>=1.9.0
+pillow>=9.0.0