Commit cc57712 · Parent(s): 9a806ac
Ryan committed: "update"

Files changed:
- .DS_Store (+0 -0)
- app.py (+21 -25)
- ui/analysis_screen.py (+221 -41)
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
app.py
CHANGED
@@ -559,31 +559,27 @@ def create_app():
             outputs=[llm_analysis_status, llm_analysis_result]
         )
 
-        #
         [removed lines 563-582 are blank in the page render]
-                status_message_visible,
-                status_message
-            ]
-        )
+        # Run analysis with proper parameters
+        run_analysis_btn.click(
+            fn=run_analysis,
+            inputs=[dataset_state, analysis_options, bow_top_slider, ngram_n, ngram_top, topic_count],
+            outputs=[
+                analysis_results_state,
+                analysis_output,
+                visualization_area_visible,
+                analysis_title,
+                prompt_title,
+                models_compared,
+                model1_title,
+                model1_words,
+                model2_title,
+                model2_words,
+                similarity_metrics_title,
+                similarity_metrics,
+                status_message_visible,
+                status_message
+            ]
+        )
 
     return app
 
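Aside on the new wiring: in Gradio, Button.click(fn=..., inputs=[...], outputs=[...]) calls fn with the current values of the input components and maps the returned tuple onto the output components positionally, so run_analysis is expected to return fourteen values in the order listed above. A minimal, self-contained sketch of the same pattern (the components and the demo function below are illustrative, not taken from app.py):

# Illustrative sketch of the click-wiring pattern; names are not from this repo.
import gradio as gr

def run_analysis_demo(text, top_n):
    # Return one value per entry in `outputs`, in the same order.
    status = f"Analyzed {len(text.split())} tokens (top_n={int(top_n)})"
    result = {"word_count": len(text.split())}
    return status, result

with gr.Blocks() as demo:
    text_in = gr.Textbox(label="Input text")
    top_n_in = gr.Slider(1, 50, value=25, label="Top N")
    run_btn = gr.Button("Run analysis")
    status_out = gr.Textbox(label="Status")
    result_out = gr.JSON(label="Result")

    # The returned tuple is routed positionally onto `outputs`.
    run_btn.click(
        fn=run_analysis_demo,
        inputs=[text_in, top_n_in],
        outputs=[status_out, result_out],
    )

if __name__ == "__main__":
    demo.launch()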
ui/analysis_screen.py
CHANGED
@@ -3,13 +3,9 @@ import json
 from visualization.bow_visualizer import process_and_visualize_analysis
 
 # Import analysis modules
-from processors.topic_modeling import compare_topics
+from processors.topic_modeling import compare_topics
 from processors.ngram_analysis import compare_ngrams
-# from processors.bias_detection import compare_bias
 from processors.bow_analysis import compare_bow
-# from processors.metrics import calculate_similarity
-# from processors.diff_highlighter import highlight_differences
-# Add this import at the top
 from processors.text_classifiers import classify_formality, classify_sentiment, classify_complexity, compare_classifications
 
 def create_analysis_screen():
@@ -31,8 +27,8 @@ def create_analysis_screen():
                 "N-gram Analysis",
                 "Topic Modeling",
                 "Bias Detection",
-                "Classifier"
-                "LLM Analysis"
+                "Classifier"
+                # Removed "LLM Analysis" as requested
             ],
             value="Bag of Words",  # Default selection
             label="Select Analysis Type"
@@ -86,15 +82,10 @@ def create_analysis_screen():
                 label="Bias Detection Methods"
             )
 
-            # Classifier parameters
+            # Classifier parameters
             with gr.Group(visible=False) as classifier_params:
                 gr.Markdown("### Classifier Parameters")
-                gr.Markdown("
-
-            # LLM Analysis parameters for future development
-            with gr.Group(visible=False) as llm_params:
-                gr.Markdown("### LLM Analysis Parameters")
-                gr.Markdown("*LLM Analysis options will be available in a future update*")
+                gr.Markdown("Classifies responses based on formality, sentiment, and complexity")
 
         # Function to update parameter visibility based on selected analysis
         def update_params_visibility(selected):
@@ -103,7 +94,6 @@ def create_analysis_screen():
                 ngram_params: gr.update(visible=selected == "N-gram Analysis"),
                 bias_params: gr.update(visible=selected == "Bias Detection"),
                 classifier_params: gr.update(visible=selected == "Classifier"),
-                llm_params: gr.update(visible=selected == "LLM Analysis"),
                 ngram_n: gr.update(visible=selected == "N-gram Analysis"),
                 ngram_top: gr.update(visible=selected == "N-gram Analysis"),
                 topic_count: gr.update(visible=selected == "Topic Modeling"),
@@ -119,7 +109,6 @@ def create_analysis_screen():
                 ngram_params,
                 bias_params,
                 classifier_params,
-                llm_params,
                 ngram_n,
                 ngram_top,
                 topic_count,
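The two hunks above drop llm_params from the dict returned by update_params_visibility and from the component list it is wired to. Returning a dict keyed by components, with gr.update(visible=...) values, is how Gradio toggles several controls from one event. A small self-contained sketch of that pattern (component names here are illustrative, not taken from ui/analysis_screen.py):

# Illustrative sketch of the visibility-toggle pattern; names are placeholders.
import gradio as gr

with gr.Blocks() as demo:
    analysis_type = gr.Radio(
        choices=["Bag of Words", "N-gram Analysis"],
        value="Bag of Words",
        label="Select Analysis Type",
    )
    with gr.Group(visible=True) as bow_group:
        gr.Markdown("### Bag of Words Parameters")
    with gr.Group(visible=False) as ngram_group:
        gr.Markdown("### N-gram Parameters")

    def update_visibility(selected):
        # A dict keyed by component lets one handler update many outputs at once.
        return {
            bow_group: gr.update(visible=selected == "Bag of Words"),
            ngram_group: gr.update(visible=selected == "N-gram Analysis"),
        }

    analysis_type.change(
        fn=update_visibility,
        inputs=[analysis_type],
        outputs=[bow_group, ngram_group],
    )

if __name__ == "__main__":
    demo.launch()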
@@ -136,6 +125,198 @@ def create_analysis_screen():
     # Return the components needed by app.py
     return analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top, topic_count
 
+# Add the implementation of these helper functions
+def extract_important_words(text, top_n=20):
+    """
+    Extract the most important words from a text.
+
+    Args:
+        text (str): Input text
+        top_n (int): Number of top words to return
+
+    Returns:
+        list: List of important words with their counts
+    """
+    # Import necessary modules
+    from collections import Counter
+    import re
+    import nltk
+    from nltk.corpus import stopwords
+    from nltk.tokenize import word_tokenize
+
+    # Make sure nltk resources are available
+    try:
+        stop_words = set(stopwords.words('english'))
+    except:
+        nltk.download('stopwords')
+        stop_words = set(stopwords.words('english'))
+
+    try:
+        tokens = word_tokenize(text.lower())
+    except:
+        nltk.download('punkt')
+        tokens = word_tokenize(text.lower())
+
+    # Remove stopwords and non-alphabetic tokens
+    filtered_tokens = [word for word in tokens if word.isalpha() and word not in stop_words and len(word) > 2]
+
+    # Count word frequencies
+    word_counts = Counter(filtered_tokens)
+
+    # Get the top N words
+    top_words = word_counts.most_common(top_n)
+
+    # Format the result
+    result = [{"word": word, "count": count} for word, count in top_words]
+
+    return result
+
+def calculate_text_similarity(text1, text2):
+    """
+    Calculate similarity metrics between two texts.
+
+    Args:
+        text1 (str): First text
+        text2 (str): Second text
+
+    Returns:
+        dict: Similarity metrics
+    """
+    from processors.metrics import calculate_similarity
+
+    # Calculate similarity using the metrics module
+    metrics = calculate_similarity(text1, text2)
+
+    # Add common word count
+    from collections import Counter
+    import nltk
+    from nltk.corpus import stopwords
+
+    # Make sure nltk resources are available
+    try:
+        stop_words = set(stopwords.words('english'))
+    except:
+        nltk.download('stopwords')
+        stop_words = set(stopwords.words('english'))
+
+    # Simple tokenization and filtering
+    words1 = set([w.lower() for w in nltk.word_tokenize(text1)
+                  if w.isalpha() and w.lower() not in stop_words])
+    words2 = set([w.lower() for w in nltk.word_tokenize(text2)
+                  if w.isalpha() and w.lower() not in stop_words])
+
+    # Calculate common words
+    common_words = words1.intersection(words2)
+
+    # Add to metrics
+    metrics["common_word_count"] = len(common_words)
+
+    return metrics
+
+def extract_ngrams(text, n=2, top_n=10):
+    """
+    Extract the most common n-grams from text.
+
+    Args:
+        text (str): Input text
+        n (int or str): Size of n-grams
+        top_n (int): Number of top n-grams to return
+
+    Returns:
+        list: List of important n-grams with their counts
+    """
+    import nltk
+    from nltk.util import ngrams
+    from collections import Counter
+
+    # Convert n to int if it's a string
+    if isinstance(n, str):
+        n = int(n)
+
+    # Make sure nltk resources are available
+    try:
+        tokens = nltk.word_tokenize(text.lower())
+    except:
+        nltk.download('punkt')
+        tokens = nltk.word_tokenize(text.lower())
+
+    # Generate n-grams
+    n_grams = list(ngrams(tokens, n))
+
+    # Convert n-grams to strings for easier handling
+    n_gram_strings = [' '.join(gram) for gram in n_grams]
+
+    # Count n-gram frequencies
+    n_gram_counts = Counter(n_gram_strings)
+
+    # Get the top N n-grams
+    top_n_grams = n_gram_counts.most_common(top_n)
+
+    # Format the result
+    result = [{"ngram": ngram, "count": count} for ngram, count in top_n_grams]
+
+    return result
+
+def compare_ngrams(text1, text2, n=2):
+    """
+    Compare n-grams between two texts.
+
+    Args:
+        text1 (str): First text
+        text2 (str): Second text
+        n (int or str): Size of n-grams
+
+    Returns:
+        dict: Comparison metrics
+    """
+    import nltk
+    from nltk.util import ngrams
+    from collections import Counter
+
+    # Convert n to int if it's a string
+    if isinstance(n, str):
+        n = int(n)
+
+    # Make sure nltk resources are available
+    try:
+        tokens1 = nltk.word_tokenize(text1.lower())
+        tokens2 = nltk.word_tokenize(text2.lower())
+    except:
+        nltk.download('punkt')
+        tokens1 = nltk.word_tokenize(text1.lower())
+        tokens2 = nltk.word_tokenize(text2.lower())
+
+    # Generate n-grams
+    n_grams1 = set([' '.join(gram) for gram in ngrams(tokens1, n)])
+    n_grams2 = set([' '.join(gram) for gram in ngrams(tokens2, n)])
+
+    # Calculate common n-grams
+    common_n_grams = n_grams1.intersection(n_grams2)
+
+    # Return comparison metrics
+    return {
+        "common_ngram_count": len(common_n_grams)
+    }
+
+def perform_topic_modeling(texts, model_names, n_topics=3):
+    """
+    Perform topic modeling on a list of texts.
+
+    Args:
+        texts (list): List of text documents
+        model_names (list): Names of the models
+        n_topics (int): Number of topics to extract
+
+    Returns:
+        dict: Topic modeling results
+    """
+    from processors.topic_modeling import compare_topics
+
+    # Use the topic modeling processor
+    result = compare_topics(texts, model_names, n_topics=n_topics)
+
+    return result
+
 # Process analysis request function
 def process_analysis_request(dataset, selected_analysis, parameters):
     """
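If these helpers are importable as ui.analysis_screen (an assumption based on the file path; importing the module also pulls in gradio and the processors package, and NLTK fetches its stopwords/punkt data on first use), they can be exercised directly on a pair of strings:

# Hypothetical usage of the helpers added above; the import path is inferred
# from ui/analysis_screen.py and is not stated in the commit itself.
from ui.analysis_screen import extract_important_words, extract_ngrams, compare_ngrams

text_a = "The quick brown fox jumps over the lazy dog while the dog sleeps."
text_b = "A lazy dog sleeps while a quick fox jumps over the fence."

print(extract_important_words(text_a, top_n=5))  # e.g. [{'word': 'dog', 'count': 2}, ...]
print(extract_ngrams(text_a, n=2, top_n=3))      # top bigrams with their counts
print(compare_ngrams(text_a, text_b, n=2))       # {'common_ngram_count': ...}

Note that this module-level compare_ngrams(text1, text2, n=2) is defined after the compare_ngrams imported from processors.ngram_analysis at the top of the file, so it shadows that import at module level, while the call in process_analysis_request below passes two lists plus top_n; as rendered in this diff, the two signatures do not obviously line up.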
@@ -172,41 +353,41 @@ def process_analysis_request(dataset, selected_analysis, parameters):
 
         # Process based on the selected analysis type
         if selected_analysis == "Bag of Words":
-            # Perform Bag of Words analysis
             [removed lines 176-181 are blank in the page render]
-                "comparisons": {
-                    f"{model1_name} vs {model2_name}": calculate_text_similarity(model1_response, model2_response)
-                }
-            }
+            # Perform Bag of Words analysis using the processor
+            bow_results = compare_bow(
+                [model1_response, model2_response],
+                [model1_name, model2_name],
+                top_n=parameters.get("bow_top", 25)
+            )
+            results["analyses"][prompt_text]["bag_of_words"] = bow_results
 
         elif selected_analysis == "N-gram Analysis":
             # Perform N-gram analysis
             ngram_size = parameters.get("ngram_n", 2)
+            if isinstance(ngram_size, str):
+                ngram_size = int(ngram_size)
+
             top_n = parameters.get("ngram_top", 15)
+            if isinstance(top_n, str):
+                top_n = int(top_n)
 
             [removed lines 192-199 are blank in the page render]
-                    f"{model1_name} vs {model2_name}": compare_ngrams(model1_response, model2_response, n=ngram_size)
-                }
-            }
+            # Use the processor
+            ngram_results = compare_ngrams(
+                [model1_response, model2_response],
+                [model1_name, model2_name],
+                n=ngram_size,
+                top_n=top_n
+            )
+            results["analyses"][prompt_text]["ngram_analysis"] = ngram_results
 
         elif selected_analysis == "Topic Modeling":
             # Perform topic modeling analysis
             topic_count = parameters.get("topic_count", 3)
+            if isinstance(topic_count, str):
+                topic_count = int(topic_count)
 
             try:
-                topic_results =
+                topic_results = compare_topics(
                     [model1_response, model2_response],
                     model_names=[model1_name, model2_name],
                     n_topics=topic_count
@@ -223,7 +404,7 @@ def process_analysis_request(dataset, selected_analysis, parameters):
             }
 
         elif selected_analysis == "Classifier":
-            # Perform classifier analysis
+            # Perform classifier analysis
             results["analyses"][prompt_text]["classifier"] = {
                 "models": [model1_name, model2_name],
                 "classifications": {
@@ -247,4 +428,3 @@ def process_analysis_request(dataset, selected_analysis, parameters):
 
     # Return both the analysis results and a placeholder for visualization data
     return results, None
-
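Taken together, these hunks have process_analysis_request store each analysis under results["analyses"][prompt_text], keyed by analysis type, and the function still returns the results dict plus a None placeholder for visualization data. A rough sketch of the shape implied by the visible assignments (placeholder values only; the exact contents of each processor's output are not shown in this commit):

# Illustrative shape only; values are placeholders, not real analysis output.
results = {
    "analyses": {
        "example prompt text": {              # keyed by prompt_text
            "bag_of_words": {},               # filled from compare_bow(...)
            "ngram_analysis": {},             # filled from compare_ngrams(...)
            "classifier": {
                "models": ["model_a", "model_b"],
                "classifications": {},        # per-model classifier results
            },
        }
    }
}

visualization_placeholder = None              # mirrors `return results, None`
print(sorted(results["analyses"]["example prompt text"].keys()))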