File size: 17,974 Bytes
4ebaf08
 
6528c77
4ebaf08
fd06f0b
cc57712
08f222a
4ebaf08
e815033
cc937dd
d7de222
 
 
 
 
4ebaf08
 
 
fe5be12
4ebaf08
 
da60688
4ebaf08
fe5be12
4ebaf08
fe5be12
4ebaf08
fe5be12
 
 
 
 
 
 
 
cc57712
 
fe5be12
 
 
 
 
 
 
 
 
 
 
da60688
 
08f222a
fe5be12
 
 
 
 
 
 
 
 
 
 
 
 
4ebaf08
da60688
fe5be12
 
da60688
fe5be12
bb4ec07
fe5be12
 
d7de222
 
4ebaf08
cc57712
fe5be12
 
cc57712
fe5be12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ebaf08
fe5be12
 
4ebaf08
fe5be12
 
 
da60688
 
4ebaf08
cc57712
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4798a1
 
cc57712
 
 
 
 
 
 
 
 
 
 
 
 
e4798a1
 
 
 
 
 
cc57712
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
08f222a
a800293
4ebaf08
c435293
 
 
 
 
 
 
 
 
4ebaf08
d7de222
 
c435293
d7de222
c435293
 
 
 
 
 
 
 
d7de222
c435293
 
 
 
 
 
 
 
 
 
 
 
d7de222
 
 
 
 
 
 
cc57712
d7de222
a800293
d7de222
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc937dd
d7de222
cc937dd
d7de222
 
 
 
 
cc937dd
d7de222
cc937dd
d7de222
 
 
 
 
 
cc937dd
d7de222
 
 
 
 
 
 
 
 
 
 
 
 
 
cc937dd
d7de222
cc937dd
d7de222
 
 
 
 
 
 
 
 
 
 
 
 
cc937dd
d7de222
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c435293
 
d7de222
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
import gradio as gr
import json
from visualization.bow_visualizer import process_and_visualize_analysis

# Import analysis modules
from processors.topic_modeling import compare_topics
from processors.ngram_analysis import compare_ngrams
from processors.bow_analysis import compare_bow
from processors.text_classifiers import classify_formality, classify_sentiment, classify_complexity, compare_classifications
from processors.bias_detection import compare_bias
import logging

# Set up module-level logging.
# NOTE: basicConfig configures the *root* logger as an import-time side
# effect, so importing this module affects logging for the whole process.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('analysis_screen')

def create_analysis_screen():
    """
    Create the analysis options screen.

    Builds the Gradio UI for choosing one analysis type, the per-type
    parameter widgets (shown/hidden via the radio's ``change`` event),
    the "Run Analysis" button, and a hidden JSON component that stores
    the raw analysis results for downstream visualization.

    NOTE: component creation order inside the ``gr.Column``/``gr.Group``
    context managers determines the rendered layout — do not reorder.

    Returns:
        tuple: (analysis_options, analysis_params, run_analysis_btn, analysis_output, ngram_n, topic_count)
    """
    with gr.Column() as analysis_screen:
        gr.Markdown("## Analysis Options")
        gr.Markdown("Select which analysis you want to run on the LLM responses.")
        
        # Change from CheckboxGroup to Radio for analysis selection
        # (Radio guarantees exactly one analysis type is active at a time)
        with gr.Group():
            analysis_options = gr.Radio(
                choices=[
                    "Bag of Words",
                    "N-gram Analysis",
                    "Topic Modeling",
                    "Bias Detection",
                    "Classifier"
                    # Removed "LLM Analysis" as requested
                ],
                value="Bag of Words",  # Default selection
                label="Select Analysis Type"
            )
        
        # Create N-gram parameters accessible at top level so app.py can
        # read the value directly; hidden until "N-gram Analysis" is chosen.
        # Choices are strings (Gradio radio values); callers convert to int.
        ngram_n = gr.Radio(
            choices=["1", "2", "3"], value="2", 
            label="N-gram Size",
            visible=False
        )
        
        # Removed ngram_top slider
        
        # Create topic modeling parameter accessible at top level;
        # hidden until "Topic Modeling" is chosen.
        topic_count = gr.Slider(
            minimum=2, maximum=10, value=3, step=1,
            label="Number of Topics",
            visible=False
        )
        
        # Parameters for each analysis type
        with gr.Group() as analysis_params:
            # Topic modeling parameters
            with gr.Group(visible=False) as topic_params:
                gr.Markdown("### Topic Modeling Parameters")
                # We'll use the topic_count defined above
            
            # N-gram parameters group (using external ngram_n, removed ngram_top)
            with gr.Group(visible=False) as ngram_params:
                gr.Markdown("### N-gram Parameters")
                # We're already using ngram_n defined above
                
            # Bias detection parameters - simplified with no checkboxes
            with gr.Group(visible=False) as bias_params:
                gr.Markdown("### Bias Detection Parameters")
                gr.Markdown("Using partisan leaning bias detection and sentiment analysis")
                gr.Markdown("This analysis detects sentiment bias, partisan leaning, and issue framing patterns.")
            
            # Classifier parameters
            with gr.Group(visible=False) as classifier_params:
                gr.Markdown("### Classifier Parameters")
                gr.Markdown("Classifies responses based on formality, sentiment, and complexity")
                
            # Function to update parameter visibility based on selected analysis.
            # Returns a dict mapping each component to a visibility update so
            # only the widgets for the selected analysis are shown.
            def update_params_visibility(selected):
                return {
                    topic_params: gr.update(visible=selected == "Topic Modeling"),
                    ngram_params: gr.update(visible=selected == "N-gram Analysis"),
                    bias_params: gr.update(visible=selected == "Bias Detection"),
                    classifier_params: gr.update(visible=selected == "Classifier"),
                    ngram_n: gr.update(visible=selected == "N-gram Analysis"),
                    topic_count: gr.update(visible=selected == "Topic Modeling"),
                }
                
            # Set up event handler for analysis selection
            analysis_options.change(
                fn=update_params_visibility,
                inputs=[analysis_options],
                outputs=[
                    topic_params, 
                    ngram_params, 
                    bias_params, 
                    classifier_params,
                    ngram_n, 
                    topic_count,
                ]
            )
        
        # Run analysis button
        run_analysis_btn = gr.Button("Run Analysis", variant="primary", size="large")
        
        # Analysis output area - hidden JSON component to store raw results
        analysis_output = gr.JSON(label="Analysis Results", visible=False)
    
    # Return the components needed by app.py, with ngram_top removed
    return analysis_options, analysis_params, run_analysis_btn, analysis_output, ngram_n, topic_count

# Add the implementation of these helper functions
def extract_important_words(text, top_n=20):
    """
    Extract the most important words from a text.

    Args:
        text (str): Input text
        top_n (int): Number of top words to return

    Returns:
        list: Up to top_n dicts like {"word": str, "count": int},
            ordered by descending frequency.
    """
    # Import necessary modules (kept local so NLTK is only required
    # when this helper is actually used)
    from collections import Counter
    import nltk
    from nltk.corpus import stopwords
    from nltk.tokenize import word_tokenize

    # Lazily fetch NLTK data on first use. Catch LookupError specifically
    # (what NLTK raises for missing corpora) instead of a bare except,
    # which would also swallow KeyboardInterrupt/SystemExit and real bugs.
    try:
        stop_words = set(stopwords.words('english'))
    except LookupError:
        nltk.download('stopwords')
        stop_words = set(stopwords.words('english'))

    try:
        tokens = word_tokenize(text.lower())
    except LookupError:
        nltk.download('punkt')
        tokens = word_tokenize(text.lower())

    # Keep alphabetic, non-stopword tokens longer than 2 characters
    filtered_tokens = [
        word for word in tokens
        if word.isalpha() and word not in stop_words and len(word) > 2
    ]

    # Count word frequencies and keep the top N
    word_counts = Counter(filtered_tokens)
    top_words = word_counts.most_common(top_n)

    # Format the result
    return [{"word": word, "count": count} for word, count in top_words]

def calculate_text_similarity(text1, text2):
    """
    Calculate similarity metrics between two texts.

    Args:
        text1 (str): First text
        text2 (str): Second text

    Returns:
        dict: Metrics from processors.metrics.calculate_similarity,
            extended with "common_word_count" — the number of distinct
            alphabetic, non-stopword words shared by both texts.
    """
    from processors.metrics import calculate_similarity

    # Base similarity scores come from the shared metrics module
    metrics = calculate_similarity(text1, text2)

    import nltk
    from nltk.corpus import stopwords

    # Lazily fetch the stopword corpus on first use. Catch LookupError
    # (what NLTK raises for missing data) instead of a bare except, which
    # would also hide KeyboardInterrupt/SystemExit and real bugs.
    try:
        stop_words = set(stopwords.words('english'))
    except LookupError:
        nltk.download('stopwords')
        stop_words = set(stopwords.words('english'))

    # Distinct lowercase alphabetic non-stopword words per text
    words1 = set(w.lower() for w in nltk.word_tokenize(text1)
                 if w.isalpha() and w.lower() not in stop_words)
    words2 = set(w.lower() for w in nltk.word_tokenize(text2)
                 if w.isalpha() and w.lower() not in stop_words)

    # Add the overlap count to the metrics
    metrics["common_word_count"] = len(words1.intersection(words2))

    return metrics

def extract_ngrams(text, n=2, top_n=10):
    """
    Extract the most common n-grams from text.

    Args:
        text (str): Input text
        n (int or str): Size of n-grams (strings such as "2" are accepted
            because the UI radio buttons return string values)
        top_n (int): Number of top n-grams to return

    Returns:
        list: Up to top_n dicts like {"ngram": str, "count": int},
            ordered by descending frequency.
    """
    import nltk
    from nltk.util import ngrams
    from collections import Counter

    # Convert n to int if it's a string (UI components pass strings)
    if isinstance(n, str):
        n = int(n)

    # Lazily fetch the punkt tokenizer data. Catch LookupError (what NLTK
    # raises for missing data) rather than a bare except, which would also
    # swallow KeyboardInterrupt/SystemExit and unrelated errors.
    try:
        tokens = nltk.word_tokenize(text.lower())
    except LookupError:
        nltk.download('punkt')
        tokens = nltk.word_tokenize(text.lower())

    # Join token tuples into space-separated strings for easier counting
    n_gram_strings = [' '.join(gram) for gram in ngrams(tokens, n)]

    # Count n-gram frequencies and keep the top N
    n_gram_counts = Counter(n_gram_strings)
    top_n_grams = n_gram_counts.most_common(top_n)

    # Format the result
    return [{"ngram": ngram, "count": count} for ngram, count in top_n_grams]

def compare_ngrams(text1, text2, n=2):
    """
    Compare n-grams between two texts.

    NOTE(review): this module-level definition shadows the
    ``compare_ngrams`` imported from ``processors.ngram_analysis`` at the
    top of the file; ``process_analysis_request`` works around this by
    re-importing the processor under the ``ngram_processor`` alias.
    Consider renaming one of the two.

    Args:
        text1 (str or list): First text
        text2 (str or list): Second text
        n (int or str): Size of n-grams

    Returns:
        dict: {"common_ngram_count": int} — number of distinct n-grams
            shared by both texts.
    """
    import nltk
    from nltk.util import ngrams

    # Convert n to int if it's a string (UI components pass strings)
    if isinstance(n, str):
        n = int(n)

    # Accept lists (e.g. several responses) by flattening into one string
    if isinstance(text1, list):
        text1 = ' '.join(str(item) for item in text1)
    if isinstance(text2, list):
        text2 = ' '.join(str(item) for item in text2)

    # Lazily fetch the punkt tokenizer data. Catch LookupError (what NLTK
    # raises for missing data) instead of a bare except, which would also
    # hide KeyboardInterrupt/SystemExit and real bugs.
    try:
        tokens1 = nltk.word_tokenize(text1.lower())
        tokens2 = nltk.word_tokenize(text2.lower())
    except LookupError:
        nltk.download('punkt')
        tokens1 = nltk.word_tokenize(text1.lower())
        tokens2 = nltk.word_tokenize(text2.lower())

    # Distinct n-grams per text, joined into strings for set operations
    n_grams1 = set(' '.join(gram) for gram in ngrams(tokens1, n))
    n_grams2 = set(' '.join(gram) for gram in ngrams(tokens2, n))

    # Return comparison metrics
    return {
        "common_ngram_count": len(n_grams1.intersection(n_grams2))
    }

def perform_topic_modeling(texts, model_names, n_topics=3):
    """
    Perform topic modeling on a list of texts.

    Args:
        texts (list): List of text documents
        model_names (list): Names of the models
        n_topics (int): Number of topics to extract

    Returns:
        dict: Topic modeling results as produced by
            processors.topic_modeling.compare_topics.
    """
    from processors.topic_modeling import compare_topics

    # Delegate entirely to the shared topic-modeling processor
    return compare_topics(texts, model_names, n_topics=n_topics)

# Process analysis request function
def process_analysis_request(dataset, selected_analysis, parameters):
    """
    Process the analysis request based on the selected options.

    Args:
        dataset (dict): The input dataset. Expects dataset["entries"] to
            contain at least two entries, each with "model" and "response"
            keys; the first entry must also carry the shared "prompt".
        selected_analysis (str): The selected analysis type
        parameters (dict): Additional parameters for the analysis
            (e.g. "ngram_n", "ngram_top", "topic_count")

    Returns:
        tuple: (analysis_results, visualization_data). visualization_data
            is currently always None (placeholder for the caller).
    """
    logger.info(f"Processing analysis request: {selected_analysis}")

    if not dataset or "entries" not in dataset or not dataset["entries"]:
        logger.warning("No valid dataset provided for analysis")
        return {}, None

    # Guard against single-entry datasets: the comparison below reads both
    # entries[0] and entries[1], which would otherwise raise IndexError.
    if len(dataset["entries"]) < 2:
        logger.warning("Dataset must contain at least two entries to compare")
        return {"error": "At least two model responses are required for comparison"}, None

    # Initialize the results structure
    results = {"analyses": {}}

    # Get the prompt text from the first entry
    prompt_text = dataset["entries"][0].get("prompt", "")
    if not prompt_text:
        logger.warning("No prompt found in dataset")
        return {"error": "No prompt found in dataset"}, None

    # Initialize the analysis container for this prompt
    results["analyses"][prompt_text] = {}

    # Get model names and responses
    model1_name = dataset["entries"][0].get("model", "Model 1")
    model2_name = dataset["entries"][1].get("model", "Model 2")

    model1_response = dataset["entries"][0].get("response", "")
    model2_response = dataset["entries"][1].get("response", "")

    logger.info(f"Comparing responses from {model1_name} and {model2_name}")

    try:
        # Process based on the selected analysis type
        if selected_analysis == "Bag of Words":
            # Use fixed default value of 25 for top_n (the UI slider was removed)
            top_n = 25

            logger.info(f"Running Bag of Words analysis with top_n={top_n}")

            # Perform Bag of Words analysis using the processor
            bow_results = compare_bow(
                [model1_response, model2_response],
                [model1_name, model2_name],
                top_n=top_n
            )
            results["analyses"][prompt_text]["bag_of_words"] = bow_results

        elif selected_analysis == "N-gram Analysis":
            # UI radio/slider values may arrive as strings; normalize to int
            ngram_size = parameters.get("ngram_n", 2)
            if isinstance(ngram_size, str):
                ngram_size = int(ngram_size)

            top_n = parameters.get("ngram_top", 15)
            if isinstance(top_n, str):
                top_n = int(top_n)

            logger.info(f"Running N-gram analysis with n={ngram_size}, top_n={top_n}")

            # Use the processor from the dedicated ngram_analysis module.
            # Aliased because the module-level compare_ngrams defined in
            # this file shadows the top-of-file import of the same name.
            from processors.ngram_analysis import compare_ngrams as ngram_processor
            ngram_results = ngram_processor(
                [model1_response, model2_response],
                [model1_name, model2_name],
                n=ngram_size,
                top_n=top_n
            )
            results["analyses"][prompt_text]["ngram_analysis"] = ngram_results

        elif selected_analysis == "Topic Modeling":
            # Perform topic modeling analysis
            topic_count = parameters.get("topic_count", 3)
            if isinstance(topic_count, str):
                topic_count = int(topic_count)

            logger.info(f"Running Topic Modeling analysis with n_topics={topic_count}")

            # Topic modeling can fail on short texts, so trap errors here
            # and report them in the results instead of aborting the run.
            try:
                topic_results = compare_topics(
                    texts_set_1=[model1_response],
                    texts_set_2=[model2_response],
                    n_topics=topic_count,
                    model_names=[model1_name, model2_name])

                results["analyses"][prompt_text]["topic_modeling"] = topic_results
            except Exception as e:
                import traceback
                error_msg = f"Topic modeling error: {str(e)}\n{traceback.format_exc()}"
                logger.error(error_msg)
                results["analyses"][prompt_text]["topic_modeling"] = {
                    "models": [model1_name, model2_name],
                    "error": str(e),
                    "message": "Topic modeling failed. Try with longer text or different parameters."
                }

        elif selected_analysis == "Classifier":
            # Perform classifier analysis (formality, sentiment, complexity)
            logger.info("Running Classifier analysis")

            results["analyses"][prompt_text]["classifier"] = {
                "models": [model1_name, model2_name],
                "classifications": {
                    model1_name: {
                        "formality": classify_formality(model1_response),
                        "sentiment": classify_sentiment(model1_response),
                        "complexity": classify_complexity(model1_response)
                    },
                    model2_name: {
                        "formality": classify_formality(model2_response),
                        "sentiment": classify_sentiment(model2_response),
                        "complexity": classify_complexity(model2_response)
                    }
                },
                "differences": compare_classifications(model1_response, model2_response)
            }

        elif selected_analysis == "Bias Detection":
            # Use partisan leaning bias detection by default
            logger.info("Running Bias Detection analysis")

            try:
                # Perform bias detection analysis
                logger.info(f"Calling compare_bias with model names: {model1_name}, {model2_name}")
                logger.info(f"Text lengths - Text1: {len(model1_response)}, Text2: {len(model2_response)}")

                bias_results = compare_bias(
                    model1_response,
                    model2_response,
                    model_names=[model1_name, model2_name]
                )

                logger.info(f"Bias detection complete. Result has keys: {bias_results.keys() if bias_results else 'None'}")
                results["analyses"][prompt_text]["bias_detection"] = bias_results

            except Exception as e:
                import traceback
                error_msg = f"Bias detection error: {str(e)}\n{traceback.format_exc()}"
                logger.error(error_msg)
                results["analyses"][prompt_text]["bias_detection"] = {
                    "models": [model1_name, model2_name],
                    "error": str(e),
                    "message": "Bias detection failed. Try with different parameters."
                }

        else:
            # Unknown analysis type
            logger.warning(f"Unknown analysis type: {selected_analysis}")
            results["analyses"][prompt_text]["message"] = "Please select a valid analysis type."

    except Exception as e:
        # Top-level safety net: report the failure in the results payload
        # rather than crashing the UI callback.
        import traceback
        error_msg = f"Error in analysis: {str(e)}\n{traceback.format_exc()}"
        logger.error(error_msg)
        results = {
            "error": error_msg,
            "analyses": {
                prompt_text: {
                    "message": f"Analysis failed: {str(e)}"
                }
            }
        }

    # Return both the analysis results and a placeholder for visualization data
    return results, None