Ryan committed
Commit 7138f76 · 1 Parent(s): 8910689
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
app.py CHANGED
@@ -97,7 +97,7 @@ def create_app():
         # Analysis Tab
         with gr.Tab("Analysis"):
             # Use create_analysis_screen to get UI components including visualization container
-            analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top = create_analysis_screen()
+            analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top, topic_count = create_analysis_screen()
 
             # Pre-create visualization components (initially hidden)
             visualization_area_visible = gr.Checkbox(value=False, visible=False, label="Visualization Visible")
@@ -122,7 +122,7 @@ def create_app():
             status_message = gr.Markdown(visible=False)
 
             # Define a helper function to extract parameter values and run the analysis
-            def run_analysis(dataset, selected_analyses, bow_top, ngram_n, ngram_top):
+            def run_analysis(dataset, selected_analysis, bow_top, ngram_n, ngram_top, topic_count):
                 try:
                     if not dataset or "entries" not in dataset or not dataset["entries"]:
                         return (
@@ -145,12 +145,14 @@ def create_app():
                     parameters = {
                         "bow_top": bow_top,
                         "ngram_n": ngram_n,
-                        "ngram_top": ngram_top
+                        "ngram_top": ngram_top,
+                        "topic_count": topic_count
                     }
-                    print("Running analysis with parameters:", parameters)
+                    print(f"Running analysis with selected type: {selected_analysis}")
+                    print("Parameters:", parameters)
 
-                    # Process the analysis request
-                    analysis_results, _ = process_analysis_request(dataset, selected_analyses, parameters)
+                    # Process the analysis request - passing selected_analysis as a string
+                    analysis_results, _ = process_analysis_request(dataset, selected_analysis, parameters)
 
                     # If there's an error or no results
                     if not analysis_results or "analyses" not in analysis_results or not analysis_results["analyses"]:
@@ -168,7 +170,7 @@ def create_app():
                             gr.update(visible=False),
                             gr.update(visible=False),
                             True,
-                            gr.update(visible=True, value="❌ **No results found.** Try different analysis options.")
+                            gr.update(visible=True, value="❌ **No results found.** Try a different analysis option.")
                         )
 
                     # Extract information to display in components
@@ -196,8 +198,27 @@ def create_app():
                    similarity_metrics_visible = False
                    similarity_metrics_value = ""
 
-                    # Check for Bag of Words analysis
-                    if "bag_of_words" in analyses:
+                    # Check for messages from placeholder analyses
+                    if "message" in analyses:
+                        return (
+                            analysis_results,
+                            False,
+                            False,
+                            gr.update(visible=False),
+                            gr.update(visible=False),
+                            gr.update(visible=False),
+                            gr.update(visible=False),
+                            gr.update(visible=False),
+                            gr.update(visible=False),
+                            gr.update(visible=False),
+                            gr.update(visible=False),
+                            gr.update(visible=False),
+                            True,
+                            gr.update(visible=True, value=f"ℹ️ **{analyses['message']}**")
+                        )
+
+                    # Process based on the selected analysis type
+                    if selected_analysis == "Bag of Words" and "bag_of_words" in analyses:
                         visualization_area_visible = True
                         bow_results = analyses["bag_of_words"]
                         models = bow_results.get("models", [])
@@ -252,8 +273,8 @@ def create_app():
                         - **Common Words**: {common_words} words appear in both responses
                         """
 
-                    # Check for N-gram analysis (if not found, we'll fallback to BOW)
-                    if "ngram_analysis" in analyses and not visualization_area_visible:
+                    # Check for N-gram analysis
+                    elif selected_analysis == "N-gram Analysis" and "ngram_analysis" in analyses:
                         visualization_area_visible = True
                         ngram_results = analyses["ngram_analysis"]
                         models = ngram_results.get("models", [])
@@ -304,6 +325,129 @@ def create_app():
                         - **Common {size_name}**: {common_count} {size_name.lower()} appear in both responses
                         """
 
+                    # Check for Topic Modeling analysis
+                    elif selected_analysis == "Topic Modeling" and "topic_modeling" in analyses:
+                        visualization_area_visible = True
+                        topic_results = analyses["topic_modeling"]
+                        models = topic_results.get("models", [])
+                        method = topic_results.get("method", "lda").upper()
+                        n_topics = topic_results.get("n_topics", 3)
+
+                        if len(models) >= 2:
+                            prompt_title_visible = True
+                            prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
+
+                            models_compared_visible = True
+                            models_compared_value = f"### Topic Modeling Analysis ({method}, {n_topics} topics)"
+
+                            # Extract and format topic information
+                            topics = topic_results.get("topics", [])
+
+                            if topics:
+                                # Format topic info for display
+                                topic_info = []
+                                for topic in topics[:3]:  # Show first 3 topics
+                                    topic_id = topic.get("id", 0)
+                                    words = topic.get("words", [])[:5]  # Top 5 words per topic
+
+                                    if words:
+                                        topic_info.append(f"**Topic {topic_id+1}**: {', '.join(words)}")
+
+                                if topic_info:
+                                    model1_title_visible = True
+                                    model1_title_value = "#### Discovered Topics"
+                                    model1_words_visible = True
+                                    model1_words_value = "\n".join(topic_info)
+
+                            # Get topic distributions for models
+                            model_topics = topic_results.get("model_topics", {})
+
+                            if model_topics:
+                                model1_name = models[0]
+                                model2_name = models[1]
+
+                                # Format topic distribution info
+                                if model1_name in model_topics and model2_name in model_topics:
+                                    model2_title_visible = True
+                                    model2_title_value = "#### Topic Distribution"
+                                    model2_words_visible = True
+
+                                    # Simple distribution display
+                                    dist1 = model_topics[model1_name]
+                                    dist2 = model_topics[model2_name]
+
+                                    model2_words_value = f"""
+                                    **{model1_name}**: {', '.join([f"Topic {i+1}: {v:.2f}" for i, v in enumerate(dist1[:3])])}
+
+                                    **{model2_name}**: {', '.join([f"Topic {i+1}: {v:.2f}" for i, v in enumerate(dist2[:3])])}
+                                    """
+
+                            # Add similarity metrics if available
+                            comparisons = topic_results.get("comparisons", {})
+                            if comparisons:
+                                comparison_key = f"{model1_name} vs {model2_name}"
+
+                                if comparison_key in comparisons:
+                                    metrics = comparisons[comparison_key]
+                                    js_div = metrics.get("js_divergence", 0)
+
+                                    similarity_title_visible = True
+                                    similarity_metrics_visible = True
+                                    similarity_metrics_value = f"""
+                                    - **Topic Distribution Divergence**: {js_div:.4f} (lower means more similar topic distributions)
+                                    """
+
+                    # Check for Classifier analysis
+                    elif selected_analysis == "Classifier" and "classifier" in analyses:
+                        visualization_area_visible = True
+                        classifier_results = analyses["classifier"]
+                        models = classifier_results.get("models", [])
+
+                        if len(models) >= 2:
+                            prompt_title_visible = True
+                            prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
+
+                            models_compared_visible = True
+                            models_compared_value = f"### Classifier Analysis for {models[0]} and {models[1]}"
+
+                            # Extract and format classifier information
+                            model1_name = models[0]
+                            model2_name = models[1]
+
+                            # Display classifications for each model
+                            classifications = classifier_results.get("classifications", {})
+
+                            if classifications:
+                                model1_title_visible = True
+                                model1_title_value = f"#### Classification Results"
+                                model1_words_visible = True
+
+                                model1_results = classifications.get(model1_name, {})
+                                model2_results = classifications.get(model2_name, {})
+
+                                model1_words_value = f"""
+                                **{model1_name}**:
+                                - Formality: {model1_results.get('formality', 'N/A')}
+                                - Sentiment: {model1_results.get('sentiment', 'N/A')}
+                                - Complexity: {model1_results.get('complexity', 'N/A')}
+
+                                **{model2_name}**:
+                                - Formality: {model2_results.get('formality', 'N/A')}
+                                - Sentiment: {model2_results.get('sentiment', 'N/A')}
+                                - Complexity: {model2_results.get('complexity', 'N/A')}
+                                """
+
+                                # Show comparison
+                                model2_title_visible = True
+                                model2_title_value = f"#### Classification Comparison"
+                                model2_words_visible = True
+
+                                differences = classifier_results.get("differences", {})
+                                model2_words_value = "\n".join([
+                                    f"- **{category}**: {diff}"
+                                    for category, diff in differences.items()
+                                ])
+
                     # If we don't have visualization data from any analysis
                     if not visualization_area_visible:
                         return (
@@ -320,7 +464,7 @@ def create_app():
                             gr.update(visible=False),
                             gr.update(visible=False),
                             True,
-                            gr.update(visible=True, value="❌ **No visualization data found.** Make sure to select at least one analysis type.")
+                            gr.update(visible=True, value="❌ **No visualization data found.** Make sure to select a valid analysis option.")
                         )
 
                     # Return all updated component values
@@ -362,61 +506,80 @@ def create_app():
                        True,  # status_message_visible
                        gr.update(visible=True, value=f"❌ **Error during analysis:**\n\n```\n{str(e)}\n```")  # status_message
                    )
-
-            # Function to update visibility based on checkbox state
-            def update_visibility(viz_visible, status_visible):
-                return [
-                    gr.update(visible=viz_visible),    # analysis_title
-                    gr.update(visible=viz_visible),    # prompt_title
-                    gr.update(visible=viz_visible),    # models_compared
-                    gr.update(visible=viz_visible),    # model1_title
-                    gr.update(visible=viz_visible),    # model1_words
-                    gr.update(visible=viz_visible),    # model2_title
-                    gr.update(visible=viz_visible),    # model2_words
-                    gr.update(visible=viz_visible),    # similarity_metrics_title
-                    gr.update(visible=viz_visible),    # similarity_metrics
-                    gr.update(visible=status_visible)  # status_message
-                ]
-
-            # Connect visibility checkboxes to update function
-            visualization_area_visible.change(
-                fn=update_visibility,
-                inputs=[visualization_area_visible, status_message_visible],
-                outputs=[
-                    analysis_title,
-                    prompt_title,
-                    models_compared,
-                    model1_title,
-                    model1_words,
-                    model2_title,
-                    model2_words,
-                    similarity_metrics_title,
-                    similarity_metrics,
-                    status_message
-                ]
+
+        # Add a new LLM Analysis tab
+        with gr.Tab("LLM Analysis"):
+            gr.Markdown("## LLM-Based Response Analysis")
+
+            with gr.Row():
+                with gr.Column():
+                    llm_analysis_type = gr.Radio(
+                        choices=["Response Quality", "Response Comparison", "Factual Accuracy"],
+                        label="Analysis Type",
+                        value="Response Comparison"
+                    )
+
+                    llm_model = gr.Dropdown(
+                        choices=["OpenAI GPT-4", "Anthropic Claude", "Local LLM"],
+                        label="Analysis Model",
+                        value="OpenAI GPT-4"
+                    )
+
+                    run_llm_analysis_btn = gr.Button("Run LLM Analysis", variant="primary")
+
+                with gr.Column():
+                    llm_analysis_prompt = gr.Textbox(
+                        label="Custom Analysis Instructions (Optional)",
+                        placeholder="Enter any specific instructions for the analysis...",
+                        lines=3
+                    )
+
+            llm_analysis_status = gr.Markdown("*No analysis has been run*")
+
+            llm_analysis_result = gr.Markdown(visible=False)
+
+            # Placeholder function for LLM analysis
+            def run_llm_analysis(dataset, analysis_type, model, custom_prompt):
+                if not dataset or "entries" not in dataset or not dataset["entries"]:
+                    return (
+                        gr.update(visible=True, value="❌ **Error:** No dataset loaded. Please create or load a dataset first."),
+                        gr.update(visible=False)
+                    )
+
+                # Placeholder for actual implementation
+                return (
+                    gr.update(visible=True, value="⏳ **Implementation in progress**\n\nLLM-based analysis will be available in a future update."),
+                    gr.update(visible=False)
+                )
+
+            # Connect the run button to the analysis function
+            run_llm_analysis_btn.click(
+                fn=run_llm_analysis,
+                inputs=[dataset_state, llm_analysis_type, llm_model, llm_analysis_prompt],
+                outputs=[llm_analysis_status, llm_analysis_result]
             )
 
-            # Run analysis with proper parameters
-            run_analysis_btn.click(
-                fn=run_analysis,
-                inputs=[dataset_state, analysis_options, bow_top_slider, ngram_n, ngram_top],
-                outputs=[
-                    analysis_results_state,
-                    analysis_output,
-                    visualization_area_visible,
-                    analysis_title,
-                    prompt_title,
-                    models_compared,
-                    model1_title,
-                    model1_words,
-                    model2_title,
-                    model2_words,
-                    similarity_metrics_title,
-                    similarity_metrics,
-                    status_message_visible,
-                    status_message
-                ]
-            )
+            # Run analysis with proper parameters
+            run_analysis_btn.click(
+                fn=run_analysis,
+                inputs=[dataset_state, analysis_options, bow_top_slider, ngram_n, ngram_top, topic_count],
+                outputs=[
+                    analysis_results_state,
+                    analysis_output,
+                    visualization_area_visible,
+                    analysis_title,
+                    prompt_title,
+                    models_compared,
+                    model1_title,
+                    model1_words,
+                    model2_title,
+                    model2_words,
+                    similarity_metrics_title,
+                    similarity_metrics,
+                    status_message_visible,
+                    status_message
+                ]
+            )
 
     return app
 
@@ -425,4 +588,4 @@ if __name__ == "__main__":
     download_nltk_resources()
 
     app = create_app()
-    app.launch()
+    app.launch()
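Reviewer note on the wiring above: every return path inside `run_analysis` must yield exactly 14 values, one per component in the `outputs` list of `run_analysis_btn.click` (the early returns do: one state value, two booleans, nine hidden `gr.update`s, a status flag, and a status message). A minimal sketch of that positional contract, using hypothetical component names rather than the app's own:

```python
import gradio as gr

with gr.Blocks() as demo:
    btn = gr.Button("Run")
    flag = gr.Checkbox(value=False, visible=False, label="Flag")
    status = gr.Markdown(visible=False)

    def handler():
        # One value per output component, in the same order as `outputs`.
        return True, gr.update(visible=True, value="done")

    btn.click(fn=handler, outputs=[flag, status])

demo.launch()
```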
processors/ngram_analysis.py CHANGED
@@ -52,7 +52,18 @@ def compare_ngrams(texts, model_names, n=2, top_n=25):
         stop_words='english'
     )
 
-    X = vectorizer.fit_transform(texts)
+    # Make sure texts are strings before processing
+    processed_texts = []
+    for text in texts:
+        # If text is not a string (e.g., it's a list), convert it to a string
+        if not isinstance(text, str):
+            if isinstance(text, list):
+                text = ' '.join(text)  # Join list elements if it's a list
+            else:
+                text = str(text)  # Convert to string if it's another type
+        processed_texts.append(text)
+
+    X = vectorizer.fit_transform(processed_texts)
 
     # Get feature names (n-grams)
     feature_names = vectorizer.get_feature_names_out()
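A quick standalone check (not part of the commit) of why this guard matters: `CountVectorizer` expects an iterable of strings, so a tokenized (list-valued) entry crashes `fit_transform`; joining it first restores the expected input shape.

```python
from sklearn.feature_extraction.text import CountVectorizer

texts = [["the", "cat", "sat"], "the dog sat"]  # mixed input, as the guard anticipates

processed_texts = []
for text in texts:
    if not isinstance(text, str):
        # Same coercion as the patch: join token lists, stringify anything else
        text = ' '.join(text) if isinstance(text, list) else str(text)
    processed_texts.append(text)

vectorizer = CountVectorizer(ngram_range=(2, 2))
X = vectorizer.fit_transform(processed_texts)  # works
# vectorizer.fit_transform(texts) would raise
# AttributeError: 'list' object has no attribute 'lower'
print(vectorizer.get_feature_names_out())  # ['cat sat' 'dog sat' 'the cat' 'the dog']
```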
processors/text_classifiers.py ADDED
@@ -0,0 +1,152 @@
+import nltk
+from nltk.sentiment import SentimentIntensityAnalyzer
+import statistics
+import re
+
+def download_nltk_resources():
+    """Download required NLTK resources if not already downloaded"""
+    try:
+        nltk.download('vader_lexicon', quiet=True)
+    except:
+        pass
+
+# Ensure NLTK resources are available
+download_nltk_resources()
+
+def classify_formality(text):
+    """
+    Classify text formality based on simple heuristics
+
+    Args:
+        text (str): Text to analyze
+
+    Returns:
+        str: Formality level (Formal, Neutral, or Informal)
+    """
+    # Simple formality indicators
+    formal_indicators = [
+        r'\b(therefore|thus|consequently|furthermore|moreover|however)\b',
+        r'\b(in accordance with|with respect to|regarding|concerning)\b',
+        r'\b(shall|must|may|will be required to)\b',
+        r'\b(it is|there are|there is)\b',
+        r'\b(Mr\.|Ms\.|Dr\.|Prof\.)\b'
+    ]
+
+    informal_indicators = [
+        r'\b(like|yeah|cool|awesome|gonna|wanna|gotta)\b',
+        r'(\!{2,}|\?{2,})',
+        r'\b(lol|haha|wow|omg|btw)\b',
+        r'\b(don\'t|can\'t|won\'t|shouldn\'t)\b',
+        r'(\.{3,})'
+    ]
+
+    # Calculate scores
+    formal_score = sum([len(re.findall(pattern, text, re.IGNORECASE)) for pattern in formal_indicators])
+    informal_score = sum([len(re.findall(pattern, text, re.IGNORECASE)) for pattern in informal_indicators])
+
+    # Normalize by text length
+    words = len(text.split())
+    if words > 0:
+        formal_score = formal_score / (words / 100)  # per 100 words
+        informal_score = informal_score / (words / 100)  # per 100 words
+
+    # Determine formality
+    if formal_score > informal_score * 1.5:
+        return "Formal"
+    elif informal_score > formal_score * 1.5:
+        return "Informal"
+    else:
+        return "Neutral"
+
+def classify_sentiment(text):
+    """
+    Classify text sentiment using NLTK's VADER
+
+    Args:
+        text (str): Text to analyze
+
+    Returns:
+        str: Sentiment (Positive, Neutral, or Negative)
+    """
+    try:
+        sia = SentimentIntensityAnalyzer()
+        sentiment = sia.polarity_scores(text)
+
+        if sentiment['compound'] >= 0.05:
+            return "Positive"
+        elif sentiment['compound'] <= -0.05:
+            return "Negative"
+        else:
+            return "Neutral"
+    except:
+        return "Neutral"
+
+def classify_complexity(text):
+    """
+    Classify text complexity based on sentence length and word length
+
+    Args:
+        text (str): Text to analyze
+
+    Returns:
+        str: Complexity level (Simple, Average, or Complex)
+    """
+    # Split into sentences
+    sentences = nltk.sent_tokenize(text)
+
+    if not sentences:
+        return "Average"
+
+    # Calculate average sentence length
+    sentence_lengths = [len(s.split()) for s in sentences]
+    avg_sentence_length = statistics.mean(sentence_lengths) if sentence_lengths else 0
+
+    # Calculate average word length
+    words = [word for sentence in sentences for word in nltk.word_tokenize(sentence)
+             if word.isalnum()]  # only consider alphanumeric tokens
+
+    avg_word_length = statistics.mean([len(word) for word in words]) if words else 0
+
+    # Determine complexity
+    if avg_sentence_length > 20 or avg_word_length > 6:
+        return "Complex"
+    elif avg_sentence_length < 12 or avg_word_length < 4:
+        return "Simple"
+    else:
+        return "Average"
+
+def compare_classifications(text1, text2):
+    """
+    Compare classifications between two texts
+
+    Args:
+        text1 (str): First text
+        text2 (str): Second text
+
+    Returns:
+        dict: Comparison results
+    """
+    formality1 = classify_formality(text1)
+    formality2 = classify_formality(text2)
+
+    sentiment1 = classify_sentiment(text1)
+    sentiment2 = classify_sentiment(text2)
+
+    complexity1 = classify_complexity(text1)
+    complexity2 = classify_complexity(text2)
+
+    results = {}
+
+    if formality1 != formality2:
+        results["Formality"] = f"Model 1 is {formality1.lower()}, while Model 2 is {formality2.lower()}"
+
+    if sentiment1 != sentiment2:
+        results["Sentiment"] = f"Model 1 has a {sentiment1.lower()} tone, while Model 2 has a {sentiment2.lower()} tone"
+
+    if complexity1 != complexity2:
+        results["Complexity"] = f"Model 1 uses {complexity1.lower()} language, while Model 2 uses {complexity2.lower()} language"
+
+    if not results:
+        results["Summary"] = "Both responses have similar writing characteristics"
+
+    return results
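A usage sketch (illustrative, not in the commit) exercising the new classifiers on two toy responses; exact labels depend on the heuristics and thresholds above, and note that `classify_complexity` relies on NLTK's `punkt` tokenizer being installed, which this module does not itself download:

```python
from processors.text_classifiers import (
    classify_formality,
    classify_sentiment,
    classify_complexity,
    compare_classifications,
)

formal_text = "Therefore, in accordance with the stated requirements, the system shall respond accordingly."
casual_text = "yeah lol that's gonna be awesome!!"

print(classify_formality(formal_text))   # expected: "Formal"
print(classify_sentiment(casual_text))   # expected: "Positive"
print(classify_complexity(casual_text))  # expected: "Simple"

# Differences are only reported for dimensions where the two texts disagree
print(compare_classifications(formal_text, casual_text))
```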
ui/analysis_screen.py CHANGED
@@ -3,40 +3,35 @@ import json
 from visualization.bow_visualizer import process_and_visualize_analysis
 
 # Import analysis modules
-# Uncomment these when implemented
-# from processors.topic_modeling import extract_topics, compare_topics
+from processors.topic_modeling import compare_topics
 from processors.ngram_analysis import compare_ngrams
-# from processors.bias_detection import compare_bias
 from processors.bow_analysis import compare_bow
-# from processors.metrics import calculate_similarity
-# from processors.diff_highlighter import highlight_differences
+from processors.text_classifiers import classify_formality, classify_sentiment, classify_complexity, compare_classifications
 
 def create_analysis_screen():
     """
     Create the analysis options screen
 
     Returns:
-        tuple: (analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top)
+        tuple: (analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top, topic_count)
     """
     with gr.Column() as analysis_screen:
         gr.Markdown("## Analysis Options")
-        gr.Markdown("Select which analyses you want to run on the LLM responses.")
+        gr.Markdown("Select which analysis you want to run on the LLM responses.")
 
-        # Analysis selection
+        # Change from CheckboxGroup to Radio for analysis selection
        with gr.Group():
-            analysis_options = gr.CheckboxGroup(
+            analysis_options = gr.Radio(
                 choices=[
-                    "Topic Modeling",
+                    "Bag of Words",
                     "N-gram Analysis",
+                    "Topic Modeling",
                     "Bias Detection",
-                    "Bag of Words",
-                    "Similarity Metrics",
-                    "Difference Highlighting"
-                ],
-                value=[
-                    "Bag of Words",
-                ],
-                label="Select Analyses to Run"
+                    "Classifier"
+                    # Removed "LLM Analysis" as requested
+                ],
+                value="Bag of Words",  # Default selection
+                label="Select Analysis Type"
             )
 
         # Create slider directly here for easier access
@@ -59,13 +54,19 @@ def create_analysis_screen():
             visible=False
        )
 
+        # Create topic modeling parameter accessible at top level
+        topic_count = gr.Slider(
+            minimum=2, maximum=10, value=3, step=1,
+            label="Number of Topics",
+            visible=False
+        )
+
        # Parameters for each analysis type
        with gr.Group() as analysis_params:
            # Topic modeling parameters
            with gr.Group(visible=False) as topic_params:
                gr.Markdown("### Topic Modeling Parameters")
-                topic_count = gr.Slider(minimum=2, maximum=10, value=3, step=1,
-                                        label="Number of Topics")
+                # We'll use the topic_count defined above
 
            # N-gram parameters group (using external ngram_n and ngram_top)
            with gr.Group(visible=False) as ngram_params:
@@ -81,32 +82,38 @@ def create_analysis_screen():
                label="Bias Detection Methods"
            )
 
-            # Similarity metrics parameters
-            with gr.Group(visible=False) as similarity_params:
-                gr.Markdown("### Similarity Metrics Parameters")
-                similarity_metrics = gr.CheckboxGroup(
-                    choices=["Cosine Similarity", "Jaccard Similarity", "Semantic Similarity"],
-                    value=["Cosine Similarity", "Semantic Similarity"],
-                    label="Similarity Metrics to Calculate"
-                )
+            # Classifier parameters
+            with gr.Group(visible=False) as classifier_params:
+                gr.Markdown("### Classifier Parameters")
+                gr.Markdown("Classifies responses based on formality, sentiment, and complexity")
 
-        # Function to update parameter visibility based on selected analyses
+        # Function to update parameter visibility based on selected analysis
        def update_params_visibility(selected):
-            ngram_visible = "N-gram Analysis" in selected
            return {
-                topic_params: gr.update(visible="Topic Modeling" in selected),
-                ngram_params: gr.update(visible=ngram_visible),
-                bias_params: gr.update(visible="Bias Detection" in selected),
-                similarity_params: gr.update(visible="Similarity Metrics" in selected),
-                ngram_n: gr.update(visible=ngram_visible),
-                ngram_top: gr.update(visible=ngram_visible)
+                topic_params: gr.update(visible=selected == "Topic Modeling"),
+                ngram_params: gr.update(visible=selected == "N-gram Analysis"),
+                bias_params: gr.update(visible=selected == "Bias Detection"),
+                classifier_params: gr.update(visible=selected == "Classifier"),
+                ngram_n: gr.update(visible=selected == "N-gram Analysis"),
+                ngram_top: gr.update(visible=selected == "N-gram Analysis"),
+                topic_count: gr.update(visible=selected == "Topic Modeling"),
+                bow_top_slider: gr.update(visible=selected == "Bag of Words")
            }
 
        # Set up event handler for analysis selection
        analysis_options.change(
            fn=update_params_visibility,
            inputs=[analysis_options],
-            outputs=[topic_params, ngram_params, bias_params, similarity_params, ngram_n, ngram_top]
+            outputs=[
+                topic_params,
+                ngram_params,
+                bias_params,
+                classifier_params,
+                ngram_n,
+                ngram_top,
+                topic_count,
+                bow_top_slider
+            ]
        )
 
        # Run analysis button
@@ -116,54 +123,308 @@ def create_analysis_screen():
        analysis_output = gr.JSON(label="Analysis Results", visible=False)
 
    # Return the components needed by app.py
-    return analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top
+    return analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top, topic_count
 
-# Process analysis request function
-def process_analysis_request(dataset, selected_analyses, parameters):
-    """
-    Process the analysis request and run selected analyses
-    """
-    try:
-        print(f"Processing analysis request with: {selected_analyses}")
-        print(f"Parameters: {parameters}")
-
-        if not dataset or "entries" not in dataset or not dataset["entries"]:
-            return {}, gr.update(visible=True,
-                                 value=json.dumps({"error": "No dataset provided or dataset is empty"}, indent=2))
-
-        analysis_results = {"analyses": {}}
-
-        # Extract prompt and responses
-        prompt = dataset["entries"][0]["prompt"]
-        response_texts = [entry["response"] for entry in dataset["entries"]]
-        model_names = [entry["model"] for entry in dataset["entries"]]
-
-        print(f"Analyzing prompt: '{prompt[:50]}...'")
-        print(f"Models: {model_names}")
-
-        analysis_results["analyses"][prompt] = {}
-
-        # Run Bag of Words analysis if selected
-        if "Bag of Words" in selected_analyses:
-            top_words = parameters.get("bow_top", 25)
-            print(f"Running BOW analysis with top_words={top_words}")
-            bow_results = compare_bow(response_texts, model_names, top_words)
-            analysis_results["analyses"][prompt]["bag_of_words"] = bow_results
-
-        # Run N-gram analysis if selected
-        if "N-gram Analysis" in selected_analyses:
-            ngram_n = int(parameters.get("ngram_n", "2"))
-            ngram_top = parameters.get("ngram_top", 10)
-            print(f"Running N-gram analysis with n={ngram_n}, top_n={ngram_top}")
-            ngram_results = compare_ngrams(response_texts, model_names, ngram_n, ngram_top)
-            analysis_results["analyses"][prompt]["ngram_analysis"] = ngram_results
-
-        print("Analysis complete - results:", analysis_results)
-
-        # Return results and update the output component
-        return analysis_results, gr.update(visible=False, value=analysis_results)  # Hide the raw JSON
-    except Exception as e:
-        import traceback
-        error_msg = f"Analysis error: {str(e)}\n{traceback.format_exc()}"
-        print(error_msg)
-        return {}, gr.update(visible=True, value=json.dumps({"error": error_msg}, indent=2))
+# Add the implementation of these helper functions
+def extract_important_words(text, top_n=20):
+    """
+    Extract the most important words from a text.
+
+    Args:
+        text (str): Input text
+        top_n (int): Number of top words to return
+
+    Returns:
+        list: List of important words with their counts
+    """
+    # Import necessary modules
+    from collections import Counter
+    import re
+    import nltk
+    from nltk.corpus import stopwords
+    from nltk.tokenize import word_tokenize
+
+    # Make sure nltk resources are available
+    try:
+        stop_words = set(stopwords.words('english'))
+    except:
+        nltk.download('stopwords')
+        stop_words = set(stopwords.words('english'))
+
+    try:
+        tokens = word_tokenize(text.lower())
+    except:
+        nltk.download('punkt')
+        tokens = word_tokenize(text.lower())
+
+    # Remove stopwords and non-alphabetic tokens
+    filtered_tokens = [word for word in tokens if word.isalpha() and word not in stop_words and len(word) > 2]
+
+    # Count word frequencies
+    word_counts = Counter(filtered_tokens)
+
+    # Get the top N words
+    top_words = word_counts.most_common(top_n)
+
+    # Format the result
+    result = [{"word": word, "count": count} for word, count in top_words]
+
+    return result
+
+def calculate_text_similarity(text1, text2):
+    """
+    Calculate similarity metrics between two texts.
+
+    Args:
+        text1 (str): First text
+        text2 (str): Second text
+
+    Returns:
+        dict: Similarity metrics
+    """
+    from processors.metrics import calculate_similarity
+
+    # Calculate similarity using the metrics module
+    metrics = calculate_similarity(text1, text2)
+
+    # Add common word count
+    from collections import Counter
+    import nltk
+    from nltk.corpus import stopwords
+
+    # Make sure nltk resources are available
+    try:
+        stop_words = set(stopwords.words('english'))
+    except:
+        nltk.download('stopwords')
+        stop_words = set(stopwords.words('english'))
+
+    # Simple tokenization and filtering
+    words1 = set([w.lower() for w in nltk.word_tokenize(text1)
+                  if w.isalpha() and w.lower() not in stop_words])
+    words2 = set([w.lower() for w in nltk.word_tokenize(text2)
+                  if w.isalpha() and w.lower() not in stop_words])
+
+    # Calculate common words
+    common_words = words1.intersection(words2)
+
+    # Add to metrics
+    metrics["common_word_count"] = len(common_words)
+
+    return metrics
+
+def extract_ngrams(text, n=2, top_n=10):
+    """
+    Extract the most common n-grams from text.
+
+    Args:
+        text (str): Input text
+        n (int or str): Size of n-grams
+        top_n (int): Number of top n-grams to return
+
+    Returns:
+        list: List of important n-grams with their counts
+    """
+    import nltk
+    from nltk.util import ngrams
+    from collections import Counter
+
+    # Convert n to int if it's a string
+    if isinstance(n, str):
+        n = int(n)
+
+    # Make sure nltk resources are available
+    try:
+        tokens = nltk.word_tokenize(text.lower())
+    except:
+        nltk.download('punkt')
+        tokens = nltk.word_tokenize(text.lower())
+
+    # Generate n-grams
+    n_grams = list(ngrams(tokens, n))
+
+    # Convert n-grams to strings for easier handling
+    n_gram_strings = [' '.join(gram) for gram in n_grams]
+
+    # Count n-gram frequencies
+    n_gram_counts = Counter(n_gram_strings)
+
+    # Get the top N n-grams
+    top_n_grams = n_gram_counts.most_common(top_n)
+
+    # Format the result
+    result = [{"ngram": ngram, "count": count} for ngram, count in top_n_grams]
+
+    return result
+
+def compare_ngrams(text1, text2, n=2):
+    """
+    Compare n-grams between two texts.
+
+    Args:
+        text1 (str): First text
+        text2 (str): Second text
+        n (int or str): Size of n-grams
+
+    Returns:
+        dict: Comparison metrics
+    """
+    import nltk
+    from nltk.util import ngrams
+    from collections import Counter
+
+    # Convert n to int if it's a string
+    if isinstance(n, str):
+        n = int(n)
+
+    # Make sure nltk resources are available
+    try:
+        tokens1 = nltk.word_tokenize(text1.lower())
+        tokens2 = nltk.word_tokenize(text2.lower())
+    except:
+        nltk.download('punkt')
+        tokens1 = nltk.word_tokenize(text1.lower())
+        tokens2 = nltk.word_tokenize(text2.lower())
+
+    # Generate n-grams
+    n_grams1 = set([' '.join(gram) for gram in ngrams(tokens1, n)])
+    n_grams2 = set([' '.join(gram) for gram in ngrams(tokens2, n)])
+
+    # Calculate common n-grams
+    common_n_grams = n_grams1.intersection(n_grams2)
+
+    # Return comparison metrics
+    return {
+        "common_ngram_count": len(common_n_grams)
+    }
+
+def perform_topic_modeling(texts, model_names, n_topics=3):
+    """
+    Perform topic modeling on a list of texts.
+
+    Args:
+        texts (list): List of text documents
+        model_names (list): Names of the models
+        n_topics (int): Number of topics to extract
+
+    Returns:
+        dict: Topic modeling results
+    """
+    from processors.topic_modeling import compare_topics
+
+    # Use the topic modeling processor
+    result = compare_topics(texts, model_names, n_topics=n_topics)
+
+    return result
+
+# Process analysis request function
+def process_analysis_request(dataset, selected_analysis, parameters):
+    """
+    Process the analysis request based on the selected options.
+
+    Args:
+        dataset (dict): The input dataset
+        selected_analysis (str): The selected analysis type
+        parameters (dict): Additional parameters for the analysis
+
+    Returns:
+        tuple: A tuple containing (analysis_results, visualization_data)
+    """
+    if not dataset or "entries" not in dataset or not dataset["entries"]:
+        return {}, None
+
+    # Initialize the results structure
+    results = {"analyses": {}}
+
+    # Get the prompt text from the first entry
+    prompt_text = dataset["entries"][0].get("prompt", "")
+    if not prompt_text:
+        return {"error": "No prompt found in dataset"}, None
+
+    # Initialize the analysis container for this prompt
+    results["analyses"][prompt_text] = {}
+
+    # Get model names and responses
+    model1_name = dataset["entries"][0].get("model", "Model 1")
+    model2_name = dataset["entries"][1].get("model", "Model 2")
+
+    model1_response = dataset["entries"][0].get("response", "")
+    model2_response = dataset["entries"][1].get("response", "")
+
+    # Process based on the selected analysis type
+    if selected_analysis == "Bag of Words":
+        # Perform Bag of Words analysis using the processor
+        bow_results = compare_bow(
+            [model1_response, model2_response],
+            [model1_name, model2_name],
+            top_n=parameters.get("bow_top", 25)
+        )
+        results["analyses"][prompt_text]["bag_of_words"] = bow_results
+
+    elif selected_analysis == "N-gram Analysis":
+        # Perform N-gram analysis
+        ngram_size = parameters.get("ngram_n", 2)
+        if isinstance(ngram_size, str):
+            ngram_size = int(ngram_size)
+
+        top_n = parameters.get("ngram_top", 15)
+        if isinstance(top_n, str):
+            top_n = int(top_n)
+
+        # Use the processor
+        ngram_results = compare_ngrams(
+            [model1_response, model2_response],
+            [model1_name, model2_name],
+            n=ngram_size,
+            top_n=top_n
+        )
+        results["analyses"][prompt_text]["ngram_analysis"] = ngram_results
+
+    elif selected_analysis == "Topic Modeling":
+        # Perform topic modeling analysis
+        topic_count = parameters.get("topic_count", 3)
+        if isinstance(topic_count, str):
+            topic_count = int(topic_count)
+
+        try:
+            topic_results = compare_topics(
+                [model1_response, model2_response],
+                model_names=[model1_name, model2_name],
+                n_topics=topic_count
+            )
+
+            results["analyses"][prompt_text]["topic_modeling"] = topic_results
+        except Exception as e:
+            import traceback
+            print(f"Topic modeling error: {str(e)}\n{traceback.format_exc()}")
+            results["analyses"][prompt_text]["topic_modeling"] = {
+                "models": [model1_name, model2_name],
+                "error": str(e),
+                "message": "Topic modeling failed. Try with longer text or different parameters."
+            }
+
+    elif selected_analysis == "Classifier":
+        # Perform classifier analysis
+        results["analyses"][prompt_text]["classifier"] = {
+            "models": [model1_name, model2_name],
+            "classifications": {
+                model1_name: {
+                    "formality": classify_formality(model1_response),
+                    "sentiment": classify_sentiment(model1_response),
+                    "complexity": classify_complexity(model1_response)
+                },
+                model2_name: {
+                    "formality": classify_formality(model2_response),
+                    "sentiment": classify_sentiment(model2_response),
+                    "complexity": classify_complexity(model2_response)
+                }
+            },
+            "differences": compare_classifications(model1_response, model2_response)
+        }
+
+    else:
+        # Unknown analysis type
+        results["analyses"][prompt_text]["message"] = "Please select a valid analysis type."
+
+    # Return both the analysis results and a placeholder for visualization data
+    return results, None
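The reworked entry point can be exercised headlessly; a minimal driver (illustrative, not in the commit) showing the new single-string contract and the shape of the returned results:

```python
from ui.analysis_screen import process_analysis_request

dataset = {
    "entries": [
        {"model": "Model A", "prompt": "Explain photosynthesis.",
         "response": "Plants convert light energy into chemical energy..."},
        {"model": "Model B", "prompt": "Explain photosynthesis.",
         "response": "Photosynthesis is the process by which plants make food..."},
    ]
}

# selected_analysis is now a single string, not a list of checkbox values
results, _ = process_analysis_request(dataset, "Classifier", {})
classifier = results["analyses"]["Explain photosynthesis."]["classifier"]
print(classifier["classifications"])  # per-model formality/sentiment/complexity labels
print(classifier["differences"])      # dimensions where the two responses diverge
```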
visualization/bow_visualizer.py CHANGED
@@ -8,6 +8,7 @@ import pandas as pd
 from difflib import SequenceMatcher
 
 from visualization.ngram_visualizer import create_ngram_visualization
+from visualization.topic_visualizer import process_and_visualize_topic_analysis  # Added import
 
 def create_bow_visualization(analysis_results):
     """
@@ -203,6 +204,14 @@ def process_and_visualize_analysis(analysis_results):
            ngram_components = create_ngram_visualization(
                {"analyses": {prompt: {"ngram_analysis": analyses["ngram_analysis"]}}})
            components.extend(ngram_components)
+
+        # Check for Topic Modeling analysis
+        if "topic_modeling" in analyses:
+            print("Processing Topic Modeling visualization")
+            # Use the dedicated topic visualization function
+            topic_components = process_and_visualize_topic_analysis(
+                {"analyses": {prompt: {"topic_modeling": analyses["topic_modeling"]}}})
+            components.extend(topic_components)
 
    if not components:
        components.append(gr.Markdown("No visualization components could be created from the analysis results."))
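One caveat for this file: `visualization/topic_visualizer.py` is imported above but is not part of this commit, so the import will fail until that module lands. A minimal stub consistent with how it is called here, and with the `id`/`words` topic fields app.py reads, might look like the following; the real module presumably renders richer output:

```python
# visualization/topic_visualizer.py (hypothetical stub, not from this commit)
import gradio as gr

def process_and_visualize_topic_analysis(analysis_results):
    """Return a list of Gradio components for each prompt's topic_modeling block."""
    components = []
    for prompt, analyses in analysis_results.get("analyses", {}).items():
        topic_results = analyses.get("topic_modeling", {})
        lines = [
            f"**Topic {topic.get('id', i) + 1}**: {', '.join(topic.get('words', [])[:5])}"
            for i, topic in enumerate(topic_results.get("topics", []))
        ]
        components.append(gr.Markdown("\n".join(lines) or "No topics extracted."))
    return components
```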