Ryan committed on
Commit 8910689 · 1 Parent(s): 4e77dc0
app.py CHANGED
@@ -97,7 +97,7 @@ def create_app():
         # Analysis Tab
         with gr.Tab("Analysis"):
             # Use create_analysis_screen to get UI components including visualization container
-            analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top, topic_count = create_analysis_screen()
+            analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top = create_analysis_screen()
 
             # Pre-create visualization components (initially hidden)
             visualization_area_visible = gr.Checkbox(value=False, visible=False, label="Visualization Visible")
@@ -122,7 +122,7 @@ def create_app():
             status_message = gr.Markdown(visible=False)
 
             # Define a helper function to extract parameter values and run the analysis
-            def run_analysis(dataset, selected_analysis, bow_top, ngram_n, ngram_top, topic_count):
+            def run_analysis(dataset, selected_analyses, bow_top, ngram_n, ngram_top):
                 try:
                     if not dataset or "entries" not in dataset or not dataset["entries"]:
                         return (
@@ -145,14 +145,12 @@ def create_app():
                     parameters = {
                         "bow_top": bow_top,
                         "ngram_n": ngram_n,
-                        "ngram_top": ngram_top,
-                        "topic_count": topic_count
+                        "ngram_top": ngram_top
                     }
-                    print(f"Running analysis with selected type: {selected_analysis}")
-                    print("Parameters:", parameters)
+                    print("Running analysis with parameters:", parameters)
 
-                    # Process the analysis request - passing selected_analysis as a string
-                    analysis_results, _ = process_analysis_request(dataset, selected_analysis, parameters)
+                    # Process the analysis request
+                    analysis_results, _ = process_analysis_request(dataset, selected_analyses, parameters)
 
                     # If there's an error or no results
                     if not analysis_results or "analyses" not in analysis_results or not analysis_results["analyses"]:
@@ -170,7 +168,7 @@ def create_app():
                             gr.update(visible=False),
                             gr.update(visible=False),
                             True,
-                            gr.update(visible=True, value="❌ **No results found.** Try a different analysis option.")
+                            gr.update(visible=True, value="❌ **No results found.** Try different analysis options.")
                         )
 
                     # Extract information to display in components
@@ -198,27 +196,8 @@ def create_app():
                     similarity_metrics_visible = False
                     similarity_metrics_value = ""
 
-                    # Check for messages from placeholder analyses
-                    if "message" in analyses:
-                        return (
-                            analysis_results,
-                            False,
-                            False,
-                            gr.update(visible=False),
-                            gr.update(visible=False),
-                            gr.update(visible=False),
-                            gr.update(visible=False),
-                            gr.update(visible=False),
-                            gr.update(visible=False),
-                            gr.update(visible=False),
-                            gr.update(visible=False),
-                            gr.update(visible=False),
-                            True,
-                            gr.update(visible=True, value=f"ℹ️ **{analyses['message']}**")
-                        )
-
-                    # Process based on the selected analysis type
-                    if selected_analysis == "Bag of Words" and "bag_of_words" in analyses:
+                    # Check for Bag of Words analysis
+                    if "bag_of_words" in analyses:
                         visualization_area_visible = True
                         bow_results = analyses["bag_of_words"]
                         models = bow_results.get("models", [])
@@ -273,8 +252,8 @@ def create_app():
                        - **Common Words**: {common_words} words appear in both responses
                        """
 
-                    # Check for N-gram analysis
-                    elif selected_analysis == "N-gram Analysis" and "ngram_analysis" in analyses:
+                    # Check for N-gram analysis (if not found, we'll fallback to BOW)
+                    if "ngram_analysis" in analyses and not visualization_area_visible:
                         visualization_area_visible = True
                         ngram_results = analyses["ngram_analysis"]
                         models = ngram_results.get("models", [])
@@ -325,129 +304,6 @@ def create_app():
                        - **Common {size_name}**: {common_count} {size_name.lower()} appear in both responses
                        """
 
-                    # Check for Topic Modeling analysis
-                    elif selected_analysis == "Topic Modeling" and "topic_modeling" in analyses:
-                        visualization_area_visible = True
-                        topic_results = analyses["topic_modeling"]
-                        models = topic_results.get("models", [])
-                        method = topic_results.get("method", "lda").upper()
-                        n_topics = topic_results.get("n_topics", 3)
-
-                        if len(models) >= 2:
-                            prompt_title_visible = True
-                            prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
-
-                            models_compared_visible = True
-                            models_compared_value = f"### Topic Modeling Analysis ({method}, {n_topics} topics)"
-
-                            # Extract and format topic information
-                            topics = topic_results.get("topics", [])
-
-                            if topics:
-                                # Format topic info for display
-                                topic_info = []
-                                for topic in topics[:3]:  # Show first 3 topics
-                                    topic_id = topic.get("id", 0)
-                                    words = topic.get("words", [])[:5]  # Top 5 words per topic
-
-                                    if words:
-                                        topic_info.append(f"**Topic {topic_id+1}**: {', '.join(words)}")
-
-                                if topic_info:
-                                    model1_title_visible = True
-                                    model1_title_value = "#### Discovered Topics"
-                                    model1_words_visible = True
-                                    model1_words_value = "\n".join(topic_info)
-
-                            # Get topic distributions for models
-                            model_topics = topic_results.get("model_topics", {})
-
-                            if model_topics:
-                                model1_name = models[0]
-                                model2_name = models[1]
-
-                                # Format topic distribution info
-                                if model1_name in model_topics and model2_name in model_topics:
-                                    model2_title_visible = True
-                                    model2_title_value = "#### Topic Distribution"
-                                    model2_words_visible = True
-
-                                    # Simple distribution display
-                                    dist1 = model_topics[model1_name]
-                                    dist2 = model_topics[model2_name]
-
-                                    model2_words_value = f"""
-                                    **{model1_name}**: {', '.join([f"Topic {i+1}: {v:.2f}" for i, v in enumerate(dist1[:3])])}
-
-                                    **{model2_name}**: {', '.join([f"Topic {i+1}: {v:.2f}" for i, v in enumerate(dist2[:3])])}
-                                    """
-
-                                # Add similarity metrics if available
-                                comparisons = topic_results.get("comparisons", {})
-                                if comparisons:
-                                    comparison_key = f"{model1_name} vs {model2_name}"
-
-                                    if comparison_key in comparisons:
-                                        metrics = comparisons[comparison_key]
-                                        js_div = metrics.get("js_divergence", 0)
-
-                                        similarity_title_visible = True
-                                        similarity_metrics_visible = True
-                                        similarity_metrics_value = f"""
-                                        - **Topic Distribution Divergence**: {js_div:.4f} (lower means more similar topic distributions)
-                                        """
-
-                    # Check for Classifier analysis
-                    elif selected_analysis == "Classifier" and "classifier" in analyses:
-                        visualization_area_visible = True
-                        classifier_results = analyses["classifier"]
-                        models = classifier_results.get("models", [])
-
-                        if len(models) >= 2:
-                            prompt_title_visible = True
-                            prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
-
-                            models_compared_visible = True
-                            models_compared_value = f"### Classifier Analysis for {models[0]} and {models[1]}"
-
-                            # Extract and format classifier information
-                            model1_name = models[0]
-                            model2_name = models[1]
-
-                            # Display classifications for each model
-                            classifications = classifier_results.get("classifications", {})
-
-                            if classifications:
-                                model1_title_visible = True
-                                model1_title_value = f"#### Classification Results"
-                                model1_words_visible = True
-
-                                model1_results = classifications.get(model1_name, {})
-                                model2_results = classifications.get(model2_name, {})
-
-                                model1_words_value = f"""
-                                **{model1_name}**:
-                                - Formality: {model1_results.get('formality', 'N/A')}
-                                - Sentiment: {model1_results.get('sentiment', 'N/A')}
-                                - Complexity: {model1_results.get('complexity', 'N/A')}
-
-                                **{model2_name}**:
-                                - Formality: {model2_results.get('formality', 'N/A')}
-                                - Sentiment: {model2_results.get('sentiment', 'N/A')}
-                                - Complexity: {model2_results.get('complexity', 'N/A')}
-                                """
-
-                            # Show comparison
-                            model2_title_visible = True
-                            model2_title_value = f"#### Classification Comparison"
-                            model2_words_visible = True
-
-                            differences = classifier_results.get("differences", {})
-                            model2_words_value = "\n".join([
-                                f"- **{category}**: {diff}"
-                                for category, diff in differences.items()
-                            ])
-
                     # If we don't have visualization data from any analysis
                     if not visualization_area_visible:
                         return (
@@ -464,7 +320,7 @@ def create_app():
                             gr.update(visible=False),
                             gr.update(visible=False),
                             True,
-                            gr.update(visible=True, value="❌ **No visualization data found.** Make sure to select a valid analysis option.")
+                            gr.update(visible=True, value="❌ **No visualization data found.** Make sure to select at least one analysis type.")
                         )
 
                     # Return all updated component values
@@ -506,80 +362,61 @@ def create_app():
                        True,  # status_message_visible
                        gr.update(visible=True, value=f"❌ **Error during analysis:**\n\n```\n{str(e)}\n```")  # status_message
                    )
-
-        # Add a new LLM Analysis tab
-        with gr.Tab("LLM Analysis"):
-            gr.Markdown("## LLM-Based Response Analysis")
-
-            with gr.Row():
-                with gr.Column():
-                    llm_analysis_type = gr.Radio(
-                        choices=["Response Quality", "Response Comparison", "Factual Accuracy"],
-                        label="Analysis Type",
-                        value="Response Comparison"
-                    )
-
-                    llm_model = gr.Dropdown(
-                        choices=["OpenAI GPT-4", "Anthropic Claude", "Local LLM"],
-                        label="Analysis Model",
-                        value="OpenAI GPT-4"
-                    )
-
-                    run_llm_analysis_btn = gr.Button("Run LLM Analysis", variant="primary")
-
-                with gr.Column():
-                    llm_analysis_prompt = gr.Textbox(
-                        label="Custom Analysis Instructions (Optional)",
-                        placeholder="Enter any specific instructions for the analysis...",
-                        lines=3
-                    )
-
-                    llm_analysis_status = gr.Markdown("*No analysis has been run*")
-
-                    llm_analysis_result = gr.Markdown(visible=False)
-
-            # Placeholder function for LLM analysis
-            def run_llm_analysis(dataset, analysis_type, model, custom_prompt):
-                if not dataset or "entries" not in dataset or not dataset["entries"]:
-                    return (
-                        gr.update(visible=True, value="❌ **Error:** No dataset loaded. Please create or load a dataset first."),
-                        gr.update(visible=False)
-                    )
-
-                # Placeholder for actual implementation
-                return (
-                    gr.update(visible=True, value="⏳ **Implementation in progress**\n\nLLM-based analysis will be available in a future update."),
-                    gr.update(visible=False)
-                )
-
-            # Connect the run button to the analysis function
-            run_llm_analysis_btn.click(
-                fn=run_llm_analysis,
-                inputs=[dataset_state, llm_analysis_type, llm_model, llm_analysis_prompt],
-                outputs=[llm_analysis_status, llm_analysis_result]
+
+            # Function to update visibility based on checkbox state
+            def update_visibility(viz_visible, status_visible):
+                return [
+                    gr.update(visible=viz_visible),    # analysis_title
+                    gr.update(visible=viz_visible),    # prompt_title
+                    gr.update(visible=viz_visible),    # models_compared
+                    gr.update(visible=viz_visible),    # model1_title
+                    gr.update(visible=viz_visible),    # model1_words
+                    gr.update(visible=viz_visible),    # model2_title
+                    gr.update(visible=viz_visible),    # model2_words
+                    gr.update(visible=viz_visible),    # similarity_metrics_title
+                    gr.update(visible=viz_visible),    # similarity_metrics
+                    gr.update(visible=status_visible)  # status_message
+                ]
+
+            # Connect visibility checkboxes to update function
+            visualization_area_visible.change(
+                fn=update_visibility,
+                inputs=[visualization_area_visible, status_message_visible],
+                outputs=[
+                    analysis_title,
+                    prompt_title,
+                    models_compared,
+                    model1_title,
+                    model1_words,
+                    model2_title,
+                    model2_words,
+                    similarity_metrics_title,
+                    similarity_metrics,
+                    status_message
+                ]
             )
 
-        # Run analysis with proper parameters
-        run_analysis_btn.click(
-            fn=run_analysis,
-            inputs=[dataset_state, analysis_options, bow_top_slider, ngram_n, ngram_top, topic_count],
-            outputs=[
-                analysis_results_state,
-                analysis_output,
-                visualization_area_visible,
-                analysis_title,
-                prompt_title,
-                models_compared,
-                model1_title,
-                model1_words,
-                model2_title,
-                model2_words,
-                similarity_metrics_title,
-                similarity_metrics,
-                status_message_visible,
-                status_message
-            ]
-        )
+            # Run analysis with proper parameters
+            run_analysis_btn.click(
+                fn=run_analysis,
+                inputs=[dataset_state, analysis_options, bow_top_slider, ngram_n, ngram_top],
+                outputs=[
+                    analysis_results_state,
+                    analysis_output,
+                    visualization_area_visible,
+                    analysis_title,
+                    prompt_title,
+                    models_compared,
+                    model1_title,
+                    model1_words,
+                    model2_title,
+                    model2_words,
+                    similarity_metrics_title,
+                    similarity_metrics,
+                    status_message_visible,
+                    status_message
+                ]
+            )
 
         return app
 
@@ -588,4 +425,4 @@ if __name__ == "__main__":
     download_nltk_resources()
 
     app = create_app()
-    app.launch()
+    app.launch()
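
Note on the app.py change above: create_analysis_screen() now hands back a CheckboxGroup (see ui/analysis_screen.py below), so run_analysis receives selected_analyses as a list of checked labels rather than a single Radio string, and each analysis is gated by a membership test. A minimal, self-contained sketch of that pattern using only Gradio itself; the component and function names here are illustrative, not from the repo:

    import gradio as gr

    def run_selected(selected_analyses):
        # A CheckboxGroup delivers its value as a list of checked labels,
        # so each analysis is gated with "in" rather than "==".
        results = {}
        if "Bag of Words" in selected_analyses:
            results["bag_of_words"] = "ran"    # stand-in for compare_bow(...)
        if "N-gram Analysis" in selected_analyses:
            results["ngram_analysis"] = "ran"  # stand-in for compare_ngrams(...)
        return results or {"message": "Nothing selected"}

    with gr.Blocks() as demo:
        options = gr.CheckboxGroup(
            choices=["Bag of Words", "N-gram Analysis"],
            value=["Bag of Words"],  # the default value is a list, not a bare string
            label="Select Analyses to Run",
        )
        output = gr.JSON()
        gr.Button("Run").click(fn=run_selected, inputs=[options], outputs=[output])

    demo.launch()
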
processors/ngram_analysis.py CHANGED
@@ -9,22 +9,6 @@ from nltk.util import ngrams
 from nltk.tokenize import word_tokenize
 from nltk.corpus import stopwords
 
-# Helper function to flatten nested lists
-def flatten_list(nested_list):
-    """
-    Recursively flattens a nested list.
-
-    Args:
-        nested_list (list): A potentially nested list.
-
-    Returns:
-        list: A flattened list.
-    """
-    for item in nested_list:
-        if isinstance(item, list):
-            yield from flatten_list(item)
-        else:
-            yield item
 
 def compare_ngrams(texts, model_names, n=2, top_n=25):
     """
@@ -67,11 +51,8 @@ def compare_ngrams(texts, model_names, n=2, top_n=25):
         max_features=1000,
         stop_words='english'
     )
-
-    # Ensure each text is a string, without attempting complex preprocessing
-    processed_texts = [str(text) if not isinstance(text, str) else text for text in texts]
-
-    X = vectorizer.fit_transform(processed_texts)
+
+    X = vectorizer.fit_transform(texts)
 
     # Get feature names (n-grams)
     feature_names = vectorizer.get_feature_names_out()
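
With the str() coercion removed, compare_ngrams now assumes every entry in texts is already a string when it reaches vectorizer.fit_transform(texts). A small sketch of the scikit-learn n-gram counting this relies on; the max_features/stop_words settings mirror the diff, while ngram_range and the toy texts are illustrative:

    from sklearn.feature_extraction.text import CountVectorizer

    texts = [
        "the quick brown fox jumps over the lazy dog",
        "the quick red fox runs past the lazy dog",
    ]

    # Count bigrams after English stop-word removal (the n=2 case)
    vectorizer = CountVectorizer(ngram_range=(2, 2), max_features=1000, stop_words='english')
    X = vectorizer.fit_transform(texts)  # raises if an entry is not a string

    for ngram, count in zip(vectorizer.get_feature_names_out(), X.toarray().sum(axis=0)):
        print(ngram, count)  # e.g. "lazy dog" appears in both texts -> count 2
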
processors/text_classifiers.py DELETED
@@ -1,152 +0,0 @@
-import nltk
-from nltk.sentiment import SentimentIntensityAnalyzer
-import statistics
-import re
-
-def download_nltk_resources():
-    """Download required NLTK resources if not already downloaded"""
-    try:
-        nltk.download('vader_lexicon', quiet=True)
-    except:
-        pass
-
-# Ensure NLTK resources are available
-download_nltk_resources()
-
-def classify_formality(text):
-    """
-    Classify text formality based on simple heuristics
-
-    Args:
-        text (str): Text to analyze
-
-    Returns:
-        str: Formality level (Formal, Neutral, or Informal)
-    """
-    # Simple formality indicators
-    formal_indicators = [
-        r'\b(therefore|thus|consequently|furthermore|moreover|however)\b',
-        r'\b(in accordance with|with respect to|regarding|concerning)\b',
-        r'\b(shall|must|may|will be required to)\b',
-        r'\b(it is|there are|there is)\b',
-        r'\b(Mr\.|Ms\.|Dr\.|Prof\.)\b'
-    ]
-
-    informal_indicators = [
-        r'\b(like|yeah|cool|awesome|gonna|wanna|gotta)\b',
-        r'(\!{2,}|\?{2,})',
-        r'\b(lol|haha|wow|omg|btw)\b',
-        r'\b(don\'t|can\'t|won\'t|shouldn\'t)\b',
-        r'(\.{3,})'
-    ]
-
-    # Calculate scores
-    formal_score = sum([len(re.findall(pattern, text, re.IGNORECASE)) for pattern in formal_indicators])
-    informal_score = sum([len(re.findall(pattern, text, re.IGNORECASE)) for pattern in informal_indicators])
-
-    # Normalize by text length
-    words = len(text.split())
-    if words > 0:
-        formal_score = formal_score / (words / 100)  # per 100 words
-        informal_score = informal_score / (words / 100)  # per 100 words
-
-    # Determine formality
-    if formal_score > informal_score * 1.5:
-        return "Formal"
-    elif informal_score > formal_score * 1.5:
-        return "Informal"
-    else:
-        return "Neutral"
-
-def classify_sentiment(text):
-    """
-    Classify text sentiment using NLTK's VADER
-
-    Args:
-        text (str): Text to analyze
-
-    Returns:
-        str: Sentiment (Positive, Neutral, or Negative)
-    """
-    try:
-        sia = SentimentIntensityAnalyzer()
-        sentiment = sia.polarity_scores(text)
-
-        if sentiment['compound'] >= 0.05:
-            return "Positive"
-        elif sentiment['compound'] <= -0.05:
-            return "Negative"
-        else:
-            return "Neutral"
-    except:
-        return "Neutral"
-
-def classify_complexity(text):
-    """
-    Classify text complexity based on sentence length and word length
-
-    Args:
-        text (str): Text to analyze
-
-    Returns:
-        str: Complexity level (Simple, Average, or Complex)
-    """
-    # Split into sentences
-    sentences = nltk.sent_tokenize(text)
-
-    if not sentences:
-        return "Average"
-
-    # Calculate average sentence length
-    sentence_lengths = [len(s.split()) for s in sentences]
-    avg_sentence_length = statistics.mean(sentence_lengths) if sentence_lengths else 0
-
-    # Calculate average word length
-    words = [word for sentence in sentences for word in nltk.word_tokenize(sentence)
-             if word.isalnum()]  # only consider alphanumeric tokens
-
-    avg_word_length = statistics.mean([len(word) for word in words]) if words else 0
-
-    # Determine complexity
-    if avg_sentence_length > 20 or avg_word_length > 6:
-        return "Complex"
-    elif avg_sentence_length < 12 or avg_word_length < 4:
-        return "Simple"
-    else:
-        return "Average"
-
-def compare_classifications(text1, text2):
-    """
-    Compare classifications between two texts
-
-    Args:
-        text1 (str): First text
-        text2 (str): Second text
-
-    Returns:
-        dict: Comparison results
-    """
-    formality1 = classify_formality(text1)
-    formality2 = classify_formality(text2)
-
-    sentiment1 = classify_sentiment(text1)
-    sentiment2 = classify_sentiment(text2)
-
-    complexity1 = classify_complexity(text1)
-    complexity2 = classify_complexity(text2)
-
-    results = {}
-
-    if formality1 != formality2:
-        results["Formality"] = f"Model 1 is {formality1.lower()}, while Model 2 is {formality2.lower()}"
-
-    if sentiment1 != sentiment2:
-        results["Sentiment"] = f"Model 1 has a {sentiment1.lower()} tone, while Model 2 has a {sentiment2.lower()} tone"
-
-    if complexity1 != complexity2:
-        results["Complexity"] = f"Model 1 uses {complexity1.lower()} language, while Model 2 uses {complexity2.lower()} language"
-
-    if not results:
-        results["Summary"] = "Both responses have similar writing characteristics"
-
-    return results
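
For reference, the deleted classifier module was heuristic throughout; its sentiment branch simply thresholded NLTK VADER's compound score. A minimal sketch of that thresholding, assuming the vader_lexicon resource can be downloaded:

    import nltk
    from nltk.sentiment import SentimentIntensityAnalyzer

    nltk.download('vader_lexicon', quiet=True)

    def classify_sentiment(text):
        # Same cutoffs the removed module used:
        # compound >= 0.05 -> Positive, <= -0.05 -> Negative, otherwise Neutral
        compound = SentimentIntensityAnalyzer().polarity_scores(text)['compound']
        if compound >= 0.05:
            return "Positive"
        if compound <= -0.05:
            return "Negative"
        return "Neutral"

    print(classify_sentiment("This answer is clear and genuinely helpful."))  # Positive
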
processors/topic_modeling.py CHANGED
@@ -83,12 +83,10 @@ def extract_topics(texts, n_topics=3, n_top_words=10, method="lda"):
     # Create document-term matrix
     if method == "nmf":
         # For NMF, use TF-IDF vectorization
-        # Adjust min_df and max_df for small document sets
-        vectorizer = TfidfVectorizer(max_features=1000, min_df=1, max_df=1.0)
+        vectorizer = TfidfVectorizer(max_features=1000, min_df=2, max_df=0.85)
     else:
         # For LDA, use CountVectorizer
-        # Adjust min_df and max_df for small document sets
-        vectorizer = CountVectorizer(max_features=1000, min_df=1, max_df=1.0)
+        vectorizer = CountVectorizer(max_features=1000, min_df=2, max_df=0.85)
 
     X = vectorizer.fit_transform(preprocessed_texts)
     feature_names = vectorizer.get_feature_names_out()
@@ -117,65 +115,94 @@ def extract_topics(texts, n_topics=3, n_top_words=10, method="lda"):
 
     return result
 
-def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method="lda", model_names=None):
+def compare_topics(response_texts, model_names, n_topics=3, n_top_words=10, method="lda"):
     """
-    Compare topics between two sets of texts
+    Compare topic distributions between different model responses
 
     Args:
-        texts_set_1 (list): First list of text documents
-        texts_set_2 (list): Second list of text documents
+        response_texts (list): List of response texts to compare
+        model_names (list): Names of models corresponding to responses
         n_topics (int): Number of topics to extract
        n_top_words (int): Number of top words per topic
        method (str): Topic modeling method ('lda' or 'nmf')
-        model_names (list, optional): Names of the models being compared
 
    Returns:
-        dict: Comparison results with topics from both sets and similarity metrics
+        dict: Comparative topic analysis
    """
-    # Set default model names if not provided
-    if model_names is None:
-        model_names = ["Model 1", "Model 2"]
-
-    # Extract topics for each set
-    topics_set_1 = extract_topics(texts_set_1, n_topics, n_top_words, method)
-    topics_set_2 = extract_topics(texts_set_2, n_topics, n_top_words, method)
-
-    # Calculate similarity between topics
-    similarity_matrix = []
-    for topic1 in topics_set_1["topics"]:
-        topic_similarities = []
-        words1 = set(topic1["words"])
-        for topic2 in topics_set_2["topics"]:
-            words2 = set(topic2["words"])
-            # Jaccard similarity: intersection over union
-            intersection = len(words1.intersection(words2))
-            union = len(words1.union(words2))
-            similarity = intersection / union if union > 0 else 0
-            topic_similarities.append(similarity)
-        similarity_matrix.append(topic_similarities)
-
-    # Find the best matching topic pairs
-    matched_topics = []
-    for i, similarities in enumerate(similarity_matrix):
-        best_match_idx = np.argmax(similarities)
-        matched_topics.append({
-            "set1_topic_id": i,
-            "set1_topic_words": topics_set_1["topics"][i]["words"],
-            "set2_topic_id": best_match_idx,
-            "set2_topic_words": topics_set_2["topics"][best_match_idx]["words"],
-            "similarity": similarities[best_match_idx]
-        })
-
-    # Construct result
+    # Initialize results
    result = {
+        "models": model_names,
        "method": method,
        "n_topics": n_topics,
-        "set1_topics": topics_set_1["topics"],
-        "set2_topics": topics_set_2["topics"],
-        "similarity_matrix": similarity_matrix,
-        "matched_topics": matched_topics,
-        "average_similarity": np.mean([match["similarity"] for match in matched_topics]),
-        "models": model_names  # Add model names to result
+        "topics": [],
+        "model_topics": {},
+        "comparisons": {}
    }
 
-    return result
+    # Extract topics
+    topic_model = extract_topics(response_texts, n_topics, n_top_words, method)
+    result["topics"] = topic_model["topics"]
+
+    # Map topic distributions to models
+    for i, model_name in enumerate(model_names):
+        if i < len(topic_model["document_topics"]):
+            result["model_topics"][model_name] = topic_model["document_topics"][i]["distribution"]
+
+    # Calculate topic distribution differences for pairs of models
+    if len(model_names) >= 2:
+        for i in range(len(model_names)):
+            for j in range(i+1, len(model_names)):
+                model1, model2 = model_names[i], model_names[j]
+
+                # Get topic distributions
+                dist1 = result["model_topics"].get(model1, [])
+                dist2 = result["model_topics"].get(model2, [])
+
+                # Skip if distributions are not available
+                if not dist1 or not dist2 or len(dist1) != len(dist2):
+                    continue
+
+                # Calculate Jensen-Shannon divergence (approximation using average of KL divergences)
+                dist1 = np.array(dist1)
+                dist2 = np.array(dist2)
+
+                # Add small epsilon to avoid division by zero
+                epsilon = 1e-10
+                dist1 = dist1 + epsilon
+                dist2 = dist2 + epsilon
+
+                # Normalize
+                dist1 = dist1 / np.sum(dist1)
+                dist2 = dist2 / np.sum(dist2)
+
+                # Calculate average distribution
+                avg_dist = (dist1 + dist2) / 2
+
+                # Calculate KL divergences
+                kl_div1 = np.sum(dist1 * np.log(dist1 / avg_dist))
+                kl_div2 = np.sum(dist2 * np.log(dist2 / avg_dist))
+
+                # Jensen-Shannon divergence
+                js_div = (kl_div1 + kl_div2) / 2
+
+                # Topic-wise differences
+                topic_diffs = []
+                for t in range(len(dist1)):
+                    topic_diffs.append({
+                        "topic_id": t,
+                        "model1_weight": float(dist1[t]),
+                        "model2_weight": float(dist2[t]),
+                        "diff": float(abs(dist1[t] - dist2[t]))
+                    })
+
+                # Sort by difference
+                topic_diffs.sort(key=lambda x: x["diff"], reverse=True)
+
+                # Store comparison
+                comparison_key = f"{model1} vs {model2}"
+                result["comparisons"][comparison_key] = {
+                    "js_divergence": float(js_div),
+                    "topic_differences": topic_diffs
+                }
+
+    return result
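
The js_divergence stored above is the Jensen-Shannon divergence in its KL-average form, JSD(P, Q) = (KL(P||M) + KL(Q||M)) / 2 with M = (P + Q) / 2, computed with natural logarithms. A standalone sketch on toy topic distributions (the numbers are invented; identical distributions give 0):

    import numpy as np

    def js_divergence(p, q, epsilon=1e-10):
        # Smooth and renormalize, as compare_topics does above
        p = np.asarray(p, dtype=float) + epsilon
        q = np.asarray(q, dtype=float) + epsilon
        p, q = p / p.sum(), q / q.sum()
        m = (p + q) / 2
        return (np.sum(p * np.log(p / m)) + np.sum(q * np.log(q / m))) / 2

    print(js_divergence([0.7, 0.2, 0.1], [0.1, 0.3, 0.6]))  # ~0.23
    print(js_divergence([0.5, 0.3, 0.2], [0.5, 0.3, 0.2]))  # ~0.0
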
ui/analysis_screen.py CHANGED
@@ -3,35 +3,40 @@ import json
 from visualization.bow_visualizer import process_and_visualize_analysis
 
 # Import analysis modules
-from processors.topic_modeling import compare_topics
+# Uncomment these when implemented
+# from processors.topic_modeling import extract_topics, compare_topics
 from processors.ngram_analysis import compare_ngrams
+# from processors.bias_detection import compare_bias
 from processors.bow_analysis import compare_bow
-from processors.text_classifiers import classify_formality, classify_sentiment, classify_complexity, compare_classifications
+# from processors.metrics import calculate_similarity
+# from processors.diff_highlighter import highlight_differences
 
 def create_analysis_screen():
     """
     Create the analysis options screen
 
     Returns:
-        tuple: (analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top, topic_count)
+        tuple: (analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top)
     """
     with gr.Column() as analysis_screen:
         gr.Markdown("## Analysis Options")
-        gr.Markdown("Select which analysis you want to run on the LLM responses.")
+        gr.Markdown("Select which analyses you want to run on the LLM responses.")
 
-        # Change from CheckboxGroup to Radio for analysis selection
+        # Analysis selection
        with gr.Group():
-            analysis_options = gr.Radio(
+            analysis_options = gr.CheckboxGroup(
                choices=[
-                    "Bag of Words",
-                    "N-gram Analysis",
                    "Topic Modeling",
+                    "N-gram Analysis",
                    "Bias Detection",
-                    "Classifier"
-                    # Removed "LLM Analysis" as requested
+                    "Bag of Words",
+                    "Similarity Metrics",
+                    "Difference Highlighting"
                ],
-                value="Bag of Words",  # Default selection
-                label="Select Analysis Type"
+                value=[
+                    "Bag of Words",
+                ],
+                label="Select Analyses to Run"
            )
 
        # Create slider directly here for easier access
@@ -54,19 +59,13 @@ def create_analysis_screen():
            visible=False
        )
 
-        # Create topic modeling parameter accessible at top level
-        topic_count = gr.Slider(
-            minimum=2, maximum=10, value=3, step=1,
-            label="Number of Topics",
-            visible=False
-        )
-
        # Parameters for each analysis type
        with gr.Group() as analysis_params:
            # Topic modeling parameters
            with gr.Group(visible=False) as topic_params:
                gr.Markdown("### Topic Modeling Parameters")
-                # We'll use the topic_count defined above
+                topic_count = gr.Slider(minimum=2, maximum=10, value=3, step=1,
+                                        label="Number of Topics")
 
            # N-gram parameters group (using external ngram_n and ngram_top)
            with gr.Group(visible=False) as ngram_params:
@@ -82,38 +81,32 @@ def create_analysis_screen():
                    label="Bias Detection Methods"
                )
 
-            # Classifier parameters
-            with gr.Group(visible=False) as classifier_params:
-                gr.Markdown("### Classifier Parameters")
-                gr.Markdown("Classifies responses based on formality, sentiment, and complexity")
+            # Similarity metrics parameters
+            with gr.Group(visible=False) as similarity_params:
+                gr.Markdown("### Similarity Metrics Parameters")
+                similarity_metrics = gr.CheckboxGroup(
+                    choices=["Cosine Similarity", "Jaccard Similarity", "Semantic Similarity"],
+                    value=["Cosine Similarity", "Semantic Similarity"],
+                    label="Similarity Metrics to Calculate"
+                )
 
-        # Function to update parameter visibility based on selected analysis
+        # Function to update parameter visibility based on selected analyses
        def update_params_visibility(selected):
+            ngram_visible = "N-gram Analysis" in selected
            return {
-                topic_params: gr.update(visible=selected == "Topic Modeling"),
-                ngram_params: gr.update(visible=selected == "N-gram Analysis"),
-                bias_params: gr.update(visible=selected == "Bias Detection"),
-                classifier_params: gr.update(visible=selected == "Classifier"),
-                ngram_n: gr.update(visible=selected == "N-gram Analysis"),
-                ngram_top: gr.update(visible=selected == "N-gram Analysis"),
-                topic_count: gr.update(visible=selected == "Topic Modeling"),
-                bow_top_slider: gr.update(visible=selected == "Bag of Words")
+                topic_params: gr.update(visible="Topic Modeling" in selected),
+                ngram_params: gr.update(visible=ngram_visible),
+                bias_params: gr.update(visible="Bias Detection" in selected),
+                similarity_params: gr.update(visible="Similarity Metrics" in selected),
+                ngram_n: gr.update(visible=ngram_visible),
+                ngram_top: gr.update(visible=ngram_visible)
            }
 
        # Set up event handler for analysis selection
        analysis_options.change(
            fn=update_params_visibility,
            inputs=[analysis_options],
-            outputs=[
-                topic_params,
-                ngram_params,
-                bias_params,
-                classifier_params,
-                ngram_n,
-                ngram_top,
-                topic_count,
-                bow_top_slider
-            ]
+            outputs=[topic_params, ngram_params, bias_params, similarity_params, ngram_n, ngram_top]
        )
 
        # Run analysis button
@@ -123,322 +116,54 @@ def create_analysis_screen():
        analysis_output = gr.JSON(label="Analysis Results", visible=False)
 
        # Return the components needed by app.py
-        return analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top, topic_count
+        return analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top
 
-# Add the implementation of these helper functions
-def extract_important_words(text, top_n=20):
-    """
-    Extract the most important words from a text.
-
-    Args:
-        text (str): Input text
-        top_n (int): Number of top words to return
-
-    Returns:
-        list: List of important words with their counts
-    """
-    # Import necessary modules
-    from collections import Counter
-    import re
-    import nltk
-    from nltk.corpus import stopwords
-    from nltk.tokenize import word_tokenize
-
-    # Make sure nltk resources are available
-    try:
-        stop_words = set(stopwords.words('english'))
-    except:
-        nltk.download('stopwords')
-        stop_words = set(stopwords.words('english'))
-
-    try:
-        tokens = word_tokenize(text.lower())
-    except:
-        nltk.download('punkt')
-        tokens = word_tokenize(text.lower())
-
-    # Remove stopwords and non-alphabetic tokens
-    filtered_tokens = [word for word in tokens if word.isalpha() and word not in stop_words and len(word) > 2]
-
-    # Count word frequencies
-    word_counts = Counter(filtered_tokens)
-
-    # Get the top N words
-    top_words = word_counts.most_common(top_n)
-
-    # Format the result
-    result = [{"word": word, "count": count} for word, count in top_words]
-
-    return result
-
-def calculate_text_similarity(text1, text2):
-    """
-    Calculate similarity metrics between two texts.
-
-    Args:
-        text1 (str): First text
-        text2 (str): Second text
-
-    Returns:
-        dict: Similarity metrics
-    """
-    from processors.metrics import calculate_similarity
-
-    # Calculate similarity using the metrics module
-    metrics = calculate_similarity(text1, text2)
-
-    # Add common word count
-    from collections import Counter
-    import nltk
-    from nltk.corpus import stopwords
-
-    # Make sure nltk resources are available
-    try:
-        stop_words = set(stopwords.words('english'))
-    except:
-        nltk.download('stopwords')
-        stop_words = set(stopwords.words('english'))
-
-    # Simple tokenization and filtering
-    words1 = set([w.lower() for w in nltk.word_tokenize(text1)
-                  if w.isalpha() and w.lower() not in stop_words])
-    words2 = set([w.lower() for w in nltk.word_tokenize(text2)
-                  if w.isalpha() and w.lower() not in stop_words])
-
-    # Calculate common words
-    common_words = words1.intersection(words2)
-
-    # Add to metrics
-    metrics["common_word_count"] = len(common_words)
-
-    return metrics
-
-def extract_ngrams(text, n=2, top_n=10):
-    """
-    Extract the most common n-grams from text.
-
-    Args:
-        text (str): Input text
-        n (int or str): Size of n-grams
-        top_n (int): Number of top n-grams to return
-
-    Returns:
-        list: List of important n-grams with their counts
-    """
-    import nltk
-    from nltk.util import ngrams
-    from collections import Counter
-
-    # Convert n to int if it's a string
-    if isinstance(n, str):
-        n = int(n)
-
-    # Make sure nltk resources are available
-    try:
-        tokens = nltk.word_tokenize(text.lower())
-    except:
-        nltk.download('punkt')
-        tokens = nltk.word_tokenize(text.lower())
-
-    # Generate n-grams
-    n_grams = list(ngrams(tokens, n))
-
-    # Convert n-grams to strings for easier handling
-    n_gram_strings = [' '.join(gram) for gram in n_grams]
-
-    # Count n-gram frequencies
-    n_gram_counts = Counter(n_gram_strings)
-
-    # Get the top N n-grams
-    top_n_grams = n_gram_counts.most_common(top_n)
-
-    # Format the result
-    result = [{"ngram": ngram, "count": count} for ngram, count in top_n_grams]
-
-    return result
-
-def compare_ngrams(text1, text2, n=2):
-    """
-    Compare n-grams between two texts.
-
-    Args:
-        text1 (str or list): First text
-        text2 (str or list): Second text
-        n (int or str): Size of n-grams
-
-    Returns:
-        dict: Comparison metrics
-    """
-    import nltk
-    from nltk.util import ngrams
-    from collections import Counter
-
-    # Convert n to int if it's a string
-    if isinstance(n, str):
-        n = int(n)
-
-    # Handle list inputs by converting to strings
-    if isinstance(text1, list):
-        text1 = ' '.join(str(item) for item in text1)
-    if isinstance(text2, list):
-        text2 = ' '.join(str(item) for item in text2)
-
-    # Make sure nltk resources are available
-    try:
-        tokens1 = nltk.word_tokenize(text1.lower())
-        tokens2 = nltk.word_tokenize(text2.lower())
-    except:
-        nltk.download('punkt')
-        tokens1 = nltk.word_tokenize(text1.lower())
-        tokens2 = nltk.word_tokenize(text2.lower())
-
-    # Generate n-grams
-    n_grams1 = set([' '.join(gram) for gram in ngrams(tokens1, n)])
-    n_grams2 = set([' '.join(gram) for gram in ngrams(tokens2, n)])
-
-    # Calculate common n-grams
-    common_n_grams = n_grams1.intersection(n_grams2)
-
-    # Return comparison metrics
-    return {
-        "common_ngram_count": len(common_n_grams)
-    }
-
-def perform_topic_modeling(texts, model_names, n_topics=3):
-    """
-    Perform topic modeling on a list of texts.
-
-    Args:
-        texts (list): List of text documents
-        model_names (list): Names of the models
-        n_topics (int): Number of topics to extract
-
-    Returns:
-        dict: Topic modeling results
-    """
-    from processors.topic_modeling import compare_topics
-
-    # Use the topic modeling processor
-    result = compare_topics(texts, model_names, n_topics=n_topics)
-
-    return result
-
-# Process analysis request function
-def process_analysis_request(dataset, selected_analysis, parameters):
-    """
-    Process the analysis request based on the selected options.
-
-    Args:
-        dataset (dict): The input dataset
-        selected_analysis (str): The selected analysis type
-        parameters (dict): Additional parameters for the analysis
-
-    Returns:
-        tuple: A tuple containing (analysis_results, visualization_data)
-    """
-    if not dataset or "entries" not in dataset or not dataset["entries"]:
-        return {}, None
-
-    # Initialize the results structure
-    results = {"analyses": {}}
-
-    # Get the prompt text from the first entry
-    prompt_text = dataset["entries"][0].get("prompt", "")
-    if not prompt_text:
-        return {"error": "No prompt found in dataset"}, None
-
-    # Initialize the analysis container for this prompt
-    results["analyses"][prompt_text] = {}
-
-    # Get model names and responses
-    model1_name = dataset["entries"][0].get("model", "Model 1")
-    model2_name = dataset["entries"][1].get("model", "Model 2")
-
-    model1_response = dataset["entries"][0].get("response", "")
-    model2_response = dataset["entries"][1].get("response", "")
-
-    # Process based on the selected analysis type
-    if selected_analysis == "Bag of Words":
-        # Get the top_n parameter and ensure it's an integer
-        top_n = parameters.get("bow_top", 25)
-        if isinstance(top_n, str):
-            top_n = int(top_n)
-
-        print(f"Using top_n value: {top_n}")  # Debug print
-
-        # Perform Bag of Words analysis using the processor
-        bow_results = compare_bow(
-            [model1_response, model2_response],
-            [model1_name, model2_name],
-            top_n=top_n
-        )
-        results["analyses"][prompt_text]["bag_of_words"] = bow_results
-
-    elif selected_analysis == "N-gram Analysis":
-        # Perform N-gram analysis
-        ngram_size = parameters.get("ngram_n", 2)
-        if isinstance(ngram_size, str):
-            ngram_size = int(ngram_size)
-
-        top_n = parameters.get("ngram_top", 15)
-        if isinstance(top_n, str):
-            top_n = int(top_n)
-
-        # Use the processor from the dedicated ngram_analysis module
-        from processors.ngram_analysis import compare_ngrams as ngram_processor
-        ngram_results = ngram_processor(
-            [model1_response, model2_response],
-            [model1_name, model2_name],
-            n=ngram_size,
-            top_n=top_n
-        )
-        results["analyses"][prompt_text]["ngram_analysis"] = ngram_results
-
-    elif selected_analysis == "Topic Modeling":
-        # Perform topic modeling analysis
-        topic_count = parameters.get("topic_count", 3)
-        if isinstance(topic_count, str):
-            topic_count = int(topic_count)
-
-        try:
-            topic_results = compare_topics(
-                texts_set_1=[model1_response],
-                texts_set_2=[model2_response],
-                n_topics=topic_count,
-                model_names=[model1_name, model2_name])
-
-            results["analyses"][prompt_text]["topic_modeling"] = topic_results
-        except Exception as e:
-            import traceback
-            print(f"Topic modeling error: {str(e)}\n{traceback.format_exc()}")
-            results["analyses"][prompt_text]["topic_modeling"] = {
-                "models": [model1_name, model2_name],
-                "error": str(e),
-                "message": "Topic modeling failed. Try with longer text or different parameters."
-            }
-
-    elif selected_analysis == "Classifier":
-        # Perform classifier analysis
-        results["analyses"][prompt_text]["classifier"] = {
-            "models": [model1_name, model2_name],
-            "classifications": {
-                model1_name: {
-                    "formality": classify_formality(model1_response),
-                    "sentiment": classify_sentiment(model1_response),
-                    "complexity": classify_complexity(model1_response)
-                },
-                model2_name: {
-                    "formality": classify_formality(model2_response),
-                    "sentiment": classify_sentiment(model2_response),
-                    "complexity": classify_complexity(model2_response)
-                }
-            },
-            "differences": compare_classifications(model1_response, model2_response)
-        }
-
-    else:
-        # Unknown analysis type
-        results["analyses"][prompt_text]["message"] = "Please select a valid analysis type."
-
-    # Return both the analysis results and a placeholder for visualization data
-    return results, None
+# Process analysis request function
+def process_analysis_request(dataset, selected_analyses, parameters):
+    """
+    Process the analysis request and run selected analyses
+    """
+    try:
+        print(f"Processing analysis request with: {selected_analyses}")
+        print(f"Parameters: {parameters}")
+
+        if not dataset or "entries" not in dataset or not dataset["entries"]:
+            return {}, gr.update(visible=True,
+                                 value=json.dumps({"error": "No dataset provided or dataset is empty"}, indent=2))
+
+        analysis_results = {"analyses": {}}
+
+        # Extract prompt and responses
+        prompt = dataset["entries"][0]["prompt"]
+        response_texts = [entry["response"] for entry in dataset["entries"]]
+        model_names = [entry["model"] for entry in dataset["entries"]]
+
+        print(f"Analyzing prompt: '{prompt[:50]}...'")
+        print(f"Models: {model_names}")
+
+        analysis_results["analyses"][prompt] = {}
+
+        # Run Bag of Words analysis if selected
+        if "Bag of Words" in selected_analyses:
+            top_words = parameters.get("bow_top", 25)
+            print(f"Running BOW analysis with top_words={top_words}")
+            bow_results = compare_bow(response_texts, model_names, top_words)
+            analysis_results["analyses"][prompt]["bag_of_words"] = bow_results
+
+        # Run N-gram analysis if selected
+        if "N-gram Analysis" in selected_analyses:
+            ngram_n = int(parameters.get("ngram_n", "2"))
+            ngram_top = parameters.get("ngram_top", 10)
+            print(f"Running N-gram analysis with n={ngram_n}, top_n={ngram_top}")
+            ngram_results = compare_ngrams(response_texts, model_names, ngram_n, ngram_top)
+            analysis_results["analyses"][prompt]["ngram_analysis"] = ngram_results
+
+        print("Analysis complete - results:", analysis_results)
+
+        # Return results and update the output component
+        return analysis_results, gr.update(visible=False, value=analysis_results)  # Hide the raw JSON
+    except Exception as e:
+        import traceback
+        error_msg = f"Analysis error: {str(e)}\n{traceback.format_exc()}"
+        print(error_msg)
+        return {}, gr.update(visible=True, value=json.dumps({"error": error_msg}, indent=2))
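The rewritten process_analysis_request reads dataset["entries"] positionally and buckets results under the prompt string, which is what app.py's run_analysis and the visualizers then walk. A hedged sketch of the two shapes involved (field values invented; the real value dicts carry the full per-model statistics):

    # Input shape expected by process_analysis_request
    dataset = {
        "entries": [
            {"model": "Model A", "prompt": "Explain HTTP caching.", "response": "..."},
            {"model": "Model B", "prompt": "Explain HTTP caching.", "response": "..."},
        ]
    }

    # Output shape: one bucket per prompt, one key per analysis that actually ran
    analysis_results = {
        "analyses": {
            "Explain HTTP caching.": {
                "bag_of_words": {"models": ["Model A", "Model B"]},    # plus word stats
                "ngram_analysis": {"models": ["Model A", "Model B"]},  # plus n-gram stats
            }
        }
    }
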
visualization/bow_visualizer.py CHANGED
@@ -8,7 +8,6 @@ import pandas as pd
 from difflib import SequenceMatcher
 
 from visualization.ngram_visualizer import create_ngram_visualization
-from visualization.topic_visualizer import process_and_visualize_topic_analysis  # Added import
 
 def create_bow_visualization(analysis_results):
     """
@@ -204,14 +203,6 @@ def process_and_visualize_analysis(analysis_results):
         ngram_components = create_ngram_visualization(
             {"analyses": {prompt: {"ngram_analysis": analyses["ngram_analysis"]}}})
         components.extend(ngram_components)
-
-        # Check for Topic Modeling analysis
-        if "topic_modeling" in analyses:
-            print("Processing Topic Modeling visualization")
-            # Use the dedicated topic visualization function
-            topic_components = process_and_visualize_topic_analysis(
-                {"analyses": {prompt: {"topic_modeling": analyses["topic_modeling"]}}})
-            components.extend(topic_components)
 
     if not components:
         components.append(gr.Markdown("No visualization components could be created from the analysis results."))
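
With the topic-modeling hook removed, process_and_visualize_analysis dispatches only on the analysis keys that remain and falls back to a notice when nothing matched. A minimal sketch of that dispatch-and-fallback pattern, assuming Gradio (handler bodies are placeholders, not the repo's visualizers):

    import gradio as gr

    def visualize(analyses):
        components = []
        if "bag_of_words" in analyses:
            components.append(gr.Markdown("(BOW visualization goes here)"))
        if "ngram_analysis" in analyses:
            components.append(gr.Markdown("(n-gram visualization goes here)"))
        if not components:
            # Same fallback message the real function appends
            components.append(gr.Markdown(
                "No visualization components could be created from the analysis results."))
        return components
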