Ryan committed
Commit f533950 · 1 Parent(s): 80636f0
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
app.py CHANGED
@@ -1,12 +1,27 @@
1
  import gradio as gr
2
  from ui.dataset_input import create_dataset_input, load_example_dataset
3
- from ui.analysis_screen import create_analysis_screen, process_analysis_request
4
  from visualization.bow_visualizer import process_and_visualize_analysis
5
  import nltk
6
  import os
7
- import json
8
 
9
- # Download necessary NLTK resources function remains unchanged
10
  def download_nltk_resources():
11
  """Download required NLTK resources if not already downloaded"""
12
  try:
@@ -33,25 +48,25 @@ def download_nltk_resources():
33
  for location in locations:
34
  try:
35
  nltk.data.find(location)
36
- print(f"Resource {resource} already downloaded")
37
  found = True
38
  break
39
  except LookupError:
40
  continue
41
 
42
  if not found:
43
- print(f"Downloading {resource}...")
44
  nltk.download(resource, quiet=True)
45
  except Exception as e:
46
- print(f"Error with resource {resource}: {e}")
47
 
48
- print("NLTK resources check completed")
49
  except Exception as e:
50
- print(f"Error downloading NLTK resources: {e}")
51
 
52
  def create_app():
53
  """
54
- Create a streamlined Gradio app for dataset input and Bag of Words analysis.
55
 
56
  Returns:
57
  gr.Blocks: The Gradio application
@@ -97,7 +112,7 @@ def create_app():
97
  # Analysis Tab
98
  with gr.Tab("Analysis"):
99
  # Use create_analysis_screen to get UI components including visualization container
100
- analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top, topic_count, bias_methods = create_analysis_screen()
101
 
102
  # Pre-create visualization components (initially hidden)
103
  visualization_area_visible = gr.Checkbox(value=False, visible=False, label="Visualization Visible")
@@ -122,7 +137,7 @@ def create_app():
122
  status_message = gr.Markdown(visible=False)
123
 
124
  # Define a helper function to extract parameter values and run the analysis
125
- def run_analysis(dataset, selected_analysis, bow_top, ngram_n, ngram_top, topic_count, bias_methods):
126
  try:
127
  if not dataset or "entries" not in dataset or not dataset["entries"]:
128
  return (
@@ -146,11 +161,10 @@ def create_app():
146
  "bow_top": bow_top,
147
  "ngram_n": ngram_n,
148
  "ngram_top": ngram_top,
149
- "topic_count": topic_count,
150
- "bias_methods": bias_methods # Add this parameter
151
  }
152
- print(f"Running analysis with selected type: {selected_analysis}")
153
- print("Parameters:", parameters)
154
 
155
  # Process the analysis request - passing selected_analysis as a string
156
  analysis_results, _ = process_analysis_request(dataset, selected_analysis, parameters)
@@ -326,10 +340,30 @@ def create_app():
326
  - **Common {size_name}**: {common_count} {size_name.lower()} appear in both responses
327
  """
328
 
329
- # Check for Topic Modeling analysis
330
  elif selected_analysis == "Topic Modeling" and "topic_modeling" in analyses:
331
  visualization_area_visible = True
332
  topic_results = analyses["topic_modeling"]
333
  models = topic_results.get("models", [])
334
  method = topic_results.get("method", "lda").upper()
335
  n_topics = topic_results.get("n_topics", 3)
@@ -347,7 +381,7 @@ def create_app():
347
  if topics:
348
  # Format topic info for display
349
  topic_info = []
350
- for topic in topics[:3]: # Show first 3 topics
351
  topic_id = topic.get("id", 0)
352
  words = topic.get("words", [])[:5] # Top 5 words per topic
353
 
@@ -378,9 +412,9 @@ def create_app():
378
  dist2 = model_topics[model2_name]
379
 
380
  model2_words_value = f"""
381
- **{model1_name}**: {', '.join([f"Topic {i+1}: {v:.2f}" for i, v in enumerate(dist1[:3])])}
382
 
383
- **{model2_name}**: {', '.join([f"Topic {i+1}: {v:.2f}" for i, v in enumerate(dist2[:3])])}
384
  """
385
 
386
  # Add similarity metrics if available
@@ -392,10 +426,23 @@ def create_app():
392
  metrics = comparisons[comparison_key]
393
  js_div = metrics.get("js_divergence", 0)
394
 
395
  similarity_title_visible = True
396
  similarity_metrics_visible = True
397
  similarity_metrics_value = f"""
398
- - **Topic Distribution Divergence**: {js_div:.4f} (lower means more similar topic distributions)
399
  """
400
 
401
  # Check for Classifier analysis
@@ -448,101 +495,6 @@ def create_app():
448
  f"- **{category}**: {diff}"
449
  for category, diff in differences.items()
450
  ])
451
-
452
- # Check for Bias Detection analysis
453
- elif selected_analysis == "Bias Detection" and "bias_detection" in analyses:
454
- visualization_area_visible = True
455
- bias_results = analyses["bias_detection"]
456
- models = bias_results.get("models", [])
457
-
458
- if len(models) >= 2:
459
- prompt_title_visible = True
460
- prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
461
-
462
- models_compared_visible = True
463
- models_compared_value = f"### Bias Analysis: Comparing responses from {models[0]} and {models[1]}"
464
-
465
- # Display comparative bias results
466
- model1_name = models[0]
467
- model2_name = models[1]
468
-
469
- if "comparative" in bias_results:
470
- comparative = bias_results["comparative"]
471
-
472
- # Format summary for display
473
- model1_title_visible = True
474
- model1_title_value = "#### Bias Detection Summary"
475
- model1_words_visible = True
476
-
477
- summary_parts = []
478
-
479
- # Add sentiment comparison
480
- if "sentiment" in comparative:
481
- sent = comparative["sentiment"]
482
- is_significant = sent.get("significant", False)
483
- summary_parts.append(
484
- f"**Sentiment Bias**: {model1_name} shows {sent.get(model1_name, 'N/A')} sentiment, " +
485
- f"while {model2_name} shows {sent.get(model2_name, 'N/A')} sentiment. " +
486
- f"({'Significant' if is_significant else 'Minor'} difference)"
487
- )
488
-
489
- # Add partisan comparison
490
- if "partisan" in comparative:
491
- part = comparative["partisan"]
492
- is_significant = part.get("significant", False)
493
- summary_parts.append(
494
- f"**Partisan Leaning**: {model1_name} appears {part.get(model1_name, 'N/A')}, " +
495
- f"while {model2_name} appears {part.get(model2_name, 'N/A')}. " +
496
- f"({'Significant' if is_significant else 'Minor'} difference)"
497
- )
498
-
499
- # Add framing comparison
500
- if "framing" in comparative:
501
- frame = comparative["framing"]
502
- different_frames = frame.get("different_frames", False)
503
- m1_frame = frame.get(model1_name, "N/A").replace('_', ' ').title()
504
- m2_frame = frame.get(model2_name, "N/A").replace('_', ' ').title()
505
- summary_parts.append(
506
- f"**Issue Framing**: {model1_name} primarily frames issues in {m1_frame} terms, " +
507
- f"while {model2_name} uses {m2_frame} framing. " +
508
- f"({'Different' if different_frames else 'Similar'} approaches)"
509
- )
510
-
511
- # Add overall assessment
512
- if "overall" in comparative:
513
- overall = comparative["overall"]
514
- significant = overall.get("significant_bias_difference", False)
515
- summary_parts.append(
516
- f"**Overall Assessment**: " +
517
- f"Analysis shows a {overall.get('difference', 0):.2f}/1.0 difference in bias patterns. " +
518
- f"({'Significant' if significant else 'Minor'} overall bias difference)"
519
- )
520
-
521
- # Combine all parts
522
- model1_words_value = "\n\n".join(summary_parts)
523
-
524
- # Format detailed term analysis
525
- if (model1_name in bias_results and "partisan" in bias_results[model1_name] and
526
- model2_name in bias_results and "partisan" in bias_results[model2_name]):
527
-
528
- model2_title_visible = True
529
- model2_title_value = "#### Partisan Term Analysis"
530
- model2_words_visible = True
531
-
532
- m1_lib = bias_results[model1_name]["partisan"].get("liberal_terms", [])
533
- m1_con = bias_results[model1_name]["partisan"].get("conservative_terms", [])
534
- m2_lib = bias_results[model2_name]["partisan"].get("liberal_terms", [])
535
- m2_con = bias_results[model2_name]["partisan"].get("conservative_terms", [])
536
-
537
- model2_words_value = f"""
538
- **{model1_name}**:
539
- - Liberal terms: {', '.join(m1_lib) if m1_lib else 'None detected'}
540
- - Conservative terms: {', '.join(m1_con) if m1_con else 'None detected'}
541
-
542
- **{model2_name}**:
543
- - Liberal terms: {', '.join(m2_lib) if m2_lib else 'None detected'}
544
- - Conservative terms: {', '.join(m2_con) if m2_con else 'None detected'}
545
- """
546
 
547
  # If we don't have visualization data from any analysis
548
  if not visualization_area_visible:
@@ -584,7 +536,7 @@ def create_app():
584
  except Exception as e:
585
  import traceback
586
  error_msg = f"Error in analysis: {str(e)}\n{traceback.format_exc()}"
587
- print(error_msg)
588
 
589
  return (
590
  {"error": error_msg}, # analysis_results_state
@@ -602,80 +554,28 @@ def create_app():
602
  True, # status_message_visible
603
  gr.update(visible=True, value=f"❌ **Error during analysis:**\n\n```\n{str(e)}\n```") # status_message
604
  )
605
-
606
- # Add a new LLM Analysis tab
607
- with gr.Tab("LLM Analysis"):
608
- gr.Markdown("## LLM-Based Response Analysis")
609
-
610
- with gr.Row():
611
- with gr.Column():
612
- llm_analysis_type = gr.Radio(
613
- choices=["Response Quality", "Response Comparison", "Factual Accuracy"],
614
- label="Analysis Type",
615
- value="Response Comparison"
616
- )
617
-
618
- llm_model = gr.Dropdown(
619
- choices=["OpenAI GPT-4", "Anthropic Claude", "Local LLM"],
620
- label="Analysis Model",
621
- value="OpenAI GPT-4"
622
- )
623
-
624
- run_llm_analysis_btn = gr.Button("Run LLM Analysis", variant="primary")
625
-
626
- with gr.Column():
627
- llm_analysis_prompt = gr.Textbox(
628
- label="Custom Analysis Instructions (Optional)",
629
- placeholder="Enter any specific instructions for the analysis...",
630
- lines=3
631
- )
632
-
633
- llm_analysis_status = gr.Markdown("*No analysis has been run*")
634
-
635
- llm_analysis_result = gr.Markdown(visible=False)
636
-
637
- # Placeholder function for LLM analysis
638
- def run_llm_analysis(dataset, analysis_type, model, custom_prompt):
639
- if not dataset or "entries" not in dataset or not dataset["entries"]:
640
- return (
641
- gr.update(visible=True, value="❌ **Error:** No dataset loaded. Please create or load a dataset first."),
642
- gr.update(visible=False)
643
- )
644
-
645
- # Placeholder for actual implementation
646
- return (
647
- gr.update(visible=True, value="⏳ **Implementation in progress**\n\nLLM-based analysis will be available in a future update."),
648
- gr.update(visible=False)
649
- )
650
-
651
  # Connect the run button to the analysis function
652
- run_llm_analysis_btn.click(
653
- fn=run_llm_analysis,
654
- inputs=[dataset_state, llm_analysis_type, llm_model, llm_analysis_prompt],
655
- outputs=[llm_analysis_status, llm_analysis_result]
656
  )
657
-
658
- # Run analysis with proper parameters
659
- run_analysis_btn.click(
660
- fn=run_analysis,
661
- inputs=[dataset_state, analysis_options, bow_top_slider, ngram_n, ngram_top, topic_count, bias_methods],
662
- outputs=[
663
- analysis_results_state,
664
- analysis_output,
665
- visualization_area_visible,
666
- analysis_title,
667
- prompt_title,
668
- models_compared,
669
- model1_title,
670
- model1_words,
671
- model2_title,
672
- model2_words,
673
- similarity_metrics_title,
674
- similarity_metrics,
675
- status_message_visible,
676
- status_message
677
- ]
678
- )
679
 
680
  return app
681
 
@@ -683,5 +583,9 @@ if __name__ == "__main__":
683
  # Download required NLTK resources before launching the app
684
  download_nltk_resources()
685
 
 
686
  app = create_app()
687
  app.launch()
 
1
  import gradio as gr
2
  from ui.dataset_input import create_dataset_input, load_example_dataset
3
+ from ui.analysis_screen import create_analysis_screen
4
  from visualization.bow_visualizer import process_and_visualize_analysis
5
  import nltk
6
  import os
7
+ import logging
8
+ import sys
9
+ import traceback
10
 
11
+ # Set up logging
12
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
13
+ logger = logging.getLogger('gradio_app')
14
+
15
+ # Import the process_analysis_request function
16
+ # Try to use the improved version if available, otherwise use original
17
+ try:
18
+ from improved_analysis_handler import process_analysis_request
19
+ logger.info("Using improved analysis handler")
20
+ except ImportError:
21
+ logger.info("Using original analysis handler")
22
+ from ui.analysis_screen import process_analysis_request
23
+
24
+ # Download necessary NLTK resources
25
  def download_nltk_resources():
26
  """Download required NLTK resources if not already downloaded"""
27
  try:
 
48
  for location in locations:
49
  try:
50
  nltk.data.find(location)
51
+ logger.info(f"Resource {resource} already downloaded")
52
  found = True
53
  break
54
  except LookupError:
55
  continue
56
 
57
  if not found:
58
+ logger.info(f"Downloading {resource}...")
59
  nltk.download(resource, quiet=True)
60
  except Exception as e:
61
+ logger.error(f"Error with resource {resource}: {e}")
62
 
63
+ logger.info("NLTK resources check completed")
64
  except Exception as e:
65
+ logger.error(f"Error downloading NLTK resources: {e}")
66
 
67
  def create_app():
68
  """
69
+ Create a streamlined Gradio app for dataset input and analysis.
70
 
71
  Returns:
72
  gr.Blocks: The Gradio application
 
112
  # Analysis Tab
113
  with gr.Tab("Analysis"):
114
  # Use create_analysis_screen to get UI components including visualization container
115
+ analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top, topic_count = create_analysis_screen()
116
 
117
  # Pre-create visualization components (initially hidden)
118
  visualization_area_visible = gr.Checkbox(value=False, visible=False, label="Visualization Visible")
 
137
  status_message = gr.Markdown(visible=False)
138
 
139
  # Define a helper function to extract parameter values and run the analysis
140
+ def run_analysis(dataset, selected_analysis, bow_top, ngram_n, ngram_top, topic_count):
141
  try:
142
  if not dataset or "entries" not in dataset or not dataset["entries"]:
143
  return (
 
161
  "bow_top": bow_top,
162
  "ngram_n": ngram_n,
163
  "ngram_top": ngram_top,
164
+ "topic_count": topic_count
 
165
  }
166
+ logger.info(f"Running analysis with selected type: {selected_analysis}")
167
+ logger.info(f"Parameters: {parameters}")
168
 
169
  # Process the analysis request - passing selected_analysis as a string
170
  analysis_results, _ = process_analysis_request(dataset, selected_analysis, parameters)
 
340
  - **Common {size_name}**: {common_count} {size_name.lower()} appear in both responses
341
  """
342
 
343
+ # Check for Topic Modeling analysis - IMPROVED HANDLING
344
  elif selected_analysis == "Topic Modeling" and "topic_modeling" in analyses:
345
  visualization_area_visible = True
346
  topic_results = analyses["topic_modeling"]
347
+
348
+ # Check for errors in topic modeling
349
+ if "error" in topic_results:
350
+ return (
351
+ analysis_results,
352
+ False, # Don't show raw JSON
353
+ False, # Don't show visualization area
354
+ gr.update(visible=False),
355
+ gr.update(visible=False),
356
+ gr.update(visible=False),
357
+ gr.update(visible=False),
358
+ gr.update(visible=False),
359
+ gr.update(visible=False),
360
+ gr.update(visible=False),
361
+ gr.update(visible=False),
362
+ gr.update(visible=False),
363
+ True, # Show status message
364
+ gr.update(visible=True, value=f"❌ **Topic modeling error:** {topic_results['error']}")
365
+ )
366
+
367
  models = topic_results.get("models", [])
368
  method = topic_results.get("method", "lda").upper()
369
  n_topics = topic_results.get("n_topics", 3)
 
381
  if topics:
382
  # Format topic info for display
383
  topic_info = []
384
+ for topic in topics[:5]: # Show first 5 topics
385
  topic_id = topic.get("id", 0)
386
  words = topic.get("words", [])[:5] # Top 5 words per topic
387
 
 
412
  dist2 = model_topics[model2_name]
413
 
414
  model2_words_value = f"""
415
+ **{model1_name}**: {', '.join([f"Topic {i+1}: {v:.2f}" for i, v in enumerate(dist1[:5])])}
416
 
417
+ **{model2_name}**: {', '.join([f"Topic {i+1}: {v:.2f}" for i, v in enumerate(dist2[:5])])}
418
  """
419
 
420
  # Add similarity metrics if available
 
426
  metrics = comparisons[comparison_key]
427
  js_div = metrics.get("js_divergence", 0)
428
 
429
+ # Add interpretation
430
+ similarity_text = ""
431
+ if js_div < 0.2:
432
+ similarity_text = "very similar"
433
+ elif js_div < 0.4:
434
+ similarity_text = "somewhat similar"
435
+ elif js_div < 0.6:
436
+ similarity_text = "moderately different"
437
+ else:
438
+ similarity_text = "very different"
439
+
440
  similarity_title_visible = True
441
  similarity_metrics_visible = True
442
  similarity_metrics_value = f"""
443
+ - **Topic Distribution Divergence**: {js_div:.4f}
444
+ - The topic distributions between models are **{similarity_text}**
445
+ - *Lower divergence values indicate more similar topic distributions*
446
  """
447
 
448
  # Check for Classifier analysis
 
495
  f"- **{category}**: {diff}"
496
  for category, diff in differences.items()
497
  ])
498
 
499
  # If we don't have visualization data from any analysis
500
  if not visualization_area_visible:
 
536
  except Exception as e:
537
  import traceback
538
  error_msg = f"Error in analysis: {str(e)}\n{traceback.format_exc()}"
539
+ logger.error(error_msg)
540
 
541
  return (
542
  {"error": error_msg}, # analysis_results_state
 
554
  True, # status_message_visible
555
  gr.update(visible=True, value=f"❌ **Error during analysis:**\n\n```\n{str(e)}\n```") # status_message
556
  )
557
+
558
  # Connect the run button to the analysis function
559
+ run_analysis_btn.click(
560
+ fn=run_analysis,
561
+ inputs=[dataset_state, analysis_options, bow_top_slider, ngram_n, ngram_top, topic_count],
562
+ outputs=[
563
+ analysis_results_state,
564
+ analysis_output,
565
+ visualization_area_visible,
566
+ analysis_title,
567
+ prompt_title,
568
+ models_compared,
569
+ model1_title,
570
+ model1_words,
571
+ model2_title,
572
+ model2_words,
573
+ similarity_metrics_title,
574
+ similarity_metrics,
575
+ status_message_visible,
576
+ status_message
577
+ ]
578
  )
579
 
580
  return app
581
 
 
583
  # Download required NLTK resources before launching the app
584
  download_nltk_resources()
585
 
586
+ logger.info("Starting LLM Response Comparator application")
587
+ logger.info("===== Application Startup =====")
588
+
589
+ # Create and launch the application
590
  app = create_app()
591
  app.launch()
improved_analysis_handler.py ADDED
@@ -0,0 +1,186 @@
1
+ import gradio as gr
2
+ import json
3
+ import logging
4
+ from visualization.bow_visualizer import process_and_visualize_analysis
5
+ from processors.topic_modeling import compare_topics
6
+ from processors.ngram_analysis import compare_ngrams
7
+ from processors.bow_analysis import compare_bow
8
+ from processors.text_classifiers import classify_formality, classify_sentiment, classify_complexity, compare_classifications
9
+
10
+ # Set up logging
11
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
12
+ logger = logging.getLogger('analysis_handler')
13
+
14
+ def process_analysis_request(dataset, selected_analysis, parameters):
15
+ """
16
+ Process the analysis request based on the selected options.
17
+
18
+ Args:
19
+ dataset (dict): The input dataset
20
+ selected_analysis (str): The selected analysis type
21
+ parameters (dict): Additional parameters for the analysis
22
+
23
+ Returns:
24
+ tuple: A tuple containing (analysis_results, visualization_data)
25
+ """
26
+ logger.info(f"Processing analysis request: {selected_analysis}")
27
+
28
+ if not dataset or "entries" not in dataset or not dataset["entries"]:
29
+ logger.warning("No valid dataset provided for analysis")
30
+ return {}, None
31
+
32
+ # Initialize the results structure
33
+ results = {"analyses": {}}
34
+
35
+ # Get the prompt text from the first entry
36
+ prompt_text = dataset["entries"][0].get("prompt", "")
37
+ if not prompt_text:
38
+ logger.warning("No prompt found in dataset")
39
+ return {"error": "No prompt found in dataset"}, None
40
+
41
+ # Initialize the analysis container for this prompt
42
+ results["analyses"][prompt_text] = {}
43
+
44
+ # Get model names and responses
45
+ model1_name = dataset["entries"][0].get("model", "Model 1")
46
+ model2_name = dataset["entries"][1].get("model", "Model 2")
47
+
48
+ model1_response = dataset["entries"][0].get("response", "")
49
+ model2_response = dataset["entries"][1].get("response", "")
50
+
51
+ logger.info(f"Comparing responses from {model1_name} and {model2_name}")
52
+
53
+ try:
54
+ # Process based on the selected analysis type
55
+ if selected_analysis == "Bag of Words":
56
+ # Get the top_n parameter and ensure it's an integer
57
+ top_n = parameters.get("bow_top", 25)
58
+ if isinstance(top_n, str):
59
+ top_n = int(top_n)
60
+
61
+ logger.info(f"Running Bag of Words analysis with top_n={top_n}")
62
+
63
+ # Perform Bag of Words analysis using the processor
64
+ bow_results = compare_bow(
65
+ [model1_response, model2_response],
66
+ [model1_name, model2_name],
67
+ top_n=top_n
68
+ )
69
+ results["analyses"][prompt_text]["bag_of_words"] = bow_results
70
+
71
+ elif selected_analysis == "N-gram Analysis":
72
+ # Perform N-gram analysis
73
+ ngram_size = parameters.get("ngram_n", 2)
74
+ if isinstance(ngram_size, str):
75
+ ngram_size = int(ngram_size)
76
+
77
+ top_n = parameters.get("ngram_top", 15)
78
+ if isinstance(top_n, str):
79
+ top_n = int(top_n)
80
+
81
+ logger.info(f"Running N-gram analysis with n={ngram_size}, top_n={top_n}")
82
+
83
+ # Use the processor from the dedicated ngram_analysis module
84
+ from processors.ngram_analysis import compare_ngrams as ngram_processor
85
+ ngram_results = ngram_processor(
86
+ [model1_response, model2_response],
87
+ [model1_name, model2_name],
88
+ n=ngram_size,
89
+ top_n=top_n
90
+ )
91
+ results["analyses"][prompt_text]["ngram_analysis"] = ngram_results
92
+
93
+ elif selected_analysis == "Topic Modeling":
94
+ # Perform topic modeling analysis
95
+ topic_count = parameters.get("topic_count", 3)
96
+ if isinstance(topic_count, str):
97
+ topic_count = int(topic_count)
98
+
99
+ logger.info(f"Running Topic Modeling analysis with n_topics={topic_count}")
100
+
101
+ try:
102
+ # Import the improved topic modeling module
103
+ try:
104
+ # First try to import from improved module if available
105
+ from improved_topic_modeling import compare_topics as improved_compare_topics
106
+ logger.info("Using improved topic modeling implementation")
107
+ topic_results = improved_compare_topics(
108
+ texts_set_1=[model1_response],
109
+ texts_set_2=[model2_response],
110
+ n_topics=topic_count,
111
+ model_names=[model1_name, model2_name])
112
+ except ImportError:
113
+ # Fall back to original implementation
114
+ logger.info("Using original topic modeling implementation")
115
+ from processors.topic_modeling import compare_topics
116
+ topic_results = compare_topics(
117
+ texts_set_1=[model1_response],
118
+ texts_set_2=[model2_response],
119
+ n_topics=topic_count,
120
+ model_names=[model1_name, model2_name])
121
+
122
+ results["analyses"][prompt_text]["topic_modeling"] = topic_results
123
+
124
+ # Ensure the topic modeling results contain the necessary fields
125
+ if "topics" not in topic_results or not topic_results["topics"]:
126
+ logger.warning("No topics found in topic modeling results")
127
+ topic_results["message"] = "No significant topics were discovered in the text. Try a different analysis method or adjust parameters."
128
+
129
+ if "model_topics" not in topic_results or not topic_results["model_topics"]:
130
+ logger.warning("No model topics found in topic modeling results")
131
+ if "message" not in topic_results:
132
+ topic_results["message"] = "Could not calculate topic distributions for the models."
133
+
134
+ except Exception as e:
135
+ import traceback
136
+ error_msg = f"Topic modeling error: {str(e)}\n{traceback.format_exc()}"
137
+ logger.error(error_msg)
138
+ results["analyses"][prompt_text]["topic_modeling"] = {
139
+ "models": [model1_name, model2_name],
140
+ "error": str(e),
141
+ "message": "Topic modeling failed. Please try with longer text or different parameters.",
142
+ "stack_trace": traceback.format_exc()
143
+ }
144
+
145
+ elif selected_analysis == "Classifier":
146
+ # Perform classifier analysis
147
+ logger.info("Running Classifier analysis")
148
+
149
+ results["analyses"][prompt_text]["classifier"] = {
150
+ "models": [model1_name, model2_name],
151
+ "classifications": {
152
+ model1_name: {
153
+ "formality": classify_formality(model1_response),
154
+ "sentiment": classify_sentiment(model1_response),
155
+ "complexity": classify_complexity(model1_response)
156
+ },
157
+ model2_name: {
158
+ "formality": classify_formality(model2_response),
159
+ "sentiment": classify_sentiment(model2_response),
160
+ "complexity": classify_complexity(model2_response)
161
+ }
162
+ },
163
+ "differences": compare_classifications(model1_response, model2_response)
164
+ }
165
+
166
+ else:
167
+ # Unknown analysis type
168
+ logger.warning(f"Unknown analysis type: {selected_analysis}")
169
+ results["analyses"][prompt_text]["message"] = "Please select a valid analysis type."
170
+
171
+ except Exception as e:
172
+ import traceback
173
+ error_msg = f"Error processing analysis request: {str(e)}\n{traceback.format_exc()}"
174
+ logger.error(error_msg)
175
+ results = {
176
+ "error": str(e),
177
+ "stack_trace": traceback.format_exc(),
178
+ "analyses": {
179
+ prompt_text: {
180
+ "message": f"Analysis failed: {str(e)}"
181
+ }
182
+ }
183
+ }
184
+
185
+ # Return both the analysis results and a placeholder for visualization data
186
+ return results, None
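
For reference, a minimal sketch (not part of this commit) of how the new handler can be driven outside the Gradio UI. The two-entry dataset shape and the parameter keys mirror what run_analysis() builds in app.py; the prompt and response strings below are placeholders.

    # Sketch only: call the handler directly with a hand-built dataset.
    from improved_analysis_handler import process_analysis_request

    dataset = {
        "entries": [
            {"prompt": "Explain photosynthesis.", "model": "Model A",
             "response": "Plants convert light into chemical energy ..."},
            {"prompt": "Explain photosynthesis.", "model": "Model B",
             "response": "Photosynthesis turns sunlight, water and CO2 into glucose ..."},
        ]
    }

    # "Topic Modeling" with topic_count is one of the parameter sets run_analysis() passes.
    results, _ = process_analysis_request(dataset, "Topic Modeling", {"topic_count": 3})

    prompt = dataset["entries"][0]["prompt"]
    print(results["analyses"][prompt].get("topic_modeling", {}).get("comparisons", {}))
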
processors/topic_modeling.py CHANGED
@@ -1,5 +1,6 @@
1
  """
2
- Topic modeling processor for comparing text responses
 
3
  """
4
  from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
5
  from sklearn.decomposition import LatentDirichletAllocation, NMF
@@ -7,6 +8,12 @@ import numpy as np
7
  import nltk
8
  from nltk.corpus import stopwords
9
  import re
10
 
11
  def preprocess_text(text):
12
  """
@@ -18,20 +25,25 @@ def preprocess_text(text):
18
  Returns:
19
  str: Preprocessed text
20
  """
21
- # Convert to lowercase
22
- text = text.lower()
23
-
24
- # Remove special characters and digits
25
- text = re.sub(r'[^a-zA-Z\s]', '', text)
26
-
27
- # Tokenize
28
- tokens = nltk.word_tokenize(text)
29
-
30
- # Remove stopwords
31
- stop_words = set(stopwords.words('english'))
32
- tokens = [token for token in tokens if token not in stop_words and len(token) > 3]
33
-
34
- return ' '.join(tokens)
35
 
36
  def get_top_words_per_topic(model, feature_names, n_top_words=10):
37
  """
@@ -70,6 +82,14 @@ def extract_topics(texts, n_topics=3, n_top_words=10, method="lda"):
70
  Returns:
71
  dict: Topic modeling results with topics and document-topic distributions
72
  """
73
  result = {
74
  "method": method,
75
  "n_topics": n_topics,
@@ -77,45 +97,102 @@ def extract_topics(texts, n_topics=3, n_top_words=10, method="lda"):
77
  "document_topics": []
78
  }
79
 
80
- # Preprocess texts
81
- preprocessed_texts = [preprocess_text(text) for text in texts]
82
-
83
- # Create document-term matrix
84
- if method == "nmf":
85
- # For NMF, use TF-IDF vectorization
86
- # Adjust min_df and max_df for small document sets
87
- vectorizer = TfidfVectorizer(max_features=1000, min_df=1, max_df=1.0)
88
- else:
89
- # For LDA, use CountVectorizer
90
- # Adjust min_df and max_df for small document sets
91
- vectorizer = CountVectorizer(max_features=1000, min_df=1, max_df=1.0)
92
-
93
- X = vectorizer.fit_transform(preprocessed_texts)
94
- feature_names = vectorizer.get_feature_names_out()
95
-
96
- # Apply topic modeling
97
- if method == "nmf":
98
- # Non-negative Matrix Factorization
99
- model = NMF(n_components=n_topics, random_state=42, max_iter=1000)
100
- else:
101
- # Latent Dirichlet Allocation
102
- model = LatentDirichletAllocation(n_components=n_topics, random_state=42, max_iter=20)
103
 
104
- topic_distribution = model.fit_transform(X)
105
 
106
- # Get top words for each topic
107
- result["topics"] = get_top_words_per_topic(model, feature_names, n_top_words)
108
 
109
- # Get topic distribution for each document
110
- for i, dist in enumerate(topic_distribution):
111
- # Normalize for easier comparison
112
- normalized_dist = dist / np.sum(dist) if np.sum(dist) > 0 else dist
113
- result["document_topics"].append({
114
- "document_id": i,
115
- "distribution": normalized_dist.tolist()
116
- })
117
 
118
- return result
119
 
120
  def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method="lda", model_names=None):
121
  """
@@ -132,50 +209,98 @@ def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method=
132
  Returns:
133
  dict: Comparison results with topics from both sets and similarity metrics
134
  """
135
  # Set default model names if not provided
136
  if model_names is None:
137
  model_names = ["Model 1", "Model 2"]
138
 
139
- # Extract topics for each set
140
- topics_set_1 = extract_topics(texts_set_1, n_topics, n_top_words, method)
141
- topics_set_2 = extract_topics(texts_set_2, n_topics, n_top_words, method)
142
-
143
- # Calculate similarity between topics
144
- similarity_matrix = []
145
- for topic1 in topics_set_1["topics"]:
146
- topic_similarities = []
147
- words1 = set(topic1["words"])
148
- for topic2 in topics_set_2["topics"]:
149
- words2 = set(topic2["words"])
150
- # Jaccard similarity: intersection over union
151
- intersection = len(words1.intersection(words2))
152
- union = len(words1.union(words2))
153
- similarity = intersection / union if union > 0 else 0
154
- topic_similarities.append(similarity)
155
- similarity_matrix.append(topic_similarities)
156
-
157
- # Find the best matching topic pairs
158
- matched_topics = []
159
- for i, similarities in enumerate(similarity_matrix):
160
- best_match_idx = np.argmax(similarities)
161
- matched_topics.append({
162
- "set1_topic_id": i,
163
- "set1_topic_words": topics_set_1["topics"][i]["words"],
164
- "set2_topic_id": best_match_idx,
165
- "set2_topic_words": topics_set_2["topics"][best_match_idx]["words"],
166
- "similarity": similarities[best_match_idx]
167
- })
168
-
169
- # Construct result
170
  result = {
171
  "method": method,
172
  "n_topics": n_topics,
173
- "set1_topics": topics_set_1["topics"],
174
- "set2_topics": topics_set_2["topics"],
175
- "similarity_matrix": similarity_matrix,
176
- "matched_topics": matched_topics,
177
- "average_similarity": np.mean([match["similarity"] for match in matched_topics]),
178
- "models": model_names # Add model names to result
179
  }
180
 
181
- return result
1
  """
2
+ Enhanced topic modeling processor for comparing text responses with better error handling
3
+ and more robust algorithm configuration
4
  """
5
  from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
6
  from sklearn.decomposition import LatentDirichletAllocation, NMF
 
8
  import nltk
9
  from nltk.corpus import stopwords
10
  import re
11
+ from scipy.spatial import distance
12
+ import logging
13
+
14
+ # Set up logging
15
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
16
+ logger = logging.getLogger('topic_modeling')
17
 
18
  def preprocess_text(text):
19
  """
 
25
  Returns:
26
  str: Preprocessed text
27
  """
28
+ try:
29
+ # Convert to lowercase
30
+ text = text.lower()
31
+
32
+ # Remove special characters and digits
33
+ text = re.sub(r'[^a-zA-Z\s]', '', text)
34
+
35
+ # Tokenize
36
+ tokens = nltk.word_tokenize(text)
37
+
38
+ # Remove stopwords
39
+ stop_words = set(stopwords.words('english'))
40
+ tokens = [token for token in tokens if token not in stop_words and len(token) > 3]
41
+
42
+ return ' '.join(tokens)
43
+ except Exception as e:
44
+ logger.error(f"Error in preprocess_text: {str(e)}")
45
+ # Return original text if preprocessing fails
46
+ return text
47
 
48
  def get_top_words_per_topic(model, feature_names, n_top_words=10):
49
  """
 
82
  Returns:
83
  dict: Topic modeling results with topics and document-topic distributions
84
  """
85
+ if isinstance(n_topics, str):
86
+ n_topics = int(n_topics)
87
+
88
+ # Ensure n_topics is at least 2
89
+ n_topics = max(2, n_topics)
90
+
91
+ logger.info(f"Starting topic modeling with method={method}, n_topics={n_topics}")
92
+
93
  result = {
94
  "method": method,
95
  "n_topics": n_topics,
 
97
  "document_topics": []
98
  }
99
 
100
+ try:
101
+ # Preprocess texts
102
+ logger.info("Preprocessing texts")
103
+ preprocessed_texts = [preprocess_text(text) for text in texts]
104
+
105
+ # Check if texts are not empty after preprocessing
106
+ preprocessed_texts = [text for text in preprocessed_texts if len(text.strip()) > 0]
107
+ if not preprocessed_texts:
108
+ logger.warning("All texts are empty after preprocessing")
109
+ return result
110
+
111
+ # Create document-term matrix
112
+ logger.info(f"Creating document-term matrix using {method}")
113
+ if method == "nmf":
114
+ # For NMF, use TF-IDF vectorization
115
+ vectorizer = TfidfVectorizer(max_features=1000, min_df=1, max_df=0.95, stop_words='english')
116
+ else:
117
+ # For LDA, use CountVectorizer
118
+ vectorizer = CountVectorizer(max_features=1000, min_df=1, max_df=0.95, stop_words='english')
119
+
120
+ try:
121
+ X = vectorizer.fit_transform(preprocessed_texts)
122
+ feature_names = vectorizer.get_feature_names_out()
123
+
124
+ # Check if we have enough features
125
+ if X.shape[1] < n_topics:
126
+ logger.warning(f"Only {X.shape[1]} features found, reducing n_topics from {n_topics}")
127
+ n_topics = max(2, X.shape[1] - 1)
128
+ result["n_topics"] = n_topics
129
+
130
+ # Apply topic modeling
131
+ logger.info(f"Applying {method.upper()} with {n_topics} topics")
132
+ if method == "nmf":
133
+ # Non-negative Matrix Factorization
134
+ model = NMF(n_components=n_topics, random_state=42, max_iter=1000)
135
+ else:
136
+ # Latent Dirichlet Allocation
137
+ model = LatentDirichletAllocation(
138
+ n_components=n_topics,
139
+ random_state=42,
140
+ max_iter=20,
141
+ learning_method='online'
142
+ )
143
+
144
+ topic_distribution = model.fit_transform(X)
145
+
146
+ # Get top words for each topic
147
+ logger.info("Extracting top words for each topic")
148
+ result["topics"] = get_top_words_per_topic(model, feature_names, n_top_words)
149
+
150
+ # Get topic distribution for each document
151
+ logger.info("Calculating topic distributions for documents")
152
+ for i, dist in enumerate(topic_distribution):
153
+ # Normalize for easier comparison
154
+ normalized_dist = dist / np.sum(dist) if np.sum(dist) > 0 else dist
155
+ result["document_topics"].append({
156
+ "document_id": i,
157
+ "distribution": normalized_dist.tolist()
158
+ })
159
+
160
+ logger.info("Topic modeling completed successfully")
161
+
162
+ except Exception as e:
163
+ logger.error(f"Error in vectorization or modeling: {str(e)}")
164
+ result["error"] = f"Topic modeling failed: {str(e)}"
165
+
166
+ except Exception as e:
167
+ logger.error(f"General error in extract_topics: {str(e)}")
168
+ result["error"] = f"Topic modeling failed: {str(e)}"
169
 
170
+ return result
171
+
172
+ def calculate_jensen_shannon_divergence(p, q):
173
+ """
174
+ Calculate Jensen-Shannon divergence between two probability distributions
175
 
176
+ Args:
177
+ p (array): First probability distribution
178
+ q (array): Second probability distribution
179
+
180
+ Returns:
181
+ float: Jensen-Shannon divergence
182
+ """
183
+ # Ensure inputs are numpy arrays
184
+ p = np.array(p)
185
+ q = np.array(q)
186
 
187
+ # Normalize if not already normalized
188
+ if np.sum(p) != 1.0:
189
+ p = p / np.sum(p) if np.sum(p) > 0 else p
190
+ if np.sum(q) != 1.0:
191
+ q = q / np.sum(q) if np.sum(q) > 0 else q
192
 
193
+ # Calculate Jensen-Shannon divergence
194
+ m = 0.5 * (p + q)
195
+ return 0.5 * (distance.jensenshannon(p, m) + distance.jensenshannon(q, m))
196
 
197
  def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method="lda", model_names=None):
198
  """
 
209
  Returns:
210
  dict: Comparison results with topics from both sets and similarity metrics
211
  """
212
+ logger.info(f"Starting topic comparison with n_topics={n_topics}, method={method}")
213
+
214
  # Set default model names if not provided
215
  if model_names is None:
216
  model_names = ["Model 1", "Model 2"]
217
 
218
+ # Initialize the result structure
219
  result = {
220
  "method": method,
221
  "n_topics": n_topics,
222
+ "models": model_names,
223
+ "model_topics": {},
224
+ "topics": [],
225
+ "comparisons": {}
226
  }
227
 
228
+ try:
229
+ # Extract topics for each set separately
230
+ # For very short texts, try combining all texts from each model
231
+ combined_text_1 = " ".join(texts_set_1)
232
+ combined_text_2 = " ".join(texts_set_2)
233
+
234
+ # Process all texts together to find common topics
235
+ all_texts = texts_set_1 + texts_set_2
236
+ logger.info(f"Processing {len(all_texts)} total texts")
237
+
238
+ # Extract topics from combined corpus
239
+ combined_result = extract_topics(all_texts, n_topics, n_top_words, method)
240
+
241
+ # Check for errors
242
+ if "error" in combined_result:
243
+ logger.warning(f"Error in combined topic extraction: {combined_result['error']}")
244
+ result["error"] = combined_result["error"]
245
+ return result
246
+
247
+ # Store topics from combined analysis
248
+ result["topics"] = combined_result["topics"]
249
+
250
+ # Now process each text set to get their topic distributions
251
+ model1_doc_topics = []
252
+ model2_doc_topics = []
253
+
254
+ # Try to use the same model from combined analysis for consistency
255
+ if "document_topics" in combined_result and len(combined_result["document_topics"]) == len(all_texts):
256
+ # Get document topics for each model
257
+ n_docs_model1 = len(texts_set_1)
258
+ for i, doc_topic in enumerate(combined_result["document_topics"]):
259
+ if i < n_docs_model1:
260
+ model1_doc_topics.append(doc_topic["distribution"])
261
+ else:
262
+ model2_doc_topics.append(doc_topic["distribution"])
263
+ else:
264
+ # Fallback: run separate topic modeling for each model
265
+ logger.info("Using separate topic modeling for each model")
266
+ model1_result = extract_topics([combined_text_1], n_topics, n_top_words, method)
267
+ model2_result = extract_topics([combined_text_2], n_topics, n_top_words, method)
268
+
269
+ if "document_topics" in model1_result and model1_result["document_topics"]:
270
+ model1_doc_topics = [doc["distribution"] for doc in model1_result["document_topics"]]
271
+
272
+ if "document_topics" in model2_result and model2_result["document_topics"]:
273
+ model2_doc_topics = [doc["distribution"] for doc in model2_result["document_topics"]]
274
+
275
+ # Calculate average topic distribution for each model
276
+ if model1_doc_topics:
277
+ model1_avg_distribution = np.mean(model1_doc_topics, axis=0).tolist()
278
+ result["model_topics"][model_names[0]] = model1_avg_distribution
279
+
280
+ if model2_doc_topics:
281
+ model2_avg_distribution = np.mean(model2_doc_topics, axis=0).tolist()
282
+ result["model_topics"][model_names[1]] = model2_avg_distribution
283
+
284
+ # Calculate similarity between models' topic distributions
285
+ if model_names[0] in result["model_topics"] and model_names[1] in result["model_topics"]:
286
+ comparison_key = f"{model_names[0]} vs {model_names[1]}"
287
+ dist1 = result["model_topics"][model_names[0]]
288
+ dist2 = result["model_topics"][model_names[1]]
289
+
290
+ # Calculate Jensen-Shannon divergence (smaller means more similar)
291
+ js_div = calculate_jensen_shannon_divergence(dist1, dist2)
292
+
293
+ # Create comparison result
294
+ result["comparisons"][comparison_key] = {
295
+ "js_divergence": js_div
296
+ }
297
+
298
+ logger.info(f"Topic comparison completed successfully. JS divergence: {js_div:.4f}")
299
+ else:
300
+ logger.warning("Could not calculate model comparisons due to missing topic distributions")
301
+
302
+ except Exception as e:
303
+ logger.error(f"Error in compare_topics: {str(e)}")
304
+ result["error"] = f"Topic comparison failed: {str(e)}"
305
+
306
+ return result
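
A short usage sketch (not part of this commit) showing how the rewritten compare_topics() is read downstream; the example texts are placeholders, and the banding comments repeat the interpretation thresholds used in app.py and the visualizer.

    # Sketch only: compare two single-response corpora and read the divergence.
    from processors.topic_modeling import compare_topics

    result = compare_topics(
        texts_set_1=["Renewable energy adoption is accelerating across many regions ..."],
        texts_set_2=["Solar and wind capacity keeps growing, but grid storage still lags ..."],
        n_topics=3,
        model_names=["Model A", "Model B"],
    )

    metrics = result.get("comparisons", {}).get("Model A vs Model B", {})
    js_div = metrics.get("js_divergence")
    if js_div is not None:
        # Same banding as the UI: <0.2 very similar, <0.4 somewhat similar,
        # <0.6 moderately different, otherwise very different.
        print(f"Topic distribution divergence: {js_div:.4f}")
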
visualization/topic_visualizer.py CHANGED
@@ -1,7 +1,6 @@
1
  """
2
- Visualization for topic modeling analysis results
3
  """
4
- from visualization.ngram_visualizer import create_ngram_visualization
5
  import gradio as gr
6
  import json
7
  import numpy as np
@@ -9,6 +8,11 @@ import pandas as pd
9
  import plotly.express as px
10
  import plotly.graph_objects as go
11
  from plotly.subplots import make_subplots
12
 
13
  def create_topic_visualization(analysis_results):
14
  """
@@ -25,124 +29,137 @@ def create_topic_visualization(analysis_results):
25
 
26
  # Check if we have valid results
27
  if not analysis_results or "analyses" not in analysis_results:
 
28
  return [gr.Markdown("No analysis results found.")]
29
 
30
- # Process each prompt
31
- for prompt, analyses in analysis_results["analyses"].items():
32
- # Process Topic Modeling analysis if available
33
- if "topic_modeling" in analyses:
34
- topic_results = analyses["topic_modeling"]
35
-
36
- # Show method and number of topics
37
- method = topic_results.get("method", "lda").upper()
38
- n_topics = topic_results.get("n_topics", 3)
39
- output_components.append(gr.Markdown(f"## Topic Modeling Analysis ({method}, {n_topics} topics)"))
40
-
41
- # Show models being compared
42
- models = topic_results.get("models", [])
43
- if len(models) >= 2:
44
- output_components.append(gr.Markdown(f"### Comparing responses from {models[0]} and {models[1]}"))
45
 
46
- # Visualize topics
47
  topics = topic_results.get("topics", [])
48
  if topics:
49
- output_components.append(gr.Markdown("### Discovered Topics"))
50
 
51
- for topic in topics:
52
- topic_id = topic.get("id", 0)
 
53
  words = topic.get("words", [])
54
- weights = topic.get("weights", [])
55
-
56
- # Create topic word bar chart
57
- if words and weights and len(words) == len(weights):
58
- # Create dataframe for plotting
59
- df = pd.DataFrame({
60
- 'word': words,
61
- 'weight': weights
62
- })
63
-
64
- # Sort by weight
65
- df = df.sort_values('weight', ascending=False)
66
-
67
- # Create bar chart
68
- fig = px.bar(
69
- df, x='word', y='weight',
70
- title=f"Topic {topic_id+1} Top Words",
71
- labels={'word': 'Word', 'weight': 'Weight'},
72
- height=300
73
- )
74
-
75
- output_components.append(gr.Plot(value=fig))
76
 
77
  # Visualize topic distributions for each model
78
  model_topics = topic_results.get("model_topics", {})
79
  if model_topics and all(model in model_topics for model in models):
80
- output_components.append(gr.Markdown("### Topic Distribution by Model"))
81
 
82
- # Create multi-model topic distribution comparison
83
- fig = go.Figure()
84
  for model in models:
85
  if model in model_topics:
86
- distribution = model_topics[model]
87
- fig.add_trace(go.Bar(
88
- x=[f"Topic {i+1}" for i in range(len(distribution))],
89
- y=distribution,
90
- name=model
91
- ))
92
-
93
- fig.update_layout(
94
- title="Topic Distributions Comparison",
95
- xaxis_title="Topic",
96
- yaxis_title="Weight",
97
- barmode='group',
98
- height=400
99
- )
100
-
101
- output_components.append(gr.Plot(value=fig))
102
-
103
- # Visualize topic differences
104
- comparisons = topic_results.get("comparisons", {})
105
- if comparisons:
106
- output_components.append(gr.Markdown("### Topic Distribution Differences"))
107
 
108
- for comparison_key, comparison_data in comparisons.items():
109
- js_divergence = comparison_data.get("js_divergence", 0)
110
- topic_differences = comparison_data.get("topic_differences", [])
111
 
112
- output_components.append(gr.Markdown(
113
- f"**{comparison_key}** - Jensen-Shannon Divergence: {js_divergence:.4f}"
114
- ))
115
-
116
- if topic_differences:
117
- # Create DataFrame for plotting
118
- model1, model2 = comparison_key.split(" vs ")
119
- df_diff = pd.DataFrame(topic_differences)
120
 
121
- # Create bar chart for topic differences
122
- fig = go.Figure()
123
- fig.add_trace(go.Bar(
124
- x=[f"Topic {d['topic_id']+1}" for d in topic_differences],
125
- y=[d["model1_weight"] for d in topic_differences],
126
- name=model1
127
- ))
128
- fig.add_trace(go.Bar(
129
- x=[f"Topic {d['topic_id']+1}" for d in topic_differences],
130
- y=[d["model2_weight"] for d in topic_differences],
131
- name=model2
132
- ))
133
 
134
  fig.update_layout(
135
- title="Topic Weight Comparison",
136
  xaxis_title="Topic",
137
  yaxis_title="Weight",
138
- barmode='group',
139
- height=400
140
  )
141
 
142
  output_components.append(gr.Plot(value=fig))
143
 
144
  # If no components were added, show a message
145
- if len(output_components) <= 1:
146
  output_components.append(gr.Markdown("No detailed Topic Modeling analysis found in results."))
147
 
148
  return output_components
@@ -159,10 +176,23 @@ def process_and_visualize_topic_analysis(analysis_results):
159
  list: List of gradio components for visualization
160
  """
161
  try:
162
- print(f"Starting visualization of topic modeling analysis results")
163
  return create_topic_visualization(analysis_results)
164
  except Exception as e:
165
  import traceback
166
  error_msg = f"Topic modeling visualization error: {str(e)}\n{traceback.format_exc()}"
167
- print(error_msg)
168
  return [gr.Markdown(f"**Error during topic modeling visualization:**\n\n```\n{error_msg}\n```")]
 
1
  """
2
+ Improved visualization for topic modeling analysis results
3
  """
 
4
  import gradio as gr
5
  import json
6
  import numpy as np
 
8
  import plotly.express as px
9
  import plotly.graph_objects as go
10
  from plotly.subplots import make_subplots
11
+ import logging
12
+
13
+ # Set up logging
14
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
15
+ logger = logging.getLogger('topic_visualizer')
16
 
17
  def create_topic_visualization(analysis_results):
18
  """
 
29
 
30
  # Check if we have valid results
31
  if not analysis_results or "analyses" not in analysis_results:
32
+ logger.warning("No valid analysis results found")
33
  return [gr.Markdown("No analysis results found.")]
34
 
35
+ try:
36
+ # Process each prompt
37
+ for prompt, analyses in analysis_results["analyses"].items():
38
+ # Process Topic Modeling analysis if available
39
+ if "topic_modeling" in analyses:
40
+ topic_results = analyses["topic_modeling"]
41
+
42
+ # Check for errors in the analysis
43
+ if "error" in topic_results:
44
+ error_msg = topic_results.get("error", "Unknown error in topic modeling")
45
+ logger.warning(f"Topic modeling error: {error_msg}")
46
+ output_components.append(gr.Markdown(f"**Error in topic modeling analysis:** {error_msg}"))
47
+ continue
48
+
49
+ # Show method and number of topics
50
+ method = topic_results.get("method", "lda").upper()
51
+ n_topics = topic_results.get("n_topics", 3)
52
+ logger.info(f"Creating visualization for {method} with {n_topics} topics")
53
+
54
+ # Get models being compared
55
+ models = topic_results.get("models", [])
56
+ if not models or len(models) < 2:
57
+ logger.warning("Not enough models found in results")
58
+ output_components.append(gr.Markdown("Topic modeling requires at least two models to compare."))
59
+ continue
60
+
61
+ output_components.append(gr.Markdown(f"### Topic Modeling Analysis ({method}, {n_topics} topics)"))
62
+ output_components.append(gr.Markdown(f"Comparing responses from **{models[0]}** and **{models[1]}**"))
63
 
64
+ # Visualize discovered topics
65
  topics = topic_results.get("topics", [])
66
  if topics:
67
+ output_components.append(gr.Markdown("#### Discovered Topics"))
68
 
69
+ # Display topics in a more readable format
70
+ for i, topic in enumerate(topics):
71
+ topic_id = topic.get("id", i)
72
  words = topic.get("words", [])
73
+ if words:
74
+ topic_words = ", ".join(words[:5]) # Show top 5 words
75
+ output_components.append(gr.Markdown(f"**Topic {topic_id+1}**: {topic_words}"))
76
 
77
  # Visualize topic distributions for each model
78
  model_topics = topic_results.get("model_topics", {})
79
  if model_topics and all(model in model_topics for model in models):
80
+ output_components.append(gr.Markdown("#### Topic Distribution by Model"))
81
 
82
+ # Display topic distributions in a readable format
 
83
  for model in models:
84
  if model in model_topics:
85
+ dist = model_topics[model]
86
+ # Format the distribution
87
+ dist_str = ", ".join([f"Topic {i+1}: {v:.2f}" for i, v in enumerate(dist[:n_topics])])
88
+ output_components.append(gr.Markdown(f"**{model}**: {dist_str}"))
89
 
90
+ # Create multi-model topic distribution visualization
91
+ try:
92
+ # Prepare data for visualization
93
+ model_data = []
94
+ for model in models:
95
+ if model in model_topics:
96
+ dist = model_topics[model]
97
+ for i, weight in enumerate(dist[:n_topics]):
98
+ model_data.append({
99
+ "Model": model,
100
+ "Topic": f"Topic {i+1}",
101
+ "Weight": weight
102
+ })
103
 
104
+ if model_data:
105
+ df = pd.DataFrame(model_data)
106
 
107
+ # Create grouped bar chart
108
+ fig = px.bar(
109
+ df,
110
+ x="Topic",
111
+ y="Weight",
112
+ color="Model",
113
+ title="Topic Distribution Comparison",
114
+ barmode="group",
115
+ height=400
116
+ )
117
 
118
  fig.update_layout(
 
119
  xaxis_title="Topic",
120
  yaxis_title="Weight",
121
+ legend_title="Model"
 
122
  )
123
 
124
  output_components.append(gr.Plot(value=fig))
125
+ except Exception as e:
126
+ logger.error(f"Error creating topic distribution plot: {str(e)}")
127
+ output_components.append(gr.Markdown(f"*Error creating visualization: {str(e)}*"))
128
+
129
+ # Display similarity metrics
130
+ comparisons = topic_results.get("comparisons", {})
131
+ if comparisons:
132
+ output_components.append(gr.Markdown("#### Similarity Metrics"))
133
+
134
+ for comparison_key, comparison_data in comparisons.items():
135
+ js_div = comparison_data.get("js_divergence", 0)
136
+
137
+ # Jensen-Shannon divergence interpretation
138
+ similarity_text = ""
139
+ if js_div < 0.2:
140
+ similarity_text = "very similar"
141
+ elif js_div < 0.4:
142
+ similarity_text = "somewhat similar"
143
+ elif js_div < 0.6:
144
+ similarity_text = "moderately different"
145
+ else:
146
+ similarity_text = "very different"
147
+
148
+ output_components.append(gr.Markdown(
149
+ f"**Topic Distribution Divergence**: {js_div:.4f} - Topic distributions are {similarity_text}"
150
+ ))
151
+
152
+ # Explain what the metric means
153
+ output_components.append(gr.Markdown(
154
+ "*Lower divergence values indicate more similar topic distributions between models*"
155
+ ))
156
+
157
+ except Exception as e:
158
+ logger.error(f"Error in create_topic_visualization: {str(e)}")
159
+ output_components.append(gr.Markdown(f"**Error creating topic visualization:** {str(e)}"))
160
 
161
  # If no components were added, show a message
162
+ if len(output_components) == 0:
163
  output_components.append(gr.Markdown("No detailed Topic Modeling analysis found in results."))
164
 
165
  return output_components
 
176
  list: List of gradio components for visualization
177
  """
178
  try:
179
+ logger.info(f"Starting visualization of topic modeling analysis results")
180
+ # Debug output - print the structure of analysis_results
181
+ if "analyses" in analysis_results:
182
+ for prompt, analyses in analysis_results["analyses"].items():
183
+ if "topic_modeling" in analyses:
184
+ topic_results = analyses["topic_modeling"]
185
+ logger.info(f"Found topic_modeling results with keys: {topic_results.keys()}")
186
+ if "models" in topic_results:
187
+ logger.info(f"Models: {topic_results['models']}")
188
+ if "topics" in topic_results:
189
+ logger.info(f"Found {len(topic_results['topics'])} topics")
190
+ if "model_topics" in topic_results:
191
+ logger.info(f"Model_topics keys: {topic_results['model_topics'].keys()}")
192
+
193
  return create_topic_visualization(analysis_results)
194
  except Exception as e:
195
  import traceback
196
  error_msg = f"Topic modeling visualization error: {str(e)}\n{traceback.format_exc()}"
197
+ logger.error(error_msg)
198
  return [gr.Markdown(f"**Error during topic modeling visualization:**\n\n```\n{error_msg}\n```")]
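
As a quick smoke test (not part of this commit), the new visualizer can be fed a hand-built dict of the shape compare_topics() produces; every number and word below is made up for illustration.

    # Sketch only: render topic visualization components from synthetic results.
    from visualization.topic_visualizer import create_topic_visualization

    fake_results = {
        "analyses": {
            "Example prompt": {
                "topic_modeling": {
                    "method": "lda",
                    "n_topics": 2,
                    "models": ["Model A", "Model B"],
                    "topics": [
                        {"id": 0, "words": ["energy", "solar", "wind", "grid", "power"]},
                        {"id": 1, "words": ["cost", "policy", "market", "price", "demand"]},
                    ],
                    "model_topics": {"Model A": [0.7, 0.3], "Model B": [0.4, 0.6]},
                    "comparisons": {"Model A vs Model B": {"js_divergence": 0.21}},
                }
            }
        }
    }

    components = create_topic_visualization(fake_results)
    print(f"Built {len(components)} Gradio components")
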