Spaces: Sleeping

Ryan committed · Commit f533950 · 1 Parent(s): 80636f0

update

Browse files
- .DS_Store +0 -0
- app.py +92 -188
- improved_analysis_handler.py +186 -0
- processors/topic_modeling.py +213 -88
- visualization/topic_visualizer.py +126 -96
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
app.py
CHANGED
@@ -1,12 +1,27 @@
 import gradio as gr
 from ui.dataset_input import create_dataset_input, load_example_dataset
+from ui.analysis_screen import create_analysis_screen
 from visualization.bow_visualizer import process_and_visualize_analysis
 import nltk
 import os
+import logging
+import sys
+import traceback

+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger('gradio_app')
+
+# Import the process_analysis_request function
+# Try to use the improved version if available, otherwise use original
+try:
+    from improved_analysis_handler import process_analysis_request
+    logger.info("Using improved analysis handler")
+except ImportError:
+    logger.info("Using original analysis handler")
+    from ui.analysis_screen import process_analysis_request
+
+# Download necessary NLTK resources
 def download_nltk_resources():
     """Download required NLTK resources if not already downloaded"""
     try:
@@ -33,25 +48,25 @@ def download_nltk_resources():
             for location in locations:
                 try:
                     nltk.data.find(location)
+                    logger.info(f"Resource {resource} already downloaded")
                     found = True
                     break
                 except LookupError:
                     continue

             if not found:
+                logger.info(f"Downloading {resource}...")
                 nltk.download(resource, quiet=True)
         except Exception as e:
+            logger.error(f"Error with resource {resource}: {e}")

+        logger.info("NLTK resources check completed")
     except Exception as e:
+        logger.error(f"Error downloading NLTK resources: {e}")

 def create_app():
     """
-    Create a streamlined Gradio app for dataset input and
+    Create a streamlined Gradio app for dataset input and analysis.

     Returns:
         gr.Blocks: The Gradio application
@@ -97,7 +112,7 @@ def create_app():
         # Analysis Tab
         with gr.Tab("Analysis"):
             # Use create_analysis_screen to get UI components including visualization container
-            analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top, topic_count
+            analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top, topic_count = create_analysis_screen()

             # Pre-create visualization components (initially hidden)
             visualization_area_visible = gr.Checkbox(value=False, visible=False, label="Visualization Visible")
@@ -122,7 +137,7 @@ def create_app():
             status_message = gr.Markdown(visible=False)

             # Define a helper function to extract parameter values and run the analysis
-            def run_analysis(dataset, selected_analysis, bow_top, ngram_n, ngram_top, topic_count
+            def run_analysis(dataset, selected_analysis, bow_top, ngram_n, ngram_top, topic_count):
                 try:
                     if not dataset or "entries" not in dataset or not dataset["entries"]:
                         return (
@@ -146,11 +161,10 @@ def create_app():
                        "bow_top": bow_top,
                        "ngram_n": ngram_n,
                        "ngram_top": ngram_top,
-                       "topic_count": topic_count
-                       "bias_methods": bias_methods  # Add this parameter
+                       "topic_count": topic_count
                    }
+                   logger.info(f"Running analysis with selected type: {selected_analysis}")
+                   logger.info(f"Parameters: {parameters}")

                    # Process the analysis request - passing selected_analysis as a string
                    analysis_results, _ = process_analysis_request(dataset, selected_analysis, parameters)
@@ -326,10 +340,30 @@ def create_app():
                        - **Common {size_name}**: {common_count} {size_name.lower()} appear in both responses
                        """

-                   # Check for Topic Modeling analysis
+                   # Check for Topic Modeling analysis - IMPROVED HANDLING
                    elif selected_analysis == "Topic Modeling" and "topic_modeling" in analyses:
                        visualization_area_visible = True
                        topic_results = analyses["topic_modeling"]
+
+                       # Check for errors in topic modeling
+                       if "error" in topic_results:
+                           return (
+                               analysis_results,
+                               False,  # Don't show raw JSON
+                               False,  # Don't show visualization area
+                               gr.update(visible=False),
+                               gr.update(visible=False),
+                               gr.update(visible=False),
+                               gr.update(visible=False),
+                               gr.update(visible=False),
+                               gr.update(visible=False),
+                               gr.update(visible=False),
+                               gr.update(visible=False),
+                               gr.update(visible=False),
+                               True,  # Show status message
+                               gr.update(visible=True, value=f"❌ **Topic modeling error:** {topic_results['error']}")
+                           )
+
                        models = topic_results.get("models", [])
                        method = topic_results.get("method", "lda").upper()
                        n_topics = topic_results.get("n_topics", 3)
@@ -347,7 +381,7 @@ def create_app():
                        if topics:
                            # Format topic info for display
                            topic_info = []
-                           for topic in topics[:
+                           for topic in topics[:5]:  # Show first 5 topics
                                topic_id = topic.get("id", 0)
                                words = topic.get("words", [])[:5]  # Top 5 words per topic
@@ -378,9 +412,9 @@ def create_app():
                            dist2 = model_topics[model2_name]

                            model2_words_value = f"""
+**{model1_name}**: {', '.join([f"Topic {i+1}: {v:.2f}" for i, v in enumerate(dist1[:5])])}

+**{model2_name}**: {', '.join([f"Topic {i+1}: {v:.2f}" for i, v in enumerate(dist2[:5])])}
                            """

                            # Add similarity metrics if available
@@ -392,10 +426,23 @@ def create_app():
                                metrics = comparisons[comparison_key]
                                js_div = metrics.get("js_divergence", 0)

+                               # Add interpretation
+                               similarity_text = ""
+                               if js_div < 0.2:
+                                   similarity_text = "very similar"
+                               elif js_div < 0.4:
+                                   similarity_text = "somewhat similar"
+                               elif js_div < 0.6:
+                                   similarity_text = "moderately different"
+                               else:
+                                   similarity_text = "very different"
+
                                similarity_title_visible = True
                                similarity_metrics_visible = True
                                similarity_metrics_value = f"""
-- **Topic Distribution Divergence**: {js_div:.4f}
+- **Topic Distribution Divergence**: {js_div:.4f}
+- The topic distributions between models are **{similarity_text}**
+- *Lower divergence values indicate more similar topic distributions*
                                """

                            # Check for Classifier analysis
@@ -448,101 +495,6 @@ def create_app():
                                f"- **{category}**: {diff}"
                                for category, diff in differences.items()
                            ])
-
-                   # Check for Bias Detection analysis
-                   elif selected_analysis == "Bias Detection" and "bias_detection" in analyses:
-                       visualization_area_visible = True
-                       bias_results = analyses["bias_detection"]
-                       models = bias_results.get("models", [])
-
-                       if len(models) >= 2:
-                           prompt_title_visible = True
-                           prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
-
-                           models_compared_visible = True
-                           models_compared_value = f"### Bias Analysis: Comparing responses from {models[0]} and {models[1]}"
-
-                           # Display comparative bias results
-                           model1_name = models[0]
-                           model2_name = models[1]
-
-                           if "comparative" in bias_results:
-                               comparative = bias_results["comparative"]
-
-                               # Format summary for display
-                               model1_title_visible = True
-                               model1_title_value = "#### Bias Detection Summary"
-                               model1_words_visible = True
-
-                               summary_parts = []
-
-                               # Add sentiment comparison
-                               if "sentiment" in comparative:
-                                   sent = comparative["sentiment"]
-                                   is_significant = sent.get("significant", False)
-                                   summary_parts.append(
-                                       f"**Sentiment Bias**: {model1_name} shows {sent.get(model1_name, 'N/A')} sentiment, " +
-                                       f"while {model2_name} shows {sent.get(model2_name, 'N/A')} sentiment. " +
-                                       f"({'Significant' if is_significant else 'Minor'} difference)"
-                                   )
-
-                               # Add partisan comparison
-                               if "partisan" in comparative:
-                                   part = comparative["partisan"]
-                                   is_significant = part.get("significant", False)
-                                   summary_parts.append(
-                                       f"**Partisan Leaning**: {model1_name} appears {part.get(model1_name, 'N/A')}, " +
-                                       f"while {model2_name} appears {part.get(model2_name, 'N/A')}. " +
-                                       f"({'Significant' if is_significant else 'Minor'} difference)"
-                                   )
-
-                               # Add framing comparison
-                               if "framing" in comparative:
-                                   frame = comparative["framing"]
-                                   different_frames = frame.get("different_frames", False)
-                                   m1_frame = frame.get(model1_name, "N/A").replace('_', ' ').title()
-                                   m2_frame = frame.get(model2_name, "N/A").replace('_', ' ').title()
-                                   summary_parts.append(
-                                       f"**Issue Framing**: {model1_name} primarily frames issues in {m1_frame} terms, " +
-                                       f"while {model2_name} uses {m2_frame} framing. " +
-                                       f"({'Different' if different_frames else 'Similar'} approaches)"
-                                   )
-
-                               # Add overall assessment
-                               if "overall" in comparative:
-                                   overall = comparative["overall"]
-                                   significant = overall.get("significant_bias_difference", False)
-                                   summary_parts.append(
-                                       f"**Overall Assessment**: " +
-                                       f"Analysis shows a {overall.get('difference', 0):.2f}/1.0 difference in bias patterns. " +
-                                       f"({'Significant' if significant else 'Minor'} overall bias difference)"
-                                   )
-
-                               # Combine all parts
-                               model1_words_value = "\n\n".join(summary_parts)
-
-                               # Format detailed term analysis
-                               if (model1_name in bias_results and "partisan" in bias_results[model1_name] and
-                                   model2_name in bias_results and "partisan" in bias_results[model2_name]):
-
-                                   model2_title_visible = True
-                                   model2_title_value = "#### Partisan Term Analysis"
-                                   model2_words_visible = True
-
-                                   m1_lib = bias_results[model1_name]["partisan"].get("liberal_terms", [])
-                                   m1_con = bias_results[model1_name]["partisan"].get("conservative_terms", [])
-                                   m2_lib = bias_results[model2_name]["partisan"].get("liberal_terms", [])
-                                   m2_con = bias_results[model2_name]["partisan"].get("conservative_terms", [])
-
-                                   model2_words_value = f"""
-**{model1_name}**:
-- Liberal terms: {', '.join(m1_lib) if m1_lib else 'None detected'}
-- Conservative terms: {', '.join(m1_con) if m1_con else 'None detected'}
-
-**{model2_name}**:
-- Liberal terms: {', '.join(m2_lib) if m2_lib else 'None detected'}
-- Conservative terms: {', '.join(m2_con) if m2_con else 'None detected'}
-"""

                    # If we don't have visualization data from any analysis
                    if not visualization_area_visible:
@@ -584,7 +536,7 @@ def create_app():
                except Exception as e:
                    import traceback
                    error_msg = f"Error in analysis: {str(e)}\n{traceback.format_exc()}"
+                   logger.error(error_msg)

                    return (
                        {"error": error_msg},  # analysis_results_state
@@ -602,80 +554,28 @@ def create_app():
                        True,  # status_message_visible
                        gr.update(visible=True, value=f"❌ **Error during analysis:**\n\n```\n{str(e)}\n```")  # status_message
                    )
+
-           # Add a new LLM Analysis tab
-           with gr.Tab("LLM Analysis"):
-               gr.Markdown("## LLM-Based Response Analysis")
-
-               with gr.Row():
-                   with gr.Column():
-                       llm_analysis_type = gr.Radio(
-                           choices=["Response Quality", "Response Comparison", "Factual Accuracy"],
-                           label="Analysis Type",
-                           value="Response Comparison"
-                       )
-
-                       llm_model = gr.Dropdown(
-                           choices=["OpenAI GPT-4", "Anthropic Claude", "Local LLM"],
-                           label="Analysis Model",
-                           value="OpenAI GPT-4"
-                       )
-
-                       run_llm_analysis_btn = gr.Button("Run LLM Analysis", variant="primary")
-
-                   with gr.Column():
-                       llm_analysis_prompt = gr.Textbox(
-                           label="Custom Analysis Instructions (Optional)",
-                           placeholder="Enter any specific instructions for the analysis...",
-                           lines=3
-                       )
-
-                       llm_analysis_status = gr.Markdown("*No analysis has been run*")
-
-                       llm_analysis_result = gr.Markdown(visible=False)
-
-               # Placeholder function for LLM analysis
-               def run_llm_analysis(dataset, analysis_type, model, custom_prompt):
-                   if not dataset or "entries" not in dataset or not dataset["entries"]:
-                       return (
-                           gr.update(visible=True, value="❌ **Error:** No dataset loaded. Please create or load a dataset first."),
-                           gr.update(visible=False)
-                       )
-
-                   # Placeholder for actual implementation
-                   return (
-                       gr.update(visible=True, value="⏳ **Implementation in progress**\n\nLLM-based analysis will be available in a future update."),
-                       gr.update(visible=False)
-                   )
-
            # Connect the run button to the analysis function
-               fn=
-               inputs=[dataset_state,
-               outputs=[
-
-           # Run analysis with proper parameters
-           run_analysis_btn.click(
-               fn=run_analysis,
-               inputs=[dataset_state, analysis_options, bow_top_slider, ngram_n, ngram_top, topic_count, bias_methods],
-               outputs=[
-                   analysis_results_state,
-                   analysis_output,
-                   visualization_area_visible,
-                   analysis_title,
-                   prompt_title,
-                   models_compared,
-                   model1_title,
-                   model1_words,
-                   model2_title,
-                   model2_words,
-                   similarity_metrics_title,
-                   similarity_metrics,
-                   status_message_visible,
-                   status_message
-               ]
-           )
+           run_analysis_btn.click(
+               fn=run_analysis,
+               inputs=[dataset_state, analysis_options, bow_top_slider, ngram_n, ngram_top, topic_count],
+               outputs=[
+                   analysis_results_state,
+                   analysis_output,
+                   visualization_area_visible,
+                   analysis_title,
+                   prompt_title,
+                   models_compared,
+                   model1_title,
+                   model1_words,
+                   model2_title,
+                   model2_words,
+                   similarity_metrics_title,
+                   similarity_metrics,
+                   status_message_visible,
+                   status_message
+               ]
           )

           return app

@@ -683,5 +583,9 @@ if __name__ == "__main__":
     # Download required NLTK resources before launching the app
     download_nltk_resources()

+    logger.info("Starting LLM Response Comparator application")
+    logger.info("===== Application Startup =====")
+
+    # Create and launch the application
     app = create_app()
     app.launch()
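Editor's note: the new `run_analysis_btn.click(...)` wiring above relies on Gradio's positional contract: whatever `fn` returns is assigned, in order, to the components listed in `outputs`, which is why `run_analysis` returns one value (often a `gr.update(...)`) per output component. A minimal sketch of that contract with hypothetical component names (not the app's real ones):

```python
import gradio as gr

def analyze(text):
    # Return one value per output component, in the same order as `outputs` below
    ok = bool(text.strip())
    return (
        {"ok": ok},                                    # -> results_state (gr.State)
        gr.update(visible=ok, value=f"Got: {text}"),   # -> result_md (gr.Markdown)
        gr.update(visible=not ok, value="No input."),  # -> status_md (gr.Markdown)
    )

with gr.Blocks() as demo:
    results_state = gr.State()
    inp = gr.Textbox(label="Input")
    btn = gr.Button("Run")
    result_md = gr.Markdown(visible=False)
    status_md = gr.Markdown(visible=False)
    btn.click(fn=analyze, inputs=[inp], outputs=[results_state, result_md, status_md])

# demo.launch()
```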
improved_analysis_handler.py
ADDED
@@ -0,0 +1,186 @@
import gradio as gr
import json
import logging
from visualization.bow_visualizer import process_and_visualize_analysis
from processors.topic_modeling import compare_topics
from processors.ngram_analysis import compare_ngrams
from processors.bow_analysis import compare_bow
from processors.text_classifiers import classify_formality, classify_sentiment, classify_complexity, compare_classifications

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('analysis_handler')

def process_analysis_request(dataset, selected_analysis, parameters):
    """
    Process the analysis request based on the selected options.

    Args:
        dataset (dict): The input dataset
        selected_analysis (str): The selected analysis type
        parameters (dict): Additional parameters for the analysis

    Returns:
        tuple: A tuple containing (analysis_results, visualization_data)
    """
    logger.info(f"Processing analysis request: {selected_analysis}")

    if not dataset or "entries" not in dataset or not dataset["entries"]:
        logger.warning("No valid dataset provided for analysis")
        return {}, None

    # Initialize the results structure
    results = {"analyses": {}}

    # Get the prompt text from the first entry
    prompt_text = dataset["entries"][0].get("prompt", "")
    if not prompt_text:
        logger.warning("No prompt found in dataset")
        return {"error": "No prompt found in dataset"}, None

    # Initialize the analysis container for this prompt
    results["analyses"][prompt_text] = {}

    # Get model names and responses
    model1_name = dataset["entries"][0].get("model", "Model 1")
    model2_name = dataset["entries"][1].get("model", "Model 2")

    model1_response = dataset["entries"][0].get("response", "")
    model2_response = dataset["entries"][1].get("response", "")

    logger.info(f"Comparing responses from {model1_name} and {model2_name}")

    try:
        # Process based on the selected analysis type
        if selected_analysis == "Bag of Words":
            # Get the top_n parameter and ensure it's an integer
            top_n = parameters.get("bow_top", 25)
            if isinstance(top_n, str):
                top_n = int(top_n)

            logger.info(f"Running Bag of Words analysis with top_n={top_n}")

            # Perform Bag of Words analysis using the processor
            bow_results = compare_bow(
                [model1_response, model2_response],
                [model1_name, model2_name],
                top_n=top_n
            )
            results["analyses"][prompt_text]["bag_of_words"] = bow_results

        elif selected_analysis == "N-gram Analysis":
            # Perform N-gram analysis
            ngram_size = parameters.get("ngram_n", 2)
            if isinstance(ngram_size, str):
                ngram_size = int(ngram_size)

            top_n = parameters.get("ngram_top", 15)
            if isinstance(top_n, str):
                top_n = int(top_n)

            logger.info(f"Running N-gram analysis with n={ngram_size}, top_n={top_n}")

            # Use the processor from the dedicated ngram_analysis module
            from processors.ngram_analysis import compare_ngrams as ngram_processor
            ngram_results = ngram_processor(
                [model1_response, model2_response],
                [model1_name, model2_name],
                n=ngram_size,
                top_n=top_n
            )
            results["analyses"][prompt_text]["ngram_analysis"] = ngram_results

        elif selected_analysis == "Topic Modeling":
            # Perform topic modeling analysis
            topic_count = parameters.get("topic_count", 3)
            if isinstance(topic_count, str):
                topic_count = int(topic_count)

            logger.info(f"Running Topic Modeling analysis with n_topics={topic_count}")

            try:
                # Import the improved topic modeling module
                try:
                    # First try to import from improved module if available
                    from improved_topic_modeling import compare_topics as improved_compare_topics
                    logger.info("Using improved topic modeling implementation")
                    topic_results = improved_compare_topics(
                        texts_set_1=[model1_response],
                        texts_set_2=[model2_response],
                        n_topics=topic_count,
                        model_names=[model1_name, model2_name])
                except ImportError:
                    # Fall back to original implementation
                    logger.info("Using original topic modeling implementation")
                    from processors.topic_modeling import compare_topics
                    topic_results = compare_topics(
                        texts_set_1=[model1_response],
                        texts_set_2=[model2_response],
                        n_topics=topic_count,
                        model_names=[model1_name, model2_name])

                results["analyses"][prompt_text]["topic_modeling"] = topic_results

                # Ensure the topic modeling results contain the necessary fields
                if "topics" not in topic_results or not topic_results["topics"]:
                    logger.warning("No topics found in topic modeling results")
                    topic_results["message"] = "No significant topics were discovered in the text. Try a different analysis method or adjust parameters."

                if "model_topics" not in topic_results or not topic_results["model_topics"]:
                    logger.warning("No model topics found in topic modeling results")
                    if "message" not in topic_results:
                        topic_results["message"] = "Could not calculate topic distributions for the models."

            except Exception as e:
                import traceback
                error_msg = f"Topic modeling error: {str(e)}\n{traceback.format_exc()}"
                logger.error(error_msg)
                results["analyses"][prompt_text]["topic_modeling"] = {
                    "models": [model1_name, model2_name],
                    "error": str(e),
                    "message": "Topic modeling failed. Please try with longer text or different parameters.",
                    "stack_trace": traceback.format_exc()
                }

        elif selected_analysis == "Classifier":
            # Perform classifier analysis
            logger.info("Running Classifier analysis")

            results["analyses"][prompt_text]["classifier"] = {
                "models": [model1_name, model2_name],
                "classifications": {
                    model1_name: {
                        "formality": classify_formality(model1_response),
                        "sentiment": classify_sentiment(model1_response),
                        "complexity": classify_complexity(model1_response)
                    },
                    model2_name: {
                        "formality": classify_formality(model2_response),
                        "sentiment": classify_sentiment(model2_response),
                        "complexity": classify_complexity(model2_response)
                    }
                },
                "differences": compare_classifications(model1_response, model2_response)
            }

        else:
            # Unknown analysis type
            logger.warning(f"Unknown analysis type: {selected_analysis}")
            results["analyses"][prompt_text]["message"] = "Please select a valid analysis type."

    except Exception as e:
        import traceback
        error_msg = f"Error processing analysis request: {str(e)}\n{traceback.format_exc()}"
        logger.error(error_msg)
        results = {
            "error": str(e),
            "stack_trace": traceback.format_exc(),
            "analyses": {
                prompt_text: {
                    "message": f"Analysis failed: {str(e)}"
                }
            }
        }

    # Return both the analysis results and a placeholder for visualization data
    return results, None
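Editor's note: the new handler reads a dataset dict with at least two `entries`, each carrying `prompt`, `model`, and `response` keys. A minimal usage sketch with made-up model names and responses (the dataset contents are hypothetical; the call signature and result shape follow the code above):

```python
from improved_analysis_handler import process_analysis_request

# Hypothetical two-model dataset in the shape the handler expects
dataset = {
    "entries": [
        {"prompt": "Explain inflation.", "model": "Model A",
         "response": "Inflation is a sustained rise in the general price level..."},
        {"prompt": "Explain inflation.", "model": "Model B",
         "response": "Prices tend to increase over time because demand outpaces supply..."},
    ]
}

results, _ = process_analysis_request(dataset, "Topic Modeling", {"topic_count": 3})

# The per-prompt results live under results["analyses"][prompt]
print(results["analyses"]["Explain inflation."]["topic_modeling"].keys())
```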
processors/topic_modeling.py
CHANGED
@@ -1,5 +1,6 @@
 """
+Enhanced topic modeling processor for comparing text responses with better error handling
+and more robust algorithm configuration
 """
 from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
 from sklearn.decomposition import LatentDirichletAllocation, NMF
@@ -7,6 +8,12 @@ import numpy as np
 import nltk
 from nltk.corpus import stopwords
 import re
+from scipy.spatial import distance
+import logging
+
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger('topic_modeling')

 def preprocess_text(text):
     """
@@ -18,20 +25,25 @@ def preprocess_text(text):
     Returns:
         str: Preprocessed text
     """
+    try:
+        # Convert to lowercase
+        text = text.lower()
+
+        # Remove special characters and digits
+        text = re.sub(r'[^a-zA-Z\s]', '', text)
+
+        # Tokenize
+        tokens = nltk.word_tokenize(text)
+
+        # Remove stopwords
+        stop_words = set(stopwords.words('english'))
+        tokens = [token for token in tokens if token not in stop_words and len(token) > 3]
+
+        return ' '.join(tokens)
+    except Exception as e:
+        logger.error(f"Error in preprocess_text: {str(e)}")
+        # Return original text if preprocessing fails
+        return text

 def get_top_words_per_topic(model, feature_names, n_top_words=10):
     """
@@ -70,6 +82,14 @@ def extract_topics(texts, n_topics=3, n_top_words=10, method="lda"):
     Returns:
         dict: Topic modeling results with topics and document-topic distributions
     """
+    if isinstance(n_topics, str):
+        n_topics = int(n_topics)
+
+    # Ensure n_topics is at least 2
+    n_topics = max(2, n_topics)
+
+    logger.info(f"Starting topic modeling with method={method}, n_topics={n_topics}")
+
     result = {
         "method": method,
         "n_topics": n_topics,
@@ -77,45 +97,102 @@ def extract_topics(texts, n_topics=3, n_top_words=10, method="lda"):
         "document_topics": []
     }

+    try:
+        # Preprocess texts
+        logger.info("Preprocessing texts")
+        preprocessed_texts = [preprocess_text(text) for text in texts]
+
+        # Check if texts are not empty after preprocessing
+        preprocessed_texts = [text for text in preprocessed_texts if len(text.strip()) > 0]
+        if not preprocessed_texts:
+            logger.warning("All texts are empty after preprocessing")
+            return result
+
+        # Create document-term matrix
+        logger.info(f"Creating document-term matrix using {method}")
+        if method == "nmf":
+            # For NMF, use TF-IDF vectorization
+            vectorizer = TfidfVectorizer(max_features=1000, min_df=1, max_df=0.95, stop_words='english')
+        else:
+            # For LDA, use CountVectorizer
+            vectorizer = CountVectorizer(max_features=1000, min_df=1, max_df=0.95, stop_words='english')
+
+        try:
+            X = vectorizer.fit_transform(preprocessed_texts)
+            feature_names = vectorizer.get_feature_names_out()
+
+            # Check if we have enough features
+            if X.shape[1] < n_topics:
+                logger.warning(f"Only {X.shape[1]} features found, reducing n_topics from {n_topics}")
+                n_topics = max(2, X.shape[1] - 1)
+                result["n_topics"] = n_topics
+
+            # Apply topic modeling
+            logger.info(f"Applying {method.upper()} with {n_topics} topics")
+            if method == "nmf":
+                # Non-negative Matrix Factorization
+                model = NMF(n_components=n_topics, random_state=42, max_iter=1000)
+            else:
+                # Latent Dirichlet Allocation
+                model = LatentDirichletAllocation(
+                    n_components=n_topics,
+                    random_state=42,
+                    max_iter=20,
+                    learning_method='online'
+                )
+
+            topic_distribution = model.fit_transform(X)
+
+            # Get top words for each topic
+            logger.info("Extracting top words for each topic")
+            result["topics"] = get_top_words_per_topic(model, feature_names, n_top_words)
+
+            # Get topic distribution for each document
+            logger.info("Calculating topic distributions for documents")
+            for i, dist in enumerate(topic_distribution):
+                # Normalize for easier comparison
+                normalized_dist = dist / np.sum(dist) if np.sum(dist) > 0 else dist
+                result["document_topics"].append({
+                    "document_id": i,
+                    "distribution": normalized_dist.tolist()
+                })
+
+            logger.info("Topic modeling completed successfully")
+
+        except Exception as e:
+            logger.error(f"Error in vectorization or modeling: {str(e)}")
+            result["error"] = f"Topic modeling failed: {str(e)}"
+
+    except Exception as e:
+        logger.error(f"General error in extract_topics: {str(e)}")
+        result["error"] = f"Topic modeling failed: {str(e)}"

+    return result
+
+def calculate_jensen_shannon_divergence(p, q):
+    """
+    Calculate Jensen-Shannon divergence between two probability distributions
+
+    Args:
+        p (array): First probability distribution
+        q (array): Second probability distribution
+
+    Returns:
+        float: Jensen-Shannon divergence
+    """
+    # Ensure inputs are numpy arrays
+    p = np.array(p)
+    q = np.array(q)
+
+    # Normalize if not already normalized
+    if np.sum(p) != 1.0:
+        p = p / np.sum(p) if np.sum(p) > 0 else p
+    if np.sum(q) != 1.0:
+        q = q / np.sum(q) if np.sum(q) > 0 else q
+
+    # Calculate Jensen-Shannon divergence
+    m = 0.5 * (p + q)
+    return 0.5 * (distance.jensenshannon(p, m) + distance.jensenshannon(q, m))

 def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method="lda", model_names=None):
     """
@@ -132,50 +209,98 @@ def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method=
     Returns:
         dict: Comparison results with topics from both sets and similarity metrics
     """
+    logger.info(f"Starting topic comparison with n_topics={n_topics}, method={method}")
+
     # Set default model names if not provided
     if model_names is None:
         model_names = ["Model 1", "Model 2"]

-    topics_set_1 = extract_topics(texts_set_1, n_topics, n_top_words, method)
-    topics_set_2 = extract_topics(texts_set_2, n_topics, n_top_words, method)
-
-    # Calculate similarity between topics
-    similarity_matrix = []
-    for topic1 in topics_set_1["topics"]:
-        topic_similarities = []
-        words1 = set(topic1["words"])
-        for topic2 in topics_set_2["topics"]:
-            words2 = set(topic2["words"])
-            # Jaccard similarity: intersection over union
-            intersection = len(words1.intersection(words2))
-            union = len(words1.union(words2))
-            similarity = intersection / union if union > 0 else 0
-            topic_similarities.append(similarity)
-        similarity_matrix.append(topic_similarities)
-
-    # Find the best matching topic pairs
-    matched_topics = []
-    for i, similarities in enumerate(similarity_matrix):
-        best_match_idx = np.argmax(similarities)
-        matched_topics.append({
-            "set1_topic_id": i,
-            "set1_topic_words": topics_set_1["topics"][i]["words"],
-            "set2_topic_id": best_match_idx,
-            "set2_topic_words": topics_set_2["topics"][best_match_idx]["words"],
-            "similarity": similarities[best_match_idx]
-        })
-
-    # Construct result
+    # Initialize the result structure
     result = {
         "method": method,
         "n_topics": n_topics,
-        "average_similarity": np.mean([match["similarity"] for match in matched_topics]),
-        "models": model_names  # Add model names to result
+        "models": model_names,
+        "model_topics": {},
+        "topics": [],
+        "comparisons": {}
     }

+    try:
+        # Extract topics for each set separately
+        # For very short texts, try combining all texts from each model
+        combined_text_1 = " ".join(texts_set_1)
+        combined_text_2 = " ".join(texts_set_2)
+
+        # Process all texts together to find common topics
+        all_texts = texts_set_1 + texts_set_2
+        logger.info(f"Processing {len(all_texts)} total texts")
+
+        # Extract topics from combined corpus
+        combined_result = extract_topics(all_texts, n_topics, n_top_words, method)
+
+        # Check for errors
+        if "error" in combined_result:
+            logger.warning(f"Error in combined topic extraction: {combined_result['error']}")
+            result["error"] = combined_result["error"]
+            return result
+
+        # Store topics from combined analysis
+        result["topics"] = combined_result["topics"]
+
+        # Now process each text set to get their topic distributions
+        model1_doc_topics = []
+        model2_doc_topics = []
+
+        # Try to use the same model from combined analysis for consistency
+        if "document_topics" in combined_result and len(combined_result["document_topics"]) == len(all_texts):
+            # Get document topics for each model
+            n_docs_model1 = len(texts_set_1)
+            for i, doc_topic in enumerate(combined_result["document_topics"]):
+                if i < n_docs_model1:
+                    model1_doc_topics.append(doc_topic["distribution"])
+                else:
+                    model2_doc_topics.append(doc_topic["distribution"])
+        else:
+            # Fallback: run separate topic modeling for each model
+            logger.info("Using separate topic modeling for each model")
+            model1_result = extract_topics([combined_text_1], n_topics, n_top_words, method)
+            model2_result = extract_topics([combined_text_2], n_topics, n_top_words, method)
+
+            if "document_topics" in model1_result and model1_result["document_topics"]:
+                model1_doc_topics = [doc["distribution"] for doc in model1_result["document_topics"]]
+
+            if "document_topics" in model2_result and model2_result["document_topics"]:
+                model2_doc_topics = [doc["distribution"] for doc in model2_result["document_topics"]]
+
+        # Calculate average topic distribution for each model
+        if model1_doc_topics:
+            model1_avg_distribution = np.mean(model1_doc_topics, axis=0).tolist()
+            result["model_topics"][model_names[0]] = model1_avg_distribution
+
+        if model2_doc_topics:
+            model2_avg_distribution = np.mean(model2_doc_topics, axis=0).tolist()
+            result["model_topics"][model_names[1]] = model2_avg_distribution
+
+        # Calculate similarity between models' topic distributions
+        if model_names[0] in result["model_topics"] and model_names[1] in result["model_topics"]:
+            comparison_key = f"{model_names[0]} vs {model_names[1]}"
+            dist1 = result["model_topics"][model_names[0]]
+            dist2 = result["model_topics"][model_names[1]]
+
+            # Calculate Jensen-Shannon divergence (smaller means more similar)
+            js_div = calculate_jensen_shannon_divergence(dist1, dist2)
+
+            # Create comparison result
+            result["comparisons"][comparison_key] = {
+                "js_divergence": js_div
+            }
+
+            logger.info(f"Topic comparison completed successfully. JS divergence: {js_div:.4f}")
+        else:
+            logger.warning("Could not calculate model comparisons due to missing topic distributions")
+
+    except Exception as e:
+        logger.error(f"Error in compare_topics: {str(e)}")
+        result["error"] = f"Topic comparison failed: {str(e)}"
+
+    return result
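Editor's note: for reference, the standard Jensen-Shannon divergence of distributions P and Q is JSD(P, Q) = 0.5 * KL(P || M) + 0.5 * KL(Q || M) with M = (P + Q) / 2, and SciPy's `scipy.spatial.distance.jensenshannon(p, q)` returns its square root (the JS distance). The committed helper instead averages the two half-distances against M, which is a closely related but not identical quantity. A small sketch comparing the two on toy distributions (the distributions are made up):

```python
import numpy as np
from scipy.spatial import distance

p = np.array([0.7, 0.2, 0.1])
q = np.array([0.3, 0.4, 0.3])
m = 0.5 * (p + q)

js_distance = distance.jensenshannon(p, q)   # sqrt of the standard JS divergence (natural log base)
js_divergence = js_distance ** 2             # standard JSD, bounded above by ln(2)

# The value the committed calculate_jensen_shannon_divergence() would return:
committed_value = 0.5 * (distance.jensenshannon(p, m) + distance.jensenshannon(q, m))

print(round(js_divergence, 4), round(committed_value, 4))
```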
visualization/topic_visualizer.py
CHANGED
@@ -1,7 +1,6 @@
 """
+Improved visualization for topic modeling analysis results
 """
-from visualization.ngram_visualizer import create_ngram_visualization
 import gradio as gr
 import json
 import numpy as np
@@ -9,6 +8,11 @@ import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
+import logging
+
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger('topic_visualizer')

 def create_topic_visualization(analysis_results):
     """
@@ -25,124 +29,137 @@ def create_topic_visualization(analysis_results):

     # Check if we have valid results
     if not analysis_results or "analyses" not in analysis_results:
+        logger.warning("No valid analysis results found")
         return [gr.Markdown("No analysis results found.")]

+    try:
+        # Process each prompt
+        for prompt, analyses in analysis_results["analyses"].items():
+            # Process Topic Modeling analysis if available
+            if "topic_modeling" in analyses:
+                topic_results = analyses["topic_modeling"]
+
+                # Check for errors in the analysis
+                if "error" in topic_results:
+                    error_msg = topic_results.get("error", "Unknown error in topic modeling")
+                    logger.warning(f"Topic modeling error: {error_msg}")
+                    output_components.append(gr.Markdown(f"**Error in topic modeling analysis:** {error_msg}"))
+                    continue
+
+                # Show method and number of topics
+                method = topic_results.get("method", "lda").upper()
+                n_topics = topic_results.get("n_topics", 3)
+                logger.info(f"Creating visualization for {method} with {n_topics} topics")
+
+                # Get models being compared
+                models = topic_results.get("models", [])
+                if not models or len(models) < 2:
+                    logger.warning("Not enough models found in results")
+                    output_components.append(gr.Markdown("Topic modeling requires at least two models to compare."))
+                    continue
+
+                output_components.append(gr.Markdown(f"### Topic Modeling Analysis ({method}, {n_topics} topics)"))
+                output_components.append(gr.Markdown(f"Comparing responses from **{models[0]}** and **{models[1]}**"))

+                # Visualize discovered topics
                 topics = topic_results.get("topics", [])
                 if topics:
-                    output_components.append(gr.Markdown("
+                    output_components.append(gr.Markdown("#### Discovered Topics"))

+                    # Display topics in a more readable format
+                    for i, topic in enumerate(topics):
+                        topic_id = topic.get("id", i)
                         words = topic.get("words", [])
-                        if words and weights and len(words) == len(weights):
-                            # Create dataframe for plotting
-                            df = pd.DataFrame({
-                                'word': words,
-                                'weight': weights
-                            })
-
-                            # Sort by weight
-                            df = df.sort_values('weight', ascending=False)
-
-                            # Create bar chart
-                            fig = px.bar(
-                                df, x='word', y='weight',
-                                title=f"Topic {topic_id+1} Top Words",
-                                labels={'word': 'Word', 'weight': 'Weight'},
-                                height=300
-                            )
-
-                            output_components.append(gr.Plot(value=fig))
+                        if words:
+                            topic_words = ", ".join(words[:5])  # Show top 5 words
+                            output_components.append(gr.Markdown(f"**Topic {topic_id+1}**: {topic_words}"))

                 # Visualize topic distributions for each model
                 model_topics = topic_results.get("model_topics", {})
                 if model_topics and all(model in model_topics for model in models):
-                    output_components.append(gr.Markdown("
+                    output_components.append(gr.Markdown("#### Topic Distribution by Model"))

-                    fig = go.Figure()
+                    # Display topic distributions in a readable format
                     for model in models:
                         if model in model_topics:
-                                name=model
-                            ))
-
-                    fig.update_layout(
-                        title="Topic Distributions Comparison",
-                        xaxis_title="Topic",
-                        yaxis_title="Weight",
-                        barmode='group',
-                        height=400
-                    )
-
-                    output_components.append(gr.Plot(value=fig))
-
-                    # Visualize topic differences
-                    comparisons = topic_results.get("comparisons", {})
-                    if comparisons:
-                        output_components.append(gr.Markdown("### Topic Distribution Differences"))
-
-                        if topic_differences:
-                            # Create DataFrame for plotting
-                            model1, model2 = comparison_key.split(" vs ")
-                            df_diff = pd.DataFrame(topic_differences)
-
-                            # Create bar chart
-                                name=model2
-                            ))
-
-                            fig.update_layout(
-                                title="Topic Weight Comparison",
-                                xaxis_title="Topic",
-                                yaxis_title="Weight",
-                                height=400
-                            )
-
-                            output_components.append(gr.Plot(value=fig))
+                            dist = model_topics[model]
+                            # Format the distribution
+                            dist_str = ", ".join([f"Topic {i+1}: {v:.2f}" for i, v in enumerate(dist[:n_topics])])
+                            output_components.append(gr.Markdown(f"**{model}**: {dist_str}"))

+                    # Create multi-model topic distribution visualization
+                    try:
+                        # Prepare data for visualization
+                        model_data = []
+                        for model in models:
+                            if model in model_topics:
+                                dist = model_topics[model]
+                                for i, weight in enumerate(dist[:n_topics]):
+                                    model_data.append({
+                                        "Model": model,
+                                        "Topic": f"Topic {i+1}",
+                                        "Weight": weight
+                                    })

+                        if model_data:
+                            df = pd.DataFrame(model_data)

+                            # Create grouped bar chart
+                            fig = px.bar(
+                                df,
+                                x="Topic",
+                                y="Weight",
+                                color="Model",
+                                title="Topic Distribution Comparison",
+                                barmode="group",
+                                height=400
+                            )

                             fig.update_layout(
                                 xaxis_title="Topic",
                                 yaxis_title="Weight",
+                                legend_title="Model"
                             )

                             output_components.append(gr.Plot(value=fig))
+                    except Exception as e:
+                        logger.error(f"Error creating topic distribution plot: {str(e)}")
+                        output_components.append(gr.Markdown(f"*Error creating visualization: {str(e)}*"))
+
+                # Display similarity metrics
+                comparisons = topic_results.get("comparisons", {})
+                if comparisons:
+                    output_components.append(gr.Markdown("#### Similarity Metrics"))
+
+                    for comparison_key, comparison_data in comparisons.items():
+                        js_div = comparison_data.get("js_divergence", 0)
+
+                        # Jensen-Shannon divergence interpretation
+                        similarity_text = ""
+                        if js_div < 0.2:
+                            similarity_text = "very similar"
+                        elif js_div < 0.4:
+                            similarity_text = "somewhat similar"
+                        elif js_div < 0.6:
+                            similarity_text = "moderately different"
+                        else:
+                            similarity_text = "very different"
+
+                        output_components.append(gr.Markdown(
+                            f"**Topic Distribution Divergence**: {js_div:.4f} - Topic distributions are {similarity_text}"
+                        ))
+
+                        # Explain what the metric means
+                        output_components.append(gr.Markdown(
+                            "*Lower divergence values indicate more similar topic distributions between models*"
+                        ))
+
+    except Exception as e:
+        logger.error(f"Error in create_topic_visualization: {str(e)}")
+        output_components.append(gr.Markdown(f"**Error creating topic visualization:** {str(e)}"))

     # If no components were added, show a message
-    if len(output_components)
+    if len(output_components) == 0:
         output_components.append(gr.Markdown("No detailed Topic Modeling analysis found in results."))

     return output_components
@@ -159,10 +176,23 @@ def process_and_visualize_topic_analysis(analysis_results):
         list: List of gradio components for visualization
     """
     try:
+        logger.info(f"Starting visualization of topic modeling analysis results")
+        # Debug output - print the structure of analysis_results
+        if "analyses" in analysis_results:
+            for prompt, analyses in analysis_results["analyses"].items():
+                if "topic_modeling" in analyses:
+                    topic_results = analyses["topic_modeling"]
+                    logger.info(f"Found topic_modeling results with keys: {topic_results.keys()}")
+                    if "models" in topic_results:
+                        logger.info(f"Models: {topic_results['models']}")
+                    if "topics" in topic_results:
+                        logger.info(f"Found {len(topic_results['topics'])} topics")
+                    if "model_topics" in topic_results:
+                        logger.info(f"Model_topics keys: {topic_results['model_topics'].keys()}")
+
         return create_topic_visualization(analysis_results)
     except Exception as e:
         import traceback
         error_msg = f"Topic modeling visualization error: {str(e)}\n{traceback.format_exc()}"
+        logger.error(error_msg)
         return [gr.Markdown(f"**Error during topic modeling visualization:**\n\n```\n{error_msg}\n```")]
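Editor's note: the grouped bar chart built in `create_topic_visualization` only needs a long-format DataFrame with one row per model/topic pair. A standalone sketch of that plot, using made-up distributions in the shape `compare_topics()["model_topics"]` produces:

```python
import pandas as pd
import plotly.express as px

# Hypothetical per-model average topic distributions
model_topics = {
    "Model A": [0.55, 0.30, 0.15],
    "Model B": [0.20, 0.45, 0.35],
}

# One row per (model, topic) pair, matching the visualizer's long format
rows = [
    {"Model": model, "Topic": f"Topic {i+1}", "Weight": w}
    for model, dist in model_topics.items()
    for i, w in enumerate(dist)
]
df = pd.DataFrame(rows)

fig = px.bar(df, x="Topic", y="Weight", color="Model",
             title="Topic Distribution Comparison", barmode="group", height=400)
fig.update_layout(legend_title="Model")
# fig.show()
```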