Ryan committed on
Commit 524b56d · 1 Parent(s): 30d74ab
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
.idea/workspace.xml CHANGED
@@ -4,7 +4,9 @@
  <option name="autoReloadType" value="SELECTIVE" />
  </component>
  <component name="ChangeListManager">
- <list default="true" id="8e67814c-7f04-433c-ab7a-2b65a1106d4c" name="Changes" comment="" />
+ <list default="true" id="8e67814c-7f04-433c-ab7a-2b65a1106d4c" name="Changes" comment="">
+ <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
+ </list>
  <option name="SHOW_DIALOG" value="false" />
  <option name="HIGHLIGHT_CONFLICTS" value="true" />
  <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
@@ -60,7 +62,7 @@
  <option name="presentableId" value="Default" />
  <updated>1745170754325</updated>
  <workItem from="1745170755404" duration="245000" />
- <workItem from="1745172030020" duration="22565000" />
+ <workItem from="1745172030020" duration="23339000" />
  </task>
  <servers />
  </component>
analysis_runner.py DELETED
@@ -1,158 +0,0 @@
- import logging
- import traceback
- import gradio as gr
-
- # Import the processor functions
- from processors.bow_processor import process_bow_analysis
- from processors.ngram_processor import process_ngram_analysis
- from processors.topic_processor import process_topic_modeling
- from processors.classifier_processor import process_classifier_analysis
- from processors.bias_processor import process_bias_detection
-
- # Import the empty visualization response function
- from visualization_handler import create_empty_visualization_response
-
- # Import process_analysis_request with fallback
- try:
- from improved_analysis_handler import process_analysis_request
- logger = logging.getLogger("gradio_app.analysis_runner")
- logger.info("Using improved analysis handler")
- except ImportError:
- from ui.analysis_screen import process_analysis_request
- logger = logging.getLogger("gradio_app.analysis_runner")
- logger.warning("Improved analysis handler not found, using default")
-
-
- def run_analysis(dataset, selected_analysis, ngram_n, ngram_top, topic_count):
- """
- Run the selected analysis on the provided dataset and return visualization components.
-
- Args:
- dataset (dict): The dataset to analyze.
- selected_analysis (str): The type of analysis to run.
- ngram_n (int): N-gram size for N-gram analysis.
- ngram_top (int): Number of top N-grams to display.
- topic_count (int): Number of topics for topic modeling.
-
- Returns:
- tuple: Updated visualization components.
- """
- try:
- # Create parameter dictionary for the selected analysis
- parameters = {
- "ngram_n": ngram_n,
- "ngram_top": ngram_top,
- "topic_count": topic_count
- }
-
- logger.info(f"Running analysis with selected type: {selected_analysis}")
- logger.info(f"Parameters: {parameters}")
-
- # Validate dataset
- if not dataset or "entries" not in dataset or not dataset["entries"]:
- return default_no_dataset()
-
- # Process the analysis request
- analysis_results, _ = process_analysis_request(dataset, selected_analysis, parameters)
-
- # Check for valid results
- if not analysis_results or "analyses" not in analysis_results or not analysis_results["analyses"]:
- return default_no_results()
-
- # Extract information and conduct routing
- prompt = list(analysis_results["analyses"].keys())[0] # Get the first prompt
- analyses = analysis_results["analyses"][prompt]
-
- # Handle cases where analysis provides a message
- if "message" in analyses:
- return default_message_response(analyses["message"])
-
- # Route to processors for specific analysis types
- if selected_analysis == "Bag of Words" and "bag_of_words" in analyses:
- return process_bow_analysis(analysis_results, prompt, analyses)
-
- elif selected_analysis == "N-gram Analysis" and "ngram_analysis" in analyses:
- return process_ngram_analysis(analysis_results, prompt, analyses)
-
- elif selected_analysis == "Topic Modeling" and "topic_modeling" in analyses:
- return process_topic_modeling(analysis_results, prompt, analyses)
-
- elif selected_analysis == "Classifier" and "classifier" in analyses:
- return process_classifier_analysis(analysis_results, prompt, analyses)
-
- elif selected_analysis == "Bias Detection" and "bias_detection" in analyses:
- return process_bias_detection(analysis_results, prompt, analyses)
-
- # Fallback if no visualization is available
- return default_no_visualization(analysis_results)
-
- except Exception as e:
- # Log and return an error response
- error_message = f"Error during analysis execution: {str(e)}\n{traceback.format_exc()}"
- logger.error(error_message)
- return default_error_response(error_message, str(e))
-
-
- # ========= Default Responses for Handling Errors and Messages =========
-
- def default_no_dataset():
- """
- Returns a default response when no dataset is provided.
- """
- components = create_empty_visualization_response()
- components[2] = False # Hide visualization
- components[12] = True # Show status message
- components[13] = "❌ **Error:** No dataset provided. Please upload or create a dataset."
- return tuple(components)
-
-
- def default_no_results():
- """
- Returns a default response when no analysis results are found.
- """
- components = create_empty_visualization_response()
- components[2] = False # Hide visualization
- components[12] = True # Show status message
- components[13] = "❌ **Error:** No results found for the selected analysis."
- return tuple(components)
-
-
- def default_message_response(message):
- """
- Returns a response to display a specific message from the analysis system.
- """
- components = create_empty_visualization_response()
- components[2] = True # Show visualization area
- components[12] = False # No error notification
- components[13] = f"**Message from analysis:** {message}"
- return tuple(components)
-
-
- def default_no_visualization(analysis_results):
- """
- Returns a response when no visualization is available for the results.
- """
- components = create_empty_visualization_response()
- components[0] = analysis_results # Pass through current analysis state
- components[2] = True # Show visualization area
- components[12] = False # No error notification
- components[13] = "**No visualizations available for this analysis type.**"
- return tuple(components)
-
-
- def default_error_response(error_message, detailed_message):
- """
- Returns a response for errors encountered during the analysis process.
-
- Args:
- error_message (str): The main error message to display.
- detailed_message (str): A detailed message (e.g., stack trace).
-
- Returns:
- tuple: Components to present the error.
- """
- components = create_empty_visualization_response()
- components[2] = False # Hide visualization
- components[12] = True # Show status message
- components[13] = f"❌ **Error:** {detailed_message}"
- return tuple(components)
app.py CHANGED
@@ -1,30 +1,58 @@
1
  import gradio as gr
2
- import logging
3
- from data_handler import download_nltk_resources
4
- from analysis_runner import run_analysis
5
- from visualization_handler import create_visualization_components
6
  from ui.dataset_input import create_dataset_input, load_example_dataset
7
- from ui.analysis_screen import create_analysis_screen
8
-
9
- # Set up logging
10
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
11
- logger = logging.getLogger('gradio_app')
12
-
13
- # Import the process_analysis_request function
14
- # Try to use the improved version if available, otherwise use original
15
- try:
16
- from improved_analysis_handler import process_analysis_request
17
-
18
- logger.info("Using improved analysis handler")
19
- except ImportError:
20
- logger.info("Using original analysis handler")
21
- from ui.analysis_screen import process_analysis_request
22

23
 
24
  def create_app():
25
  """
26
- Create a streamlined Gradio app for dataset input and analysis.
27
-
28
  Returns:
29
  gr.Blocks: The Gradio application
30
  """
@@ -32,14 +60,14 @@ def create_app():
32
  # Application state to share data between tabs
33
  dataset_state = gr.State({})
34
  analysis_results_state = gr.State({})
35
-
36
  # Dataset Input Tab
37
  with gr.Tab("Dataset Input"):
38
  dataset_inputs, example_dropdown, load_example_btn, create_btn, prompt, response1, model1, response2, model2 = create_dataset_input()
39
-
40
  # Add status indicator to show when dataset is created
41
  dataset_status = gr.Markdown("*No dataset loaded*")
42
-
43
  # Load example dataset
44
  load_example_btn.click(
45
  fn=load_example_dataset,
@@ -51,7 +79,7 @@ def create_app():
51
  def create_dataset(p, r1, m1, r2, m2):
52
  if not p or not r1 or not r2:
53
  return {}, "❌ **Error:** Please fill in at least the prompt and both responses"
54
-
55
  dataset = {
56
  "entries": [
57
  {"prompt": p, "response": r1, "model": m1 or "Model 1"},
@@ -59,48 +87,601 @@ def create_app():
59
  ]
60
  }
61
  return dataset, "✅ **Dataset created successfully!** You can now go to the Analysis tab"
62
-
63
  create_btn.click(
64
  fn=create_dataset,
65
  inputs=[prompt, response1, model1, response2, model2],
66
  outputs=[dataset_state, dataset_status]
67
  )
68
-
69
  # Analysis Tab
70
  with gr.Tab("Analysis"):
71
- # Create analysis screen
72
- analysis_components = create_analysis_screen()
73
- analysis_options = analysis_components[0]
74
- analysis_params = analysis_components[1]
75
- run_analysis_btn = analysis_components[2]
76
- analysis_output = analysis_components[3]
77
- ngram_n = analysis_components[4]
78
- topic_count = analysis_components[5]
79
 
80
- # Add the missing ngram_top parameter
81
- ngram_top = gr.Slider(minimum=5, maximum=50, value=20, step=5,
82
- label="Number of Top N-grams to Display")
83
 
84
- # Create visualization components
85
- visualization_components = create_visualization_components()
86
87
  # Connect the run button to the analysis function
88
- run_analysis_btn.click(
89
- fn=run_analysis,
90
- inputs=[dataset_state, analysis_options, ngram_n, ngram_top, topic_count],
91
- outputs=visualization_components
92
  )
93
94
  return app
95
 
96
-
97
  if __name__ == "__main__":
98
  # Download required NLTK resources before launching the app
99
  download_nltk_resources()
100
-
101
- logger.info("Starting LLM Response Comparator application")
102
- logger.info("===== Application Startup =====")
103
-
104
- # Create and launch the application
105
  app = create_app()
106
  app.launch()
 
1
  import gradio as gr
2
  from ui.dataset_input import create_dataset_input, load_example_dataset
3
+ from ui.analysis_screen import create_analysis_screen, process_analysis_request
4
+ from visualization.bow_visualizer import process_and_visualize_analysis
5
+ import nltk
6
+ import os
7
+ import json
8
 
9
+ # Download necessary NLTK resources function remains unchanged
10
+ def download_nltk_resources():
11
+ """Download required NLTK resources if not already downloaded"""
12
+ try:
13
+ # Create nltk_data directory in the user's home directory if it doesn't exist
14
+ nltk_data_path = os.path.expanduser("~/nltk_data")
15
+ os.makedirs(nltk_data_path, exist_ok=True)
16
+
17
+ # Add this path to NLTK's data path
18
+ nltk.data.path.append(nltk_data_path)
19
+
20
+ # Download required resources
21
+ resources = ['punkt', 'wordnet', 'stopwords', 'punkt_tab']
22
+ for resource in resources:
23
+ try:
24
+ # Different resources can be in different directories in NLTK
25
+ locations = [
26
+ f'tokenizers/{resource}',
27
+ f'corpora/{resource}',
28
+ f'taggers/{resource}',
29
+ f'{resource}'
30
+ ]
31
+
32
+ found = False
33
+ for location in locations:
34
+ try:
35
+ nltk.data.find(location)
36
+ print(f"Resource {resource} already downloaded")
37
+ found = True
38
+ break
39
+ except LookupError:
40
+ continue
41
+
42
+ if not found:
43
+ print(f"Downloading {resource}...")
44
+ nltk.download(resource, quiet=True)
45
+ except Exception as e:
46
+ print(f"Error with resource {resource}: {e}")
47
+
48
+ print("NLTK resources check completed")
49
+ except Exception as e:
50
+ print(f"Error downloading NLTK resources: {e}")
51
 
52
  def create_app():
53
  """
54
+ Create a streamlined Gradio app for dataset input and Bag of Words analysis.
55
+
56
  Returns:
57
  gr.Blocks: The Gradio application
58
  """
 
60
  # Application state to share data between tabs
61
  dataset_state = gr.State({})
62
  analysis_results_state = gr.State({})
63
+
64
  # Dataset Input Tab
65
  with gr.Tab("Dataset Input"):
66
  dataset_inputs, example_dropdown, load_example_btn, create_btn, prompt, response1, model1, response2, model2 = create_dataset_input()
67
+
68
  # Add status indicator to show when dataset is created
69
  dataset_status = gr.Markdown("*No dataset loaded*")
70
+
71
  # Load example dataset
72
  load_example_btn.click(
73
  fn=load_example_dataset,
 
79
  def create_dataset(p, r1, m1, r2, m2):
80
  if not p or not r1 or not r2:
81
  return {}, "❌ **Error:** Please fill in at least the prompt and both responses"
82
+
83
  dataset = {
84
  "entries": [
85
  {"prompt": p, "response": r1, "model": m1 or "Model 1"},
 
87
  ]
88
  }
89
  return dataset, "✅ **Dataset created successfully!** You can now go to the Analysis tab"
90
+
91
  create_btn.click(
92
  fn=create_dataset,
93
  inputs=[prompt, response1, model1, response2, model2],
94
  outputs=[dataset_state, dataset_status]
95
  )
96
+
97
  # Analysis Tab
98
  with gr.Tab("Analysis"):
99
+ # Use create_analysis_screen to get UI components including visualization container
100
+ analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top, topic_count, bias_methods = create_analysis_screen()
101
+
102
+ # Pre-create visualization components (initially hidden)
103
+ visualization_area_visible = gr.Checkbox(value=False, visible=False, label="Visualization Visible")
104
+ analysis_title = gr.Markdown("## Analysis Results", visible=False)
105
+ prompt_title = gr.Markdown(visible=False)
106
+ models_compared = gr.Markdown(visible=False)
107
 
108
+ # Container for model 1 words
109
+ model1_title = gr.Markdown(visible=False)
110
+ model1_words = gr.Markdown(visible=False)
111
+
112
+ # Container for model 2 words
113
+ model2_title = gr.Markdown(visible=False)
114
+ model2_words = gr.Markdown(visible=False)
115
+
116
+ # Similarity metrics
117
+ similarity_metrics_title = gr.Markdown("### Similarity Metrics", visible=False)
118
+ similarity_metrics = gr.Markdown(visible=False)
119
+
120
+ # Status or error message area
121
+ status_message_visible = gr.Checkbox(value=False, visible=False, label="Status Message Visible")
122
+ status_message = gr.Markdown(visible=False)
123
+
124
+ # Define a helper function to extract parameter values and run the analysis
125
+ def run_analysis(dataset, selected_analysis, bow_top, ngram_n, ngram_top, topic_count, bias_methods):
126
+ try:
127
+ if not dataset or "entries" not in dataset or not dataset["entries"]:
128
+ return (
129
+ {}, # analysis_results_state
130
+ False, # analysis_output visibility
131
+ False, # visualization_area_visible
132
+ gr.update(visible=False), # analysis_title
133
+ gr.update(visible=False), # prompt_title
134
+ gr.update(visible=False), # models_compared
135
+ gr.update(visible=False), # model1_title
136
+ gr.update(visible=False), # model1_words
137
+ gr.update(visible=False), # model2_title
138
+ gr.update(visible=False), # model2_words
139
+ gr.update(visible=False), # similarity_metrics_title
140
+ gr.update(visible=False), # similarity_metrics
141
+ True, # status_message_visible
142
+ gr.update(visible=True, value="❌ **Error:** No dataset loaded. Please create or load a dataset first.") # status_message
143
+ )
144
+
145
+ parameters = {
146
+ "bow_top": bow_top,
147
+ "ngram_n": ngram_n,
148
+ "ngram_top": ngram_top,
149
+ "topic_count": topic_count,
150
+ "bias_methods": bias_methods # Add this parameter
151
+ }
152
+ print(f"Running analysis with selected type: {selected_analysis}")
153
+ print("Parameters:", parameters)
154
+
155
+ # Process the analysis request - passing selected_analysis as a string
156
+ analysis_results, _ = process_analysis_request(dataset, selected_analysis, parameters)
157
+
158
+ # If there's an error or no results
159
+ if not analysis_results or "analyses" not in analysis_results or not analysis_results["analyses"]:
160
+ return (
161
+ analysis_results,
162
+ False,
163
+ False,
164
+ gr.update(visible=False),
165
+ gr.update(visible=False),
166
+ gr.update(visible=False),
167
+ gr.update(visible=False),
168
+ gr.update(visible=False),
169
+ gr.update(visible=False),
170
+ gr.update(visible=False),
171
+ gr.update(visible=False),
172
+ gr.update(visible=False),
173
+ True,
174
+ gr.update(visible=True, value="❌ **No results found.** Try a different analysis option.")
175
+ )
176
+
177
+ # Extract information to display in components
178
+ prompt = list(analysis_results["analyses"].keys())[0]
179
+ analyses = analysis_results["analyses"][prompt]
180
+
181
+ # Initialize visualization components visibilities and contents
182
+ visualization_area_visible = False
183
+ prompt_title_visible = False
184
+ prompt_title_value = ""
185
+ models_compared_visible = False
186
+ models_compared_value = ""
187
+
188
+ model1_title_visible = False
189
+ model1_title_value = ""
190
+ model1_words_visible = False
191
+ model1_words_value = ""
192
+
193
+ model2_title_visible = False
194
+ model2_title_value = ""
195
+ model2_words_visible = False
196
+ model2_words_value = ""
197
+
198
+ similarity_title_visible = False
199
+ similarity_metrics_visible = False
200
+ similarity_metrics_value = ""
201
+
202
+ # Check for messages from placeholder analyses
203
+ if "message" in analyses:
204
+ return (
205
+ analysis_results,
206
+ False,
207
+ False,
208
+ gr.update(visible=False),
209
+ gr.update(visible=False),
210
+ gr.update(visible=False),
211
+ gr.update(visible=False),
212
+ gr.update(visible=False),
213
+ gr.update(visible=False),
214
+ gr.update(visible=False),
215
+ gr.update(visible=False),
216
+ gr.update(visible=False),
217
+ True,
218
+ gr.update(visible=True, value=f"ℹ️ **{analyses['message']}**")
219
+ )
220
+
221
+ # Process based on the selected analysis type
222
+ if selected_analysis == "Bag of Words" and "bag_of_words" in analyses:
223
+ visualization_area_visible = True
224
+ bow_results = analyses["bag_of_words"]
225
+ models = bow_results.get("models", [])
226
+
227
+ if len(models) >= 2:
228
+ prompt_title_visible = True
229
+ prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
230
+
231
+ models_compared_visible = True
232
+ models_compared_value = f"### Comparing responses from {models[0]} and {models[1]}"
233
+
234
+ # Extract and format information for display
235
+ model1_name = models[0]
236
+ model2_name = models[1]
237
+
238
+ # Format important words for each model
239
+ important_words = bow_results.get("important_words", {})
240
+
241
+ if model1_name in important_words:
242
+ model1_title_visible = True
243
+ model1_title_value = f"#### Top Words Used by {model1_name}"
244
+
245
+ word_list = [f"**{item['word']}** ({item['count']})" for item in important_words[model1_name][:10]]
246
+ model1_words_visible = True
247
+ model1_words_value = ", ".join(word_list)
248
+
249
+ if model2_name in important_words:
250
+ model2_title_visible = True
251
+ model2_title_value = f"#### Top Words Used by {model2_name}"
252
+
253
+ word_list = [f"**{item['word']}** ({item['count']})" for item in important_words[model2_name][:10]]
254
+ model2_words_visible = True
255
+ model2_words_value = ", ".join(word_list)
256
+
257
+ # Format similarity metrics
258
+ comparisons = bow_results.get("comparisons", {})
259
+ comparison_key = f"{model1_name} vs {model2_name}"
260
+
261
+ if comparison_key in comparisons:
262
+ metrics = comparisons[comparison_key]
263
+ cosine = metrics.get("cosine_similarity", 0)
264
+ jaccard = metrics.get("jaccard_similarity", 0)
265
+ semantic = metrics.get("semantic_similarity", 0)
266
+ common_words = metrics.get("common_word_count", 0)
267
+
268
+ similarity_title_visible = True
269
+ similarity_metrics_visible = True
270
+ similarity_metrics_value = f"""
271
+ - **Cosine Similarity**: {cosine:.2f} (higher means more similar word frequency patterns)
272
+ - **Jaccard Similarity**: {jaccard:.2f} (higher means more word overlap)
273
+ - **Semantic Similarity**: {semantic:.2f} (higher means more similar meaning)
274
+ - **Common Words**: {common_words} words appear in both responses
275
+ """
276
+
277
+ # Check for N-gram analysis
278
+ elif selected_analysis == "N-gram Analysis" and "ngram_analysis" in analyses:
279
+ visualization_area_visible = True
280
+ ngram_results = analyses["ngram_analysis"]
281
+ models = ngram_results.get("models", [])
282
+ ngram_size = ngram_results.get("ngram_size", 2)
283
+ size_name = "Unigrams" if ngram_size == 1 else f"{ngram_size}-grams"
284
+
285
+ if len(models) >= 2:
286
+ prompt_title_visible = True
287
+ prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
288
+
289
+ models_compared_visible = True
290
+ models_compared_value = f"### {size_name} Analysis: Comparing responses from {models[0]} and {models[1]}"
291
+
292
+ # Extract and format information for display
293
+ model1_name = models[0]
294
+ model2_name = models[1]
295
+
296
+ # Format important n-grams for each model
297
+ important_ngrams = ngram_results.get("important_ngrams", {})
298
+
299
+ if model1_name in important_ngrams:
300
+ model1_title_visible = True
301
+ model1_title_value = f"#### Top {size_name} Used by {model1_name}"
302
+
303
+ ngram_list = [f"**{item['ngram']}** ({item['count']})" for item in important_ngrams[model1_name][:10]]
304
+ model1_words_visible = True
305
+ model1_words_value = ", ".join(ngram_list)
306
+
307
+ if model2_name in important_ngrams:
308
+ model2_title_visible = True
309
+ model2_title_value = f"#### Top {size_name} Used by {model2_name}"
310
+
311
+ ngram_list = [f"**{item['ngram']}** ({item['count']})" for item in important_ngrams[model2_name][:10]]
312
+ model2_words_visible = True
313
+ model2_words_value = ", ".join(ngram_list)
314
+
315
+ # Format similarity metrics if available
316
+ if "comparisons" in ngram_results:
317
+ comparison_key = f"{model1_name} vs {model2_name}"
318
+
319
+ if comparison_key in ngram_results["comparisons"]:
320
+ metrics = ngram_results["comparisons"][comparison_key]
321
+ common_count = metrics.get("common_ngram_count", 0)
322
+
323
+ similarity_title_visible = True
324
+ similarity_metrics_visible = True
325
+ similarity_metrics_value = f"""
326
+ - **Common {size_name}**: {common_count} {size_name.lower()} appear in both responses
327
+ """
328
+
329
+ # Check for Topic Modeling analysis
330
+ elif selected_analysis == "Topic Modeling" and "topic_modeling" in analyses:
331
+ visualization_area_visible = True
332
+ topic_results = analyses["topic_modeling"]
333
+ models = topic_results.get("models", [])
334
+ method = topic_results.get("method", "lda").upper()
335
+ n_topics = topic_results.get("n_topics", 3)
336
+
337
+ if len(models) >= 2:
338
+ prompt_title_visible = True
339
+ prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
340
+
341
+ models_compared_visible = True
342
+ models_compared_value = f"### Topic Modeling Analysis ({method}, {n_topics} topics)"
343
+
344
+ # Extract and format topic information
345
+ topics = topic_results.get("topics", [])
346
+
347
+ if topics:
348
+ # Format topic info for display
349
+ topic_info = []
350
+ for topic in topics[:3]: # Show first 3 topics
351
+ topic_id = topic.get("id", 0)
352
+ words = topic.get("words", [])[:5] # Top 5 words per topic
353
+
354
+ if words:
355
+ topic_info.append(f"**Topic {topic_id+1}**: {', '.join(words)}")
356
+
357
+ if topic_info:
358
+ model1_title_visible = True
359
+ model1_title_value = "#### Discovered Topics"
360
+ model1_words_visible = True
361
+ model1_words_value = "\n".join(topic_info)
362
+
363
+ # Get topic distributions for models
364
+ model_topics = topic_results.get("model_topics", {})
365
+
366
+ if model_topics:
367
+ model1_name = models[0]
368
+ model2_name = models[1]
369
+
370
+ # Format topic distribution info
371
+ if model1_name in model_topics and model2_name in model_topics:
372
+ model2_title_visible = True
373
+ model2_title_value = "#### Topic Distribution"
374
+ model2_words_visible = True
375
+
376
+ # Simple distribution display
377
+ dist1 = model_topics[model1_name]
378
+ dist2 = model_topics[model2_name]
379
+
380
+ model2_words_value = f"""
381
+ **{model1_name}**: {', '.join([f"Topic {i+1}: {v:.2f}" for i, v in enumerate(dist1[:3])])}
382
+
383
+ **{model2_name}**: {', '.join([f"Topic {i+1}: {v:.2f}" for i, v in enumerate(dist2[:3])])}
384
+ """
385
+
386
+ # Add similarity metrics if available
387
+ comparisons = topic_results.get("comparisons", {})
388
+ if comparisons:
389
+ comparison_key = f"{model1_name} vs {model2_name}"
390
+
391
+ if comparison_key in comparisons:
392
+ metrics = comparisons[comparison_key]
393
+ js_div = metrics.get("js_divergence", 0)
394
+
395
+ similarity_title_visible = True
396
+ similarity_metrics_visible = True
397
+ similarity_metrics_value = f"""
398
+ - **Topic Distribution Divergence**: {js_div:.4f} (lower means more similar topic distributions)
399
+ """
400
+
401
+ # Check for Classifier analysis
402
+ elif selected_analysis == "Classifier" and "classifier" in analyses:
403
+ visualization_area_visible = True
404
+ classifier_results = analyses["classifier"]
405
+ models = classifier_results.get("models", [])
406
+
407
+ if len(models) >= 2:
408
+ prompt_title_visible = True
409
+ prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
410
+
411
+ models_compared_visible = True
412
+ models_compared_value = f"### Classifier Analysis for {models[0]} and {models[1]}"
413
+
414
+ # Extract and format classifier information
415
+ model1_name = models[0]
416
+ model2_name = models[1]
417
+
418
+ # Display classifications for each model
419
+ classifications = classifier_results.get("classifications", {})
420
+
421
+ if classifications:
422
+ model1_title_visible = True
423
+ model1_title_value = f"#### Classification Results"
424
+ model1_words_visible = True
425
+
426
+ model1_results = classifications.get(model1_name, {})
427
+ model2_results = classifications.get(model2_name, {})
428
+
429
+ model1_words_value = f"""
430
+ **{model1_name}**:
431
+ - Formality: {model1_results.get('formality', 'N/A')}
432
+ - Sentiment: {model1_results.get('sentiment', 'N/A')}
433
+ - Complexity: {model1_results.get('complexity', 'N/A')}
434
+
435
+ **{model2_name}**:
436
+ - Formality: {model2_results.get('formality', 'N/A')}
437
+ - Sentiment: {model2_results.get('sentiment', 'N/A')}
438
+ - Complexity: {model2_results.get('complexity', 'N/A')}
439
+ """
440
+
441
+ # Show comparison
442
+ model2_title_visible = True
443
+ model2_title_value = f"#### Classification Comparison"
444
+ model2_words_visible = True
445
+
446
+ differences = classifier_results.get("differences", {})
447
+ model2_words_value = "\n".join([
448
+ f"- **{category}**: {diff}"
449
+ for category, diff in differences.items()
450
+ ])
451
 
452
+ # Check for Bias Detection analysis
453
+ elif selected_analysis == "Bias Detection" and "bias_detection" in analyses:
454
+ visualization_area_visible = True
455
+ bias_results = analyses["bias_detection"]
456
+ models = bias_results.get("models", [])
457
+
458
+ if len(models) >= 2:
459
+ prompt_title_visible = True
460
+ prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
461
+
462
+ models_compared_visible = True
463
+ models_compared_value = f"### Bias Analysis: Comparing responses from {models[0]} and {models[1]}"
464
+
465
+ # Display comparative bias results
466
+ model1_name = models[0]
467
+ model2_name = models[1]
468
+
469
+ if "comparative" in bias_results:
470
+ comparative = bias_results["comparative"]
471
+
472
+ # Format summary for display
473
+ model1_title_visible = True
474
+ model1_title_value = "#### Bias Detection Summary"
475
+ model1_words_visible = True
476
+
477
+ summary_parts = []
478
+
479
+ # Add sentiment comparison
480
+ if "sentiment" in comparative:
481
+ sent = comparative["sentiment"]
482
+ is_significant = sent.get("significant", False)
483
+ summary_parts.append(
484
+ f"**Sentiment Bias**: {model1_name} shows {sent.get(model1_name, 'N/A')} sentiment, " +
485
+ f"while {model2_name} shows {sent.get(model2_name, 'N/A')} sentiment. " +
486
+ f"({'Significant' if is_significant else 'Minor'} difference)"
487
+ )
488
+
489
+ # Add partisan comparison
490
+ if "partisan" in comparative:
491
+ part = comparative["partisan"]
492
+ is_significant = part.get("significant", False)
493
+ summary_parts.append(
494
+ f"**Partisan Leaning**: {model1_name} appears {part.get(model1_name, 'N/A')}, " +
495
+ f"while {model2_name} appears {part.get(model2_name, 'N/A')}. " +
496
+ f"({'Significant' if is_significant else 'Minor'} difference)"
497
+ )
498
+
499
+ # Add framing comparison
500
+ if "framing" in comparative:
501
+ frame = comparative["framing"]
502
+ different_frames = frame.get("different_frames", False)
503
+ m1_frame = frame.get(model1_name, "N/A").replace('_', ' ').title()
504
+ m2_frame = frame.get(model2_name, "N/A").replace('_', ' ').title()
505
+ summary_parts.append(
506
+ f"**Issue Framing**: {model1_name} primarily frames issues in {m1_frame} terms, " +
507
+ f"while {model2_name} uses {m2_frame} framing. " +
508
+ f"({'Different' if different_frames else 'Similar'} approaches)"
509
+ )
510
+
511
+ # Add overall assessment
512
+ if "overall" in comparative:
513
+ overall = comparative["overall"]
514
+ significant = overall.get("significant_bias_difference", False)
515
+ summary_parts.append(
516
+ f"**Overall Assessment**: " +
517
+ f"Analysis shows a {overall.get('difference', 0):.2f}/1.0 difference in bias patterns. " +
518
+ f"({'Significant' if significant else 'Minor'} overall bias difference)"
519
+ )
520
+
521
+ # Combine all parts
522
+ model1_words_value = "\n\n".join(summary_parts)
523
+
524
+ # Format detailed term analysis
525
+ if (model1_name in bias_results and "partisan" in bias_results[model1_name] and
526
+ model2_name in bias_results and "partisan" in bias_results[model2_name]):
527
+
528
+ model2_title_visible = True
529
+ model2_title_value = "#### Partisan Term Analysis"
530
+ model2_words_visible = True
531
+
532
+ m1_lib = bias_results[model1_name]["partisan"].get("liberal_terms", [])
533
+ m1_con = bias_results[model1_name]["partisan"].get("conservative_terms", [])
534
+ m2_lib = bias_results[model2_name]["partisan"].get("liberal_terms", [])
535
+ m2_con = bias_results[model2_name]["partisan"].get("conservative_terms", [])
536
+
537
+ model2_words_value = f"""
538
+ **{model1_name}**:
539
+ - Liberal terms: {', '.join(m1_lib) if m1_lib else 'None detected'}
540
+ - Conservative terms: {', '.join(m1_con) if m1_con else 'None detected'}
541
+
542
+ **{model2_name}**:
543
+ - Liberal terms: {', '.join(m2_lib) if m2_lib else 'None detected'}
544
+ - Conservative terms: {', '.join(m2_con) if m2_con else 'None detected'}
545
+ """
546
+
547
+ # If we don't have visualization data from any analysis
548
+ if not visualization_area_visible:
549
+ return (
550
+ analysis_results,
551
+ False,
552
+ False,
553
+ gr.update(visible=False),
554
+ gr.update(visible=False),
555
+ gr.update(visible=False),
556
+ gr.update(visible=False),
557
+ gr.update(visible=False),
558
+ gr.update(visible=False),
559
+ gr.update(visible=False),
560
+ gr.update(visible=False),
561
+ gr.update(visible=False),
562
+ True,
563
+ gr.update(visible=True, value="❌ **No visualization data found.** Make sure to select a valid analysis option.")
564
+ )
565
 
566
+ # Return all updated component values
567
+ return (
568
+ analysis_results, # analysis_results_state
569
+ False, # analysis_output visibility
570
+ True, # visualization_area_visible
571
+ gr.update(visible=True), # analysis_title
572
+ gr.update(visible=prompt_title_visible, value=prompt_title_value), # prompt_title
573
+ gr.update(visible=models_compared_visible, value=models_compared_value), # models_compared
574
+ gr.update(visible=model1_title_visible, value=model1_title_value), # model1_title
575
+ gr.update(visible=model1_words_visible, value=model1_words_value), # model1_words
576
+ gr.update(visible=model2_title_visible, value=model2_title_value), # model2_title
577
+ gr.update(visible=model2_words_visible, value=model2_words_value), # model2_words
578
+ gr.update(visible=similarity_title_visible), # similarity_metrics_title
579
+ gr.update(visible=similarity_metrics_visible, value=similarity_metrics_value), # similarity_metrics
580
+ False, # status_message_visible
581
+ gr.update(visible=False) # status_message
582
+ )
583
+
584
+ except Exception as e:
585
+ import traceback
586
+ error_msg = f"Error in analysis: {str(e)}\n{traceback.format_exc()}"
587
+ print(error_msg)
588
+
589
+ return (
590
+ {"error": error_msg}, # analysis_results_state
591
+ True, # analysis_output visibility (show raw JSON for debugging)
592
+ False, # visualization_area_visible
593
+ gr.update(visible=False),
594
+ gr.update(visible=False),
595
+ gr.update(visible=False),
596
+ gr.update(visible=False),
597
+ gr.update(visible=False),
598
+ gr.update(visible=False),
599
+ gr.update(visible=False),
600
+ gr.update(visible=False),
601
+ gr.update(visible=False),
602
+ True, # status_message_visible
603
+ gr.update(visible=True, value=f"❌ **Error during analysis:**\n\n```\n{str(e)}\n```") # status_message
604
+ )
605
+
606
+ # Add a new LLM Analysis tab
607
+ with gr.Tab("LLM Analysis"):
608
+ gr.Markdown("## LLM-Based Response Analysis")
609
+
610
+ with gr.Row():
611
+ with gr.Column():
612
+ llm_analysis_type = gr.Radio(
613
+ choices=["Response Quality", "Response Comparison", "Factual Accuracy"],
614
+ label="Analysis Type",
615
+ value="Response Comparison"
616
+ )
617
+
618
+ llm_model = gr.Dropdown(
619
+ choices=["OpenAI GPT-4", "Anthropic Claude", "Local LLM"],
620
+ label="Analysis Model",
621
+ value="OpenAI GPT-4"
622
+ )
623
+
624
+ run_llm_analysis_btn = gr.Button("Run LLM Analysis", variant="primary")
625
+
626
+ with gr.Column():
627
+ llm_analysis_prompt = gr.Textbox(
628
+ label="Custom Analysis Instructions (Optional)",
629
+ placeholder="Enter any specific instructions for the analysis...",
630
+ lines=3
631
+ )
632
+
633
+ llm_analysis_status = gr.Markdown("*No analysis has been run*")
634
+
635
+ llm_analysis_result = gr.Markdown(visible=False)
636
+
637
+ # Placeholder function for LLM analysis
638
+ def run_llm_analysis(dataset, analysis_type, model, custom_prompt):
639
+ if not dataset or "entries" not in dataset or not dataset["entries"]:
640
+ return (
641
+ gr.update(visible=True, value="❌ **Error:** No dataset loaded. Please create or load a dataset first."),
642
+ gr.update(visible=False)
643
+ )
644
+
645
+ # Placeholder for actual implementation
646
+ return (
647
+ gr.update(visible=True, value="⏳ **Implementation in progress**\n\nLLM-based analysis will be available in a future update."),
648
+ gr.update(visible=False)
649
+ )
650
+
651
  # Connect the run button to the analysis function
652
+ run_llm_analysis_btn.click(
653
+ fn=run_llm_analysis,
654
+ inputs=[dataset_state, llm_analysis_type, llm_model, llm_analysis_prompt],
655
+ outputs=[llm_analysis_status, llm_analysis_result]
656
  )
657
 
658
+ # Run analysis with proper parameters
659
+ run_analysis_btn.click(
660
+ fn=run_analysis,
661
+ inputs=[dataset_state, analysis_options, bow_top_slider, ngram_n, ngram_top, topic_count, bias_methods],
662
+ outputs=[
663
+ analysis_results_state,
664
+ analysis_output,
665
+ visualization_area_visible,
666
+ analysis_title,
667
+ prompt_title,
668
+ models_compared,
669
+ model1_title,
670
+ model1_words,
671
+ model2_title,
672
+ model2_words,
673
+ similarity_metrics_title,
674
+ similarity_metrics,
675
+ status_message_visible,
676
+ status_message
677
+ ]
678
+ )
679
+
680
  return app
681
 
 
682
  if __name__ == "__main__":
683
  # Download required NLTK resources before launching the app
684
  download_nltk_resources()
685
+
686
  app = create_app()
687
  app.launch()
data_handler.py DELETED
@@ -1,67 +0,0 @@
- import os
- import nltk
- import logging
-
- # Set up logging
- logger = logging.getLogger('gradio_app.data_handler')
-
-
- def download_nltk_resources():
- """
- Download required NLTK resources if not already downloaded.
-
- Ensures that all necessary NLTK resources are available for the application.
- """
- try:
- # Define the path for NLTK data
- nltk_data_path = os.path.expanduser("~/nltk_data")
- os.makedirs(nltk_data_path, exist_ok=True)
-
- # Add this path to NLTK's data path
- if nltk_data_path not in nltk.data.path:
- nltk.data.path.append(nltk_data_path)
-
- # List of required NLTK resources
- resources = ['punkt', 'wordnet', 'stopwords', 'vader_lexicon']
-
- for resource in resources:
- try:
- # Check if the resource is already available
- if not is_nltk_resource_available(resource):
- logger.info(f"Downloading {resource}...")
- nltk.download(resource, download_dir=nltk_data_path, quiet=True)
- else:
- logger.info(f"Resource {resource} already downloaded")
- except Exception as e:
- logger.error(f"Error downloading resource {resource}: {e}")
-
- logger.info("NLTK resources check completed successfully")
- except Exception as e:
- logger.error(f"Error during NLTK resource setup: {e}")
-
-
- def is_nltk_resource_available(resource):
- """
- Check if an NLTK resource is already available.
-
- Args:
- resource (str): The name of the NLTK resource to check.
-
- Returns:
- bool: True if the resource is available, False otherwise.
- """
- locations = [
- f'tokenizers/{resource}',
- f'corpora/{resource}',
- f'taggers/{resource}',
- f'{resource}'
- ]
-
- for location in locations:
- try:
- nltk.data.find(location)
- return True
- except LookupError:
- continue
-
- return False
improved_analysis_handler.py DELETED
@@ -1,217 +0,0 @@
1
- import gradio as gr
2
- import json
3
- import logging
4
- from visualization.bow_visualizer import process_and_visualize_analysis
5
- from processors.topic_modeling import compare_topics
6
- from processors.ngram_analysis import compare_ngrams
7
- from processors.bow_analysis import compare_bow
8
- from processors.text_classifiers import classify_formality, classify_sentiment, classify_complexity, compare_classifications
9
- # Add import for bias detection
10
- from processors.bias_detection import compare_bias
11
-
12
- # Set up logging
13
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
14
- logger = logging.getLogger('analysis_handler')
15
-
16
- def process_analysis_request(dataset, selected_analysis, parameters):
17
- """
18
- Process the analysis request based on the selected options.
19
-
20
- Args:
21
- dataset (dict): The input dataset
22
- selected_analysis (str): The selected analysis type
23
- parameters (dict): Additional parameters for the analysis
24
-
25
- Returns:
26
- tuple: A tuple containing (analysis_results, visualization_data)
27
- """
28
- logger.info(f"Processing analysis request: {selected_analysis}")
29
-
30
- if not dataset or "entries" not in dataset or not dataset["entries"]:
31
- logger.warning("No valid dataset provided for analysis")
32
- return {}, None
33
-
34
- # Initialize the results structure
35
- results = {"analyses": {}}
36
-
37
- # Get the prompt text from the first entry
38
- prompt_text = dataset["entries"][0].get("prompt", "")
39
- if not prompt_text:
40
- logger.warning("No prompt found in dataset")
41
- return {"error": "No prompt found in dataset"}, None
42
-
43
- # Initialize the analysis container for this prompt
44
- results["analyses"][prompt_text] = {}
45
-
46
- # Get model names and responses
47
- model1_name = dataset["entries"][0].get("model", "Model 1")
48
- model2_name = dataset["entries"][1].get("model", "Model 2")
49
-
50
- model1_response = dataset["entries"][0].get("response", "")
51
- model2_response = dataset["entries"][1].get("response", "")
52
-
53
- logger.info(f"Comparing responses from {model1_name} and {model2_name}")
54
-
55
- try:
56
- # Process based on the selected analysis type
57
- if selected_analysis == "Bag of Words":
58
- # Get the top_n parameter and ensure it's an integer
59
- top_n = parameters.get("bow_top", 25)
60
- if isinstance(top_n, str):
61
- top_n = int(top_n)
62
-
63
- logger.info(f"Running Bag of Words analysis with top_n={top_n}")
64
-
65
- # Perform Bag of Words analysis using the processor
66
- bow_results = compare_bow(
67
- [model1_response, model2_response],
68
- [model1_name, model2_name],
69
- top_n=top_n
70
- )
71
- results["analyses"][prompt_text]["bag_of_words"] = bow_results
72
-
73
- elif selected_analysis == "N-gram Analysis":
74
- # Perform N-gram analysis
75
- ngram_size = parameters.get("ngram_n", 2)
76
- if isinstance(ngram_size, str):
77
- ngram_size = int(ngram_size)
78
-
79
- top_n = parameters.get("ngram_top", 15)
80
- if isinstance(top_n, str):
81
- top_n = int(top_n)
82
-
83
- logger.info(f"Running N-gram analysis with n={ngram_size}, top_n={top_n}")
84
-
85
- # Use the processor from the dedicated ngram_analysis module
86
- from processors.ngram_analysis import compare_ngrams as ngram_processor
87
- ngram_results = ngram_processor(
88
- [model1_response, model2_response],
89
- [model1_name, model2_name],
90
- n=ngram_size,
91
- top_n=top_n
92
- )
93
- results["analyses"][prompt_text]["ngram_analysis"] = ngram_results
94
-
95
- elif selected_analysis == "Topic Modeling":
96
- # Perform topic modeling analysis
97
- topic_count = parameters.get("topic_count", 3)
98
- if isinstance(topic_count, str):
99
- topic_count = int(topic_count)
100
-
101
- logger.info(f"Running Topic Modeling analysis with n_topics={topic_count}")
102
-
103
- try:
104
- # Import the improved topic modeling module
105
- try:
106
- # First try to import from improved module if available
107
- from improved_topic_modeling import compare_topics as improved_compare_topics
108
- logger.info("Using improved topic modeling implementation")
109
- topic_results = improved_compare_topics(
110
- texts_set_1=[model1_response],
111
- texts_set_2=[model2_response],
112
- n_topics=topic_count,
113
- model_names=[model1_name, model2_name])
114
- except ImportError:
115
- # Fall back to original implementation
116
- logger.info("Using original topic modeling implementation")
117
- from processors.topic_modeling import compare_topics
118
- topic_results = compare_topics(
119
- texts_set_1=[model1_response],
120
- texts_set_2=[model2_response],
121
- n_topics=topic_count,
122
- model_names=[model1_name, model2_name])
123
-
124
- results["analyses"][prompt_text]["topic_modeling"] = topic_results
125
-
126
- # Ensure the topic modeling results contain the necessary fields
127
- if "topics" not in topic_results or not topic_results["topics"]:
128
- logger.warning("No topics found in topic modeling results")
129
- topic_results["message"] = "No significant topics were discovered in the text. Try a different analysis method or adjust parameters."
130
-
131
- if "model_topics" not in topic_results or not topic_results["model_topics"]:
132
- logger.warning("No model topics found in topic modeling results")
133
- if "message" not in topic_results:
134
- topic_results["message"] = "Could not calculate topic distributions for the models."
135
-
136
- except Exception as e:
137
- import traceback
138
- error_msg = f"Topic modeling error: {str(e)}\n{traceback.format_exc()}"
139
- logger.error(error_msg)
140
- results["analyses"][prompt_text]["topic_modeling"] = {
141
- "models": [model1_name, model2_name],
142
- "error": str(e),
143
- "message": "Topic modeling failed. Please try with longer text or different parameters.",
144
- "stack_trace": traceback.format_exc()
145
- }
146
-
147
- elif selected_analysis == "Classifier":
148
- # Perform classifier analysis
149
- logger.info("Running Classifier analysis")
150
-
151
- results["analyses"][prompt_text]["classifier"] = {
152
- "models": [model1_name, model2_name],
153
- "classifications": {
154
- model1_name: {
155
- "formality": classify_formality(model1_response),
156
- "sentiment": classify_sentiment(model1_response),
157
- "complexity": classify_complexity(model1_response)
158
- },
159
- model2_name: {
160
- "formality": classify_formality(model2_response),
161
- "sentiment": classify_sentiment(model2_response),
162
- "complexity": classify_complexity(model2_response)
163
- }
164
- },
165
- "differences": compare_classifications(model1_response, model2_response)
166
- }
167
-
168
- elif selected_analysis == "Bias Detection":
169
- # Perform bias detection analysis
170
- logger.info("Running Bias Detection analysis")
171
-
172
- try:
173
- # Perform bias detection analysis
174
- logger.info(f"Starting bias detection for {model1_name} and {model2_name}")
175
- logger.info(f"Text lengths - Text1: {len(model1_response)}, Text2: {len(model2_response)}")
176
-
177
- bias_results = compare_bias(
178
- model1_response,
179
- model2_response,
180
- model_names=[model1_name, model2_name]
181
- )
182
-
183
- logger.info(f"Bias detection complete. Result has keys: {bias_results.keys() if bias_results else 'None'}")
184
- results["analyses"][prompt_text]["bias_detection"] = bias_results
185
-
186
- except Exception as e:
187
- import traceback
188
- error_msg = f"Bias detection error: {str(e)}\n{traceback.format_exc()}"
189
- logger.error(error_msg)
190
- results["analyses"][prompt_text]["bias_detection"] = {
191
- "models": [model1_name, model2_name],
192
- "error": str(e),
193
- "message": "Bias detection failed. Try with different parameters.",
194
- "stack_trace": traceback.format_exc()
195
- }
196
-
197
- else:
198
- # Unknown analysis type
199
- logger.warning(f"Unknown analysis type: {selected_analysis}")
200
- results["analyses"][prompt_text]["message"] = "Please select a valid analysis type."
201
-
202
- except Exception as e:
203
- import traceback
204
- error_msg = f"Error processing analysis request: {str(e)}\n{traceback.format_exc()}"
205
- logger.error(error_msg)
206
- results = {
207
- "error": str(e),
208
- "stack_trace": traceback.format_exc(),
209
- "analyses": {
210
- prompt_text: {
211
- "message": f"Analysis failed: {str(e)}"
212
- }
213
- }
214
- }
215
-
216
- # Return both the analysis results and a placeholder for visualization data
217
- return results, None
processors/bias_detection.py CHANGED
@@ -8,22 +8,13 @@ import re
8
  import json
9
  import os
10
  import numpy as np
11
- import logging
12
-
13
- # Set up logging
14
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
15
- logger = logging.getLogger('bias_detection')
16
 
17
  # Ensure NLTK resources are available
18
  def download_nltk_resources():
19
  """Download required NLTK resources if not already downloaded"""
20
  try:
21
- logger.info("Downloading NLTK resources for bias detection...")
22
  nltk.download('vader_lexicon', quiet=True)
23
- nltk.download('punkt', quiet=True)
24
- nltk.download('stopwords', quiet=True)
25
- except Exception as e:
26
- logger.error(f"Error downloading NLTK resources: {e}")
27
  pass
28
 
29
  download_nltk_resources()
@@ -33,15 +24,11 @@ download_nltk_resources()
33
  PARTISAN_WORDS = {
34
  "liberal": [
35
  "progressive", "equity", "climate", "reform", "collective",
36
- "diversity", "inclusive", "sustainable", "justice", "regulation",
37
- "equity", "social", "community", "rights", "environment",
38
- "equality", "welfare", "public", "protection", "universal"
39
  ],
40
  "conservative": [
41
  "traditional", "freedom", "liberty", "individual", "faith",
42
- "values", "efficient", "deregulation", "patriot", "security",
43
- "family", "business", "market", "nation", "protect",
44
- "heritage", "responsibility", "constitution", "fiscal", "private"
45
  ]
46
  }
47
 
@@ -75,37 +62,25 @@ def detect_sentiment_bias(text):
75
  Returns:
76
  dict: Sentiment analysis results
77
  """
78
- try:
79
- logger.info(f"Starting sentiment analysis on text (length: {len(text)})")
80
- sia = SentimentIntensityAnalyzer()
81
- sentiment = sia.polarity_scores(text)
82
-
83
- # Determine if sentiment indicates bias
84
- if sentiment['compound'] >= 0.25:
85
- bias_direction = "positive"
86
- bias_strength = min(1.0, sentiment['compound'] * 2) # Scale to 0-1
87
- elif sentiment['compound'] <= -0.25:
88
- bias_direction = "negative"
89
- bias_strength = min(1.0, abs(sentiment['compound'] * 2)) # Scale to 0-1
90
- else:
91
- bias_direction = "neutral"
92
- bias_strength = 0.0
93
-
94
- logger.info(f"Sentiment analysis complete. Direction: {bias_direction}, Strength: {bias_strength:.2f}")
95
- return {
96
- "sentiment_scores": sentiment,
97
- "bias_direction": bias_direction,
98
- "bias_strength": bias_strength
99
- }
100
- except Exception as e:
101
- logger.error(f"Error in sentiment analysis: {str(e)}")
102
- # Return a default neutral sentiment to prevent failures
103
- return {
104
- "sentiment_scores": {"pos": 0, "neg": 0, "neu": 1, "compound": 0},
105
- "bias_direction": "neutral",
106
- "bias_strength": 0.0,
107
- "error": str(e)
108
- }
109
 
110
  def detect_partisan_leaning(text):
111
  """
@@ -117,73 +92,55 @@ def detect_partisan_leaning(text):
117
  Returns:
118
  dict: Partisan leaning analysis results
119
  """
120
- try:
121
- logger.info(f"Starting partisan leaning analysis on text (length: {len(text)})")
122
- text_lower = text.lower()
123
-
124
- # Count partisan words
125
- liberal_count = 0
126
- conservative_count = 0
127
-
128
- liberal_matches = []
129
- conservative_matches = []
130
-
131
- # Search for partisan words in text
132
- for word in PARTISAN_WORDS["liberal"]:
133
- matches = re.findall(r'\b' + word + r'\b', text_lower)
134
- if matches:
135
- liberal_count += len(matches)
136
- liberal_matches.extend(matches)
137
-
138
- for word in PARTISAN_WORDS["conservative"]:
139
- matches = re.findall(r'\b' + word + r'\b', text_lower)
140
- if matches:
141
- conservative_count += len(matches)
142
- conservative_matches.extend(matches)
143
-
144
- logger.info(f"Found {liberal_count} liberal terms and {conservative_count} conservative terms")
145
-
146
- # Calculate partisan lean score (-1 to 1, negative = liberal, positive = conservative)
147
- total_count = liberal_count + conservative_count
148
- if total_count > 0:
149
- lean_score = (conservative_count - liberal_count) / total_count
150
- else:
151
- lean_score = 0
152
-
153
- # Determine leaning based on score
154
- if lean_score <= -0.2:
155
- leaning = "liberal"
156
- strength = min(1.0, abs(lean_score * 2))
157
- elif lean_score >= 0.2:
158
- leaning = "conservative"
159
- strength = min(1.0, lean_score * 2)
160
- else:
161
- leaning = "balanced"
162
- strength = 0.0
163
-
164
- logger.info(f"Partisan analysis complete. Leaning: {leaning}, Score: {lean_score:.2f}")
165
- return {
166
- "liberal_count": liberal_count,
167
- "conservative_count": conservative_count,
168
- "liberal_terms": liberal_matches,
169
- "conservative_terms": conservative_matches,
170
- "lean_score": lean_score,
171
- "leaning": leaning,
172
- "strength": strength
173
- }
174
- except Exception as e:
175
- logger.error(f"Error in partisan leaning analysis: {str(e)}")
176
- # Return default balanced values to prevent failures
177
- return {
178
- "liberal_count": 0,
179
- "conservative_count": 0,
180
- "liberal_terms": [],
181
- "conservative_terms": [],
182
- "lean_score": 0,
183
- "leaning": "balanced",
184
- "strength": 0.0,
185
- "error": str(e)
186
- }
187
 
188
  def detect_framing_bias(text):
189
  """
@@ -195,61 +152,45 @@ def detect_framing_bias(text):
195
  Returns:
196
  dict: Framing analysis results
197
  """
198
- try:
199
- logger.info(f"Starting framing analysis on text (length: {len(text)})")
200
- text_lower = text.lower()
201
- framing_counts = {}
202
- framing_examples = {}
203
-
204
- # Count framing patterns
205
- for frame, patterns in FRAMING_PATTERNS.items():
206
- framing_counts[frame] = 0
207
- framing_examples[frame] = []
208
-
209
- for pattern in patterns:
210
- matches = re.findall(pattern, text_lower)
211
- if matches:
212
- framing_counts[frame] += len(matches)
213
- # Store up to 5 examples of each frame
214
- unique_matches = set(matches)
215
- framing_examples[frame].extend(list(unique_matches)[:5])
216
-
217
- logger.info(f"Frame counts: {framing_counts}")
218
-
219
- # Calculate dominant frame
220
- total_framing = sum(framing_counts.values())
221
- framing_distribution = {}
222
 
223
- if total_framing > 0:
224
- for frame, count in framing_counts.items():
225
- framing_distribution[frame] = count / total_framing
226
-
227
- dominant_frame = max(framing_counts.items(), key=lambda x: x[1])[0]
228
- frame_bias_strength = max(0.0, framing_distribution[dominant_frame] - 0.25)
229
- else:
230
- dominant_frame = "none"
231
- frame_bias_strength = 0.0
232
- framing_distribution = {frame: 0.0 for frame in FRAMING_PATTERNS.keys()}
 
 
 
 
 
233
 
234
- logger.info(f"Framing analysis complete. Dominant frame: {dominant_frame}")
235
- return {
236
- "framing_counts": framing_counts,
237
- "framing_examples": framing_examples,
238
- "framing_distribution": framing_distribution,
239
- "dominant_frame": dominant_frame,
240
- "frame_bias_strength": frame_bias_strength
241
- }
242
- except Exception as e:
243
- logger.error(f"Error in framing analysis: {str(e)}")
244
- # Return default values to prevent failures
245
- return {
246
- "framing_counts": {frame: 0 for frame in FRAMING_PATTERNS.keys()},
247
- "framing_examples": {frame: [] for frame in FRAMING_PATTERNS.keys()},
248
- "framing_distribution": {frame: 0.0 for frame in FRAMING_PATTERNS.keys()},
249
- "dominant_frame": "none",
250
- "frame_bias_strength": 0.0,
251
- "error": str(e)
252
- }
253
 
254
  def compare_bias(text1, text2, model_names=None):
255
  """
@@ -263,112 +204,71 @@ def compare_bias(text1, text2, model_names=None):
263
  Returns:
264
  dict: Comparative bias analysis
265
  """
266
- logger.info(f"Starting bias comparison analysis")
267
- logger.info(f"Text lengths - Text1: {len(text1)}, Text2: {len(text2)}")
268
-
269
  # Set default model names if not provided
270
  if model_names is None or len(model_names) < 2:
271
- logger.info(f"Using default model names")
272
  model_names = ["Model 1", "Model 2"]
273
- else:
274
- logger.info(f"Using provided model names: {model_names}")
275
 
276
  model1_name, model2_name = model_names[0], model_names[1]
277
 
278
- try:
279
- # Analyze each text
280
- sentiment_results1 = detect_sentiment_bias(text1)
281
- sentiment_results2 = detect_sentiment_bias(text2)
282
-
283
- partisan_results1 = detect_partisan_leaning(text1)
284
- partisan_results2 = detect_partisan_leaning(text2)
285
-
286
- framing_results1 = detect_framing_bias(text1)
287
- framing_results2 = detect_framing_bias(text2)
288
-
289
- # Determine if there's a significant difference in bias
290
- sentiment_difference = abs(sentiment_results1["bias_strength"] - sentiment_results2["bias_strength"])
291
-
292
- # For partisan leaning, compare the scores (negative is liberal, positive is conservative)
293
- partisan_difference = abs(partisan_results1["lean_score"] - partisan_results2["lean_score"])
294
-
295
- # Calculate overall bias difference
296
- overall_difference = (sentiment_difference + partisan_difference) / 2
297
-
298
- # Compare dominant frames
299
- frame_difference = framing_results1["dominant_frame"] != framing_results2["dominant_frame"] and \
300
- (framing_results1["frame_bias_strength"] > 0.1 or framing_results2["frame_bias_strength"] > 0.1)
301
-
302
- logger.info(f"Differences calculated - Sentiment: {sentiment_difference:.2f}, Partisan: {partisan_difference:.2f}")
303
-
304
- # Create comparative analysis
305
- comparative = {
306
- "sentiment": {
307
- model1_name: sentiment_results1["bias_direction"],
308
- model2_name: sentiment_results2["bias_direction"],
309
- "difference": sentiment_difference,
310
- "significant": sentiment_difference > 0.3
311
- },
312
- "partisan": {
313
- model1_name: partisan_results1["leaning"],
314
- model2_name: partisan_results2["leaning"],
315
- "difference": partisan_difference,
316
- "significant": partisan_difference > 0.4
317
- },
318
- "framing": {
319
- model1_name: framing_results1["dominant_frame"],
320
- model2_name: framing_results2["dominant_frame"],
321
- "different_frames": frame_difference
322
- },
323
- "overall": {
324
- "difference": overall_difference,
325
- "significant_bias_difference": overall_difference > 0.35
326
- }
327
- }
328
-
329
- # Assemble the complete result
330
- result = {
331
- "models": model_names,
332
- model1_name: {
333
- "sentiment": sentiment_results1,
334
- "partisan": partisan_results1,
335
- "framing": framing_results1
336
- },
337
- model2_name: {
338
- "sentiment": sentiment_results2,
339
- "partisan": partisan_results2,
340
- "framing": framing_results2
341
- },
342
- "comparative": comparative
343
  }
344
-
345
- logger.info(f"Bias comparison complete. Result has {len(result)} top-level keys.")
346
- logger.info(f"Result keys: {result.keys()}")
347
- return result
348
-
349
- except Exception as e:
350
- import traceback
351
- error_msg = f"Error in bias comparison: {str(e)}\n{traceback.format_exc()}"
352
- logger.error(error_msg)
353
-
354
- # Return an error result that won't break visualization
355
- return {
356
- "models": model_names,
357
- "error": str(e),
358
- "comparative": {
359
- "sentiment": {model1_name: "neutral", model2_name: "neutral", "difference": 0, "significant": False},
360
- "partisan": {model1_name: "balanced", model2_name: "balanced", "difference": 0, "significant": False},
361
- "framing": {model1_name: "none", model2_name: "none", "different_frames": False},
362
- "overall": {"difference": 0, "significant_bias_difference": False}
363
- },
364
- model1_name: {
365
- "sentiment": {"bias_direction": "neutral", "bias_strength": 0},
366
- "partisan": {"leaning": "balanced", "strength": 0},
367
- "framing": {"dominant_frame": "none"}
368
- },
369
- model2_name: {
370
- "sentiment": {"bias_direction": "neutral", "bias_strength": 0},
371
- "partisan": {"leaning": "balanced", "strength": 0},
372
- "framing": {"dominant_frame": "none"}
373
- }
374
- }
 
8
  import json
9
  import os
10
  import numpy as np
11
 
12
  # Ensure NLTK resources are available
13
  def download_nltk_resources():
14
  """Download required NLTK resources if not already downloaded"""
15
  try:
 
16
  nltk.download('vader_lexicon', quiet=True)
17
+ except Exception:
18
  pass
19
 
20
  download_nltk_resources()
 
24
  PARTISAN_WORDS = {
25
  "liberal": [
26
  "progressive", "equity", "climate", "reform", "collective",
27
+ "diversity", "inclusive", "sustainable", "justice", "regulation"
 
 
28
  ],
29
  "conservative": [
30
  "traditional", "freedom", "liberty", "individual", "faith",
31
+ "values", "efficient", "deregulation", "patriot", "security"
 
 
32
  ]
33
  }
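A note on how these word lists are consumed: detect_partisan_leaning (below) wraps each entry in \b word boundaries, so only exact whole-word hits are counted. A small illustrative sketch; the sample sentence is an assumption, not project output:

import re

sample = "the new regulations favor deregulation"     # hypothetical input
print(re.findall(r'\bregulation\b', sample))          # [] - "regulations" is not an exact whole-word match
print(re.findall(r'\bderegulation\b', sample))        # ['deregulation']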
34
 
 
62
  Returns:
63
  dict: Sentiment analysis results
64
  """
65
+ sia = SentimentIntensityAnalyzer()
66
+ sentiment = sia.polarity_scores(text)
67
+
68
+ # Determine if sentiment indicates bias
69
+ if sentiment['compound'] >= 0.25:
70
+ bias_direction = "positive"
71
+ bias_strength = min(1.0, sentiment['compound'] * 2) # Scale to 0-1
72
+ elif sentiment['compound'] <= -0.25:
73
+ bias_direction = "negative"
74
+ bias_strength = min(1.0, abs(sentiment['compound'] * 2)) # Scale to 0-1
75
+ else:
76
+ bias_direction = "neutral"
77
+ bias_strength = 0.0
78
+
79
+ return {
80
+ "sentiment_scores": sentiment,
81
+ "bias_direction": bias_direction,
82
+ "bias_strength": bias_strength
83
+ }
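For orientation, a minimal usage sketch of the rewritten detect_sentiment_bias; it assumes nltk's vader_lexicon is available (see download_nltk_resources above), and the sample sentence is a placeholder rather than captured output:

# Sketch only - thresholds mirror the function above.
result = detect_sentiment_bias("This reform is a wonderful, long-overdue success.")
# compound >= 0.25 -> "positive"; compound <= -0.25 -> "negative"; otherwise "neutral"
print(result["bias_direction"], round(result["bias_strength"], 2))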
84
 
85
  def detect_partisan_leaning(text):
86
  """
 
92
  Returns:
93
  dict: Partisan leaning analysis results
94
  """
95
+ text_lower = text.lower()
96
+
97
+ # Count partisan words
98
+ liberal_count = 0
99
+ conservative_count = 0
100
+
101
+ liberal_matches = []
102
+ conservative_matches = []
103
+
104
+ # Search for partisan words in text
105
+ for word in PARTISAN_WORDS["liberal"]:
106
+ matches = re.findall(r'\b' + word + r'\b', text_lower)
107
+ if matches:
108
+ liberal_count += len(matches)
109
+ liberal_matches.extend(matches)
110
+
111
+ for word in PARTISAN_WORDS["conservative"]:
112
+ matches = re.findall(r'\b' + word + r'\b', text_lower)
113
+ if matches:
114
+ conservative_count += len(matches)
115
+ conservative_matches.extend(matches)
116
+
117
+ # Calculate partisan lean score (-1 to 1, negative = liberal, positive = conservative)
118
+ total_count = liberal_count + conservative_count
119
+ if total_count > 0:
120
+ lean_score = (conservative_count - liberal_count) / total_count
121
+ else:
122
+ lean_score = 0
123
+
124
+ # Determine leaning based on score
125
+ if lean_score <= -0.2:
126
+ leaning = "liberal"
127
+ strength = min(1.0, abs(lean_score * 2))
128
+ elif lean_score >= 0.2:
129
+ leaning = "conservative"
130
+ strength = min(1.0, lean_score * 2)
131
+ else:
132
+ leaning = "balanced"
133
+ strength = 0.0
134
+
135
+ return {
136
+ "liberal_count": liberal_count,
137
+ "conservative_count": conservative_count,
138
+ "liberal_terms": liberal_matches,
139
+ "conservative_terms": conservative_matches,
140
+ "lean_score": lean_score,
141
+ "leaning": leaning,
142
+ "strength": strength
143
+ }
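To make the scoring concrete, a small worked example under assumed counts (the numbers are illustrative, not real matches):

liberal_count, conservative_count = 1, 3                      # assumed counts
total = liberal_count + conservative_count
lean_score = (conservative_count - liberal_count) / total     # 0.5
# 0.5 >= 0.2, so leaning == "conservative" and strength == min(1.0, 0.5 * 2) == 1.0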
144
 
145
  def detect_framing_bias(text):
146
  """
 
152
  Returns:
153
  dict: Framing analysis results
154
  """
155
+ text_lower = text.lower()
156
+ framing_counts = {}
157
+ framing_examples = {}
158
+
159
+ # Count framing patterns
160
+ for frame, patterns in FRAMING_PATTERNS.items():
161
+ framing_counts[frame] = 0
162
+ framing_examples[frame] = []
163
 
164
+ for pattern in patterns:
165
+ matches = re.findall(pattern, text_lower)
166
+ if matches:
167
+ framing_counts[frame] += len(matches)
168
+ # Store up to 5 examples of each frame
169
+ unique_matches = set(matches)
170
+ framing_examples[frame].extend(list(unique_matches)[:5])
171
+
172
+ # Calculate dominant frame
173
+ total_framing = sum(framing_counts.values())
174
+ framing_distribution = {}
175
+
176
+ if total_framing > 0:
177
+ for frame, count in framing_counts.items():
178
+ framing_distribution[frame] = count / total_framing
179
 
180
+ dominant_frame = max(framing_counts.items(), key=lambda x: x[1])[0]
181
+ frame_bias_strength = max(0.0, framing_distribution[dominant_frame] - 0.25)
182
+ else:
183
+ dominant_frame = "none"
184
+ frame_bias_strength = 0.0
185
+ framing_distribution = {frame: 0.0 for frame in FRAMING_PATTERNS.keys()}
186
+
187
+ return {
188
+ "framing_counts": framing_counts,
189
+ "framing_examples": framing_examples,
190
+ "framing_distribution": framing_distribution,
191
+ "dominant_frame": dominant_frame,
192
+ "frame_bias_strength": frame_bias_strength
193
+ }
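FRAMING_PATTERNS is defined earlier in this module and is not shown in this hunk; it is assumed to map frame names to lists of regex patterns. A hypothetical sketch of that shape, to clarify what detect_framing_bias iterates over:

# Hypothetical shape only - the real patterns live elsewhere in bias_detection.py.
FRAMING_PATTERNS_EXAMPLE = {
    "economic": [r'\bcost\w*\b', r'\btax\w*\b'],
    "security": [r'\bthreat\w*\b', r'\bsafety\b'],
}
# detect_framing_bias counts matches per frame, normalizes the counts into a
# distribution, and treats any share above the 0.25 baseline as frame bias strength.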
194
 
195
  def compare_bias(text1, text2, model_names=None):
196
  """
 
204
  Returns:
205
  dict: Comparative bias analysis
206
  """
 
 
 
207
  # Set default model names if not provided
208
  if model_names is None or len(model_names) < 2:
 
209
  model_names = ["Model 1", "Model 2"]
 
 
210
 
211
  model1_name, model2_name = model_names[0], model_names[1]
212
 
213
+ # Analyze each text
214
+ sentiment_results1 = detect_sentiment_bias(text1)
215
+ sentiment_results2 = detect_sentiment_bias(text2)
216
+
217
+ partisan_results1 = detect_partisan_leaning(text1)
218
+ partisan_results2 = detect_partisan_leaning(text2)
219
+
220
+ framing_results1 = detect_framing_bias(text1)
221
+ framing_results2 = detect_framing_bias(text2)
222
+
223
+ # Determine if there's a significant difference in bias
224
+ sentiment_difference = abs(sentiment_results1["bias_strength"] - sentiment_results2["bias_strength"])
225
+
226
+ # For partisan leaning, compare the scores (negative is liberal, positive is conservative)
227
+ partisan_difference = abs(partisan_results1["lean_score"] - partisan_results2["lean_score"])
228
+
229
+ # Calculate overall bias difference
230
+ overall_difference = (sentiment_difference + partisan_difference) / 2
231
+
232
+ # Compare dominant frames
233
+ frame_difference = framing_results1["dominant_frame"] != framing_results2["dominant_frame"] and \
234
+ (framing_results1["frame_bias_strength"] > 0.1 or framing_results2["frame_bias_strength"] > 0.1)
235
+
236
+ # Create comparative analysis
237
+ comparative = {
238
+ "sentiment": {
239
+ model1_name: sentiment_results1["bias_direction"],
240
+ model2_name: sentiment_results2["bias_direction"],
241
+ "difference": sentiment_difference,
242
+ "significant": sentiment_difference > 0.3
243
+ },
244
+ "partisan": {
245
+ model1_name: partisan_results1["leaning"],
246
+ model2_name: partisan_results2["leaning"],
247
+ "difference": partisan_difference,
248
+ "significant": partisan_difference > 0.4
249
+ },
250
+ "framing": {
251
+ model1_name: framing_results1["dominant_frame"],
252
+ model2_name: framing_results2["dominant_frame"],
253
+ "different_frames": frame_difference
254
+ },
255
+ "overall": {
256
+ "difference": overall_difference,
257
+ "significant_bias_difference": overall_difference > 0.35
258
  }
259
+ }
260
+
261
+ return {
262
+ "models": model_names,
263
+ model1_name: {
264
+ "sentiment": sentiment_results1,
265
+ "partisan": partisan_results1,
266
+ "framing": framing_results1
267
+ },
268
+ model2_name: {
269
+ "sentiment": sentiment_results2,
270
+ "partisan": partisan_results2,
271
+ "framing": framing_results2
272
+ },
273
+ "comparative": comparative
274
+ }
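An end-to-end sketch of the simplified compare_bias; the response texts and model names are placeholders:

report = compare_bias(
    "Response text from the first model...",
    "Response text from the second model...",
    model_names=["Model A", "Model B"],
)
print(report["comparative"]["sentiment"]["significant"])                 # difference > 0.3
print(report["comparative"]["partisan"]["significant"])                  # difference > 0.4
print(report["comparative"]["overall"]["significant_bias_difference"])   # difference > 0.35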
processors/bias_processor.py DELETED
@@ -1,56 +0,0 @@
1
- import gradio as gr
2
- import logging
3
-
4
- logger = logging.getLogger("gradio_app.bias_processor")
5
-
6
-
7
- def process_bias_detection(analysis_results, prompt, analyses):
8
- """
9
- Process and return visualization components for bias detection.
10
-
11
- Args:
12
- analysis_results (dict): The full analysis results.
13
- prompt (str): The prompt or key associated with the analysis.
14
- analyses (dict): The specific analysis results for bias detection.
15
-
16
- Returns:
17
- tuple: Visualization components for the Gradio app.
18
- """
19
- try:
20
- logger.info("Processing Bias Detection results...")
21
-
22
- # Import the bias visualizer function
23
- from visualization.bias_visualizer import process_and_visualize_bias_analysis
24
-
25
- # Use the specialized bias visualizer
26
- return process_and_visualize_bias_analysis(analysis_results)
27
-
28
- except Exception as e:
29
- # Log and return an error response
30
- import traceback
31
- error_message = f"Error processing Bias Detection results: {str(e)}\n{traceback.format_exc()}"
32
- logger.error(error_message)
33
- return default_error_response(error_message)
34
-
35
-
36
- def default_error_response(error_message):
37
- """Returns default error response for bias detection visualization"""
38
- # Create a complete response with the correct number of components (16)
39
- return [
40
- {}, # analysis_results_state
41
- False, # analysis_output visibility
42
- True, # visualization_area_visible
43
- gr.update(visible=True), # analysis_title
44
- gr.update(visible=False), # prompt_title
45
- gr.update(visible=False), # models_compared
46
- gr.update(visible=False), # model1_title
47
- gr.update(visible=False), # model1_words
48
- gr.update(visible=False), # model2_title
49
- gr.update(visible=False), # model2_words
50
- gr.update(visible=False), # similarity_metrics_title
51
- gr.update(visible=False), # similarity_metrics
52
- True, # status_message_visible
53
- gr.update(visible=True, value=f"Error: {error_message}"), # status_message
54
- gr.update(visible=False), # column
55
- gr.update(visible=False) # html/bias_visualizations
56
- ]
processors/bow_processor.py DELETED
@@ -1,107 +0,0 @@
1
- import gradio as gr
2
- import logging
3
-
4
- # Set up logging
5
- logger = logging.getLogger('gradio_app.processors.bow')
6
-
7
-
8
- def process_bow_analysis(analysis_results, prompt, analyses):
9
- """
10
- Process Bag of Words analysis and return UI updates
11
-
12
- Args:
13
- analysis_results (dict): Complete analysis results
14
- prompt (str): The prompt being analyzed
15
- analyses (dict): Analysis data for the prompt
16
-
17
- Returns:
18
- tuple: UI component updates
19
- """
20
- visualization_area_visible = True
21
- bow_results = analyses["bag_of_words"]
22
- models = bow_results.get("models", [])
23
-
24
- if len(models) < 2:
25
- from analysis_runner import default_no_visualization
26
- return default_no_visualization(analysis_results)
27
-
28
- prompt_title_visible = True
29
- prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
30
-
31
- models_compared_visible = True
32
- models_compared_value = f"### Comparing responses from {models[0]} and {models[1]}"
33
-
34
- # Extract and format information for display
35
- model1_name = models[0]
36
- model2_name = models[1]
37
-
38
- # Format important words for each model
39
- important_words = bow_results.get("important_words", {})
40
-
41
- model1_title_visible = False
42
- model1_title_value = ""
43
- model1_words_visible = False
44
- model1_words_value = ""
45
-
46
- if model1_name in important_words:
47
- model1_title_visible = True
48
- model1_title_value = f"#### Top Words Used by {model1_name}"
49
-
50
- word_list = [f"**{item['word']}** ({item['count']})" for item in important_words[model1_name][:10]]
51
- model1_words_visible = True
52
- model1_words_value = ", ".join(word_list)
53
-
54
- model2_title_visible = False
55
- model2_title_value = ""
56
- model2_words_visible = False
57
- model2_words_value = ""
58
-
59
- if model2_name in important_words:
60
- model2_title_visible = True
61
- model2_title_value = f"#### Top Words Used by {model2_name}"
62
-
63
- word_list = [f"**{item['word']}** ({item['count']})" for item in important_words[model2_name][:10]]
64
- model2_words_visible = True
65
- model2_words_value = ", ".join(word_list)
66
-
67
- similarity_title_visible = False
68
- similarity_metrics_visible = False
69
- similarity_metrics_value = ""
70
-
71
- # Format similarity metrics
72
- comparisons = bow_results.get("comparisons", {})
73
- comparison_key = f"{model1_name} vs {model2_name}"
74
-
75
- if comparison_key in comparisons:
76
- metrics = comparisons[comparison_key]
77
- cosine = metrics.get("cosine_similarity", 0)
78
- jaccard = metrics.get("jaccard_similarity", 0)
79
- semantic = metrics.get("semantic_similarity", 0)
80
- common_words = metrics.get("common_word_count", 0)
81
-
82
- similarity_title_visible = True
83
- similarity_metrics_visible = True
84
- similarity_metrics_value = f"""
85
- - **Cosine Similarity**: {cosine:.2f} (higher means more similar word frequency patterns)
86
- - **Jaccard Similarity**: {jaccard:.2f} (higher means more word overlap)
87
- - **Semantic Similarity**: {semantic:.2f} (higher means more similar meaning)
88
- - **Common Words**: {common_words} words appear in both responses
89
- """
90
-
91
- return (
92
- analysis_results, # analysis_results_state
93
- False, # analysis_output visibility
94
- True, # visualization_area_visible
95
- gr.update(visible=True), # analysis_title
96
- gr.update(visible=prompt_title_visible, value=prompt_title_value), # prompt_title
97
- gr.update(visible=models_compared_visible, value=models_compared_value), # models_compared
98
- gr.update(visible=model1_title_visible, value=model1_title_value), # model1_title
99
- gr.update(visible=model1_words_visible, value=model1_words_value), # model1_words
100
- gr.update(visible=model2_title_visible, value=model2_title_value), # model2_title
101
- gr.update(visible=model2_words_visible, value=model2_words_value), # model2_words
102
- gr.update(visible=similarity_title_visible), # similarity_metrics_title
103
- gr.update(visible=similarity_metrics_visible, value=similarity_metrics_value), # similarity_metrics
104
- False, # status_message_visible
105
- gr.update(visible=False), # status_message
106
- gr.update(visible=False) # bias_visualizations - Not visible for BoW analysis
107
- )
processors/classifier_processor.py DELETED
@@ -1,99 +0,0 @@
1
- import gradio as gr
2
- import logging
3
-
4
- # Set up logging
5
- logger = logging.getLogger('gradio_app.processors.classifier')
6
-
7
-
8
- def process_classifier_analysis(analysis_results, prompt, analyses):
9
- """
10
- Process Classifier analysis and return UI updates
11
-
12
- Args:
13
- analysis_results (dict): Complete analysis results
14
- prompt (str): The prompt being analyzed
15
- analyses (dict): Analysis data for the prompt
16
-
17
- Returns:
18
- tuple: UI component updates
19
- """
20
- visualization_area_visible = True
21
- classifier_results = analyses["classifier"]
22
- models = classifier_results.get("models", [])
23
-
24
- if len(models) < 2:
25
- from analysis_runner import default_no_visualization
26
- return default_no_visualization(analysis_results)
27
-
28
- prompt_title_visible = True
29
- prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
30
-
31
- models_compared_visible = True
32
- models_compared_value = f"### Classifier Analysis for {models[0]} and {models[1]}"
33
-
34
- # Extract and format classifier information
35
- model1_name = models[0]
36
- model2_name = models[1]
37
-
38
- # Display classifications for each model
39
- classifications = classifier_results.get("classifications", {})
40
-
41
- model1_title_visible = False
42
- model1_title_value = ""
43
- model1_words_visible = False
44
- model1_words_value = ""
45
-
46
- if classifications:
47
- model1_title_visible = True
48
- model1_title_value = f"#### Classification Results"
49
- model1_words_visible = True
50
-
51
- model1_results = classifications.get(model1_name, {})
52
- model2_results = classifications.get(model2_name, {})
53
-
54
- model1_words_value = f"""
55
- **{model1_name}**:
56
- - Formality: {model1_results.get('formality', 'N/A')}
57
- - Sentiment: {model1_results.get('sentiment', 'N/A')}
58
- - Complexity: {model1_results.get('complexity', 'N/A')}
59
-
60
- **{model2_name}**:
61
- - Formality: {model2_results.get('formality', 'N/A')}
62
- - Sentiment: {model2_results.get('sentiment', 'N/A')}
63
- - Complexity: {model2_results.get('complexity', 'N/A')}
64
- """
65
-
66
- # Show comparison
67
- model2_title_visible = False
68
- model2_title_value = ""
69
- model2_words_visible = False
70
- model2_words_value = ""
71
-
72
- differences = classifier_results.get("differences", {})
73
- if differences:
74
- model2_title_visible = True
75
- model2_title_value = f"#### Classification Comparison"
76
- model2_words_visible = True
77
-
78
- model2_words_value = "\n".join([
79
- f"- **{category}**: {diff}"
80
- for category, diff in differences.items()
81
- ])
82
-
83
- return (
84
- analysis_results, # analysis_results_state
85
- False, # analysis_output visibility
86
- True, # visualization_area_visible
87
- gr.update(visible=True), # analysis_title
88
- gr.update(visible=prompt_title_visible, value=prompt_title_value), # prompt_title
89
- gr.update(visible=models_compared_visible, value=models_compared_value), # models_compared
90
- gr.update(visible=model1_title_visible, value=model1_title_value), # model1_title
91
- gr.update(visible=model1_words_visible, value=model1_words_value), # model1_words
92
- gr.update(visible=model2_title_visible, value=model2_title_value), # model2_title
93
- gr.update(visible=model2_words_visible, value=model2_words_value), # model2_words
94
- gr.update(visible=False), # similarity_metrics_title
95
- gr.update(visible=False), # similarity_metrics
96
- False, # status_message_visible
97
- gr.update(visible=False), # status_message
98
- gr.update(visible=False) # bias_visualizations - Not visible for Classifier analysis
99
- )
processors/ngram_processor.py DELETED
@@ -1,103 +0,0 @@
1
- import gradio as gr
2
- import logging
3
-
4
- # Set up logging
5
- logger = logging.getLogger('gradio_app.processors.ngram')
6
-
7
-
8
- def process_ngram_analysis(analysis_results, prompt, analyses):
9
- """
10
- Process N-gram analysis and return UI updates
11
-
12
- Args:
13
- analysis_results (dict): Complete analysis results
14
- prompt (str): The prompt being analyzed
15
- analyses (dict): Analysis data for the prompt
16
-
17
- Returns:
18
- tuple: UI component updates
19
- """
20
- visualization_area_visible = True
21
- ngram_results = analyses["ngram_analysis"]
22
- models = ngram_results.get("models", [])
23
- ngram_size = ngram_results.get("ngram_size", 2)
24
- size_name = "Unigrams" if ngram_size == 1 else f"{ngram_size}-grams"
25
-
26
- if len(models) < 2:
27
- from analysis_runner import default_no_visualization
28
- return default_no_visualization(analysis_results)
29
-
30
- prompt_title_visible = True
31
- prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
32
-
33
- models_compared_visible = True
34
- models_compared_value = f"### {size_name} Analysis: Comparing responses from {models[0]} and {models[1]}"
35
-
36
- # Extract and format information for display
37
- model1_name = models[0]
38
- model2_name = models[1]
39
-
40
- # Format important n-grams for each model
41
- important_ngrams = ngram_results.get("important_ngrams", {})
42
-
43
- model1_title_visible = False
44
- model1_title_value = ""
45
- model1_words_visible = False
46
- model1_words_value = ""
47
-
48
- if model1_name in important_ngrams:
49
- model1_title_visible = True
50
- model1_title_value = f"#### Top {size_name} Used by {model1_name}"
51
-
52
- ngram_list = [f"**{item['ngram']}** ({item['count']})" for item in important_ngrams[model1_name][:10]]
53
- model1_words_visible = True
54
- model1_words_value = ", ".join(ngram_list)
55
-
56
- model2_title_visible = False
57
- model2_title_value = ""
58
- model2_words_visible = False
59
- model2_words_value = ""
60
-
61
- if model2_name in important_ngrams:
62
- model2_title_visible = True
63
- model2_title_value = f"#### Top {size_name} Used by {model2_name}"
64
-
65
- ngram_list = [f"**{item['ngram']}** ({item['count']})" for item in important_ngrams[model2_name][:10]]
66
- model2_words_visible = True
67
- model2_words_value = ", ".join(ngram_list)
68
-
69
- similarity_title_visible = False
70
- similarity_metrics_visible = False
71
- similarity_metrics_value = ""
72
-
73
- # Format similarity metrics if available
74
- if "comparisons" in ngram_results:
75
- comparison_key = f"{model1_name} vs {model2_name}"
76
-
77
- if comparison_key in ngram_results["comparisons"]:
78
- metrics = ngram_results["comparisons"][comparison_key]
79
- common_count = metrics.get("common_ngram_count", 0)
80
-
81
- similarity_title_visible = True
82
- similarity_metrics_visible = True
83
- similarity_metrics_value = f"""
84
- - **Common {size_name}**: {common_count} {size_name.lower()} appear in both responses
85
- """
86
-
87
- return (
88
- analysis_results, # analysis_results_state
89
- False, # analysis_output visibility
90
- True, # visualization_area_visible
91
- gr.update(visible=True), # analysis_title
92
- gr.update(visible=prompt_title_visible, value=prompt_title_value), # prompt_title
93
- gr.update(visible=models_compared_visible, value=models_compared_value), # models_compared
94
- gr.update(visible=model1_title_visible, value=model1_title_value), # model1_title
95
- gr.update(visible=model1_words_visible, value=model1_words_value), # model1_words
96
- gr.update(visible=model2_title_visible, value=model2_title_value), # model2_title
97
- gr.update(visible=model2_words_visible, value=model2_words_value), # model2_words
98
- gr.update(visible=similarity_title_visible), # similarity_metrics_title
99
- gr.update(visible=similarity_metrics_visible, value=similarity_metrics_value), # similarity_metrics
100
- False, # status_message_visible
101
- gr.update(visible=False), # status_message
102
- gr.update(visible=False) # bias_visualizations - Not visible for N-gram analysis
103
- )
processors/topic_modeling.py CHANGED
@@ -1,6 +1,5 @@
1
  """
2
- Enhanced topic modeling processor for comparing text responses with better error handling
3
- and more robust algorithm configuration
4
  """
5
  from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
6
  from sklearn.decomposition import LatentDirichletAllocation, NMF
@@ -8,12 +7,6 @@ import numpy as np
8
  import nltk
9
  from nltk.corpus import stopwords
10
  import re
11
- from scipy.spatial import distance
12
- import logging
13
-
14
- # Set up logging
15
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
16
- logger = logging.getLogger('topic_modeling')
17
 
18
  def preprocess_text(text):
19
  """
@@ -25,25 +18,20 @@ def preprocess_text(text):
25
  Returns:
26
  str: Preprocessed text
27
  """
28
- try:
29
- # Convert to lowercase
30
- text = text.lower()
31
-
32
- # Remove special characters and digits
33
- text = re.sub(r'[^a-zA-Z\s]', '', text)
34
-
35
- # Tokenize
36
- tokens = nltk.word_tokenize(text)
37
-
38
- # Remove stopwords
39
- stop_words = set(stopwords.words('english'))
40
- tokens = [token for token in tokens if token not in stop_words and len(token) > 3]
41
-
42
- return ' '.join(tokens)
43
- except Exception as e:
44
- logger.error(f"Error in preprocess_text: {str(e)}")
45
- # Return original text if preprocessing fails
46
- return text
47
 
48
  def get_top_words_per_topic(model, feature_names, n_top_words=10):
49
  """
@@ -82,14 +70,6 @@ def extract_topics(texts, n_topics=3, n_top_words=10, method="lda"):
82
  Returns:
83
  dict: Topic modeling results with topics and document-topic distributions
84
  """
85
- if isinstance(n_topics, str):
86
- n_topics = int(n_topics)
87
-
88
- # Ensure n_topics is at least 2
89
- n_topics = max(2, n_topics)
90
-
91
- logger.info(f"Starting topic modeling with method={method}, n_topics={n_topics}")
92
-
93
  result = {
94
  "method": method,
95
  "n_topics": n_topics,
@@ -97,102 +77,45 @@ def extract_topics(texts, n_topics=3, n_top_words=10, method="lda"):
97
  "document_topics": []
98
  }
99
 
100
- try:
101
- # Preprocess texts
102
- logger.info("Preprocessing texts")
103
- preprocessed_texts = [preprocess_text(text) for text in texts]
104
-
105
- # Check if texts are not empty after preprocessing
106
- preprocessed_texts = [text for text in preprocessed_texts if len(text.strip()) > 0]
107
- if not preprocessed_texts:
108
- logger.warning("All texts are empty after preprocessing")
109
- return result
110
-
111
- # Create document-term matrix
112
- logger.info(f"Creating document-term matrix using {method}")
113
- if method == "nmf":
114
- # For NMF, use TF-IDF vectorization
115
- vectorizer = TfidfVectorizer(max_features=1000, min_df=1, max_df=0.95, stop_words='english')
116
- else:
117
- # For LDA, use CountVectorizer
118
- vectorizer = CountVectorizer(max_features=1000, min_df=1, max_df=0.95, stop_words='english')
119
-
120
- try:
121
- X = vectorizer.fit_transform(preprocessed_texts)
122
- feature_names = vectorizer.get_feature_names_out()
123
-
124
- # Check if we have enough features
125
- if X.shape[1] < n_topics:
126
- logger.warning(f"Only {X.shape[1]} features found, reducing n_topics from {n_topics}")
127
- n_topics = max(2, X.shape[1] - 1)
128
- result["n_topics"] = n_topics
129
-
130
- # Apply topic modeling
131
- logger.info(f"Applying {method.upper()} with {n_topics} topics")
132
- if method == "nmf":
133
- # Non-negative Matrix Factorization
134
- model = NMF(n_components=n_topics, random_state=42, max_iter=1000)
135
- else:
136
- # Latent Dirichlet Allocation
137
- model = LatentDirichletAllocation(
138
- n_components=n_topics,
139
- random_state=42,
140
- max_iter=20,
141
- learning_method='online'
142
- )
143
-
144
- topic_distribution = model.fit_transform(X)
145
-
146
- # Get top words for each topic
147
- logger.info("Extracting top words for each topic")
148
- result["topics"] = get_top_words_per_topic(model, feature_names, n_top_words)
149
-
150
- # Get topic distribution for each document
151
- logger.info("Calculating topic distributions for documents")
152
- for i, dist in enumerate(topic_distribution):
153
- # Normalize for easier comparison
154
- normalized_dist = dist / np.sum(dist) if np.sum(dist) > 0 else dist
155
- result["document_topics"].append({
156
- "document_id": i,
157
- "distribution": normalized_dist.tolist()
158
- })
159
-
160
- logger.info("Topic modeling completed successfully")
161
-
162
- except Exception as e:
163
- logger.error(f"Error in vectorization or modeling: {str(e)}")
164
- result["error"] = f"Topic modeling failed: {str(e)}"
165
-
166
- except Exception as e:
167
- logger.error(f"General error in extract_topics: {str(e)}")
168
- result["error"] = f"Topic modeling failed: {str(e)}"
169
 
170
- return result
171
-
172
- def calculate_jensen_shannon_divergence(p, q):
173
- """
174
- Calculate Jensen-Shannon divergence between two probability distributions
 
 
 
 
175
 
176
- Args:
177
- p (array): First probability distribution
178
- q (array): Second probability distribution
179
-
180
- Returns:
181
- float: Jensen-Shannon divergence
182
- """
183
- # Ensure inputs are numpy arrays
184
- p = np.array(p)
185
- q = np.array(q)
 
 
186
 
187
- # Normalize if not already normalized
188
- if np.sum(p) != 1.0:
189
- p = p / np.sum(p) if np.sum(p) > 0 else p
190
- if np.sum(q) != 1.0:
191
- q = q / np.sum(q) if np.sum(q) > 0 else q
192
 
193
- # Calculate Jensen-Shannon divergence
194
- m = 0.5 * (p + q)
195
- return 0.5 * (distance.jensenshannon(p, m) + distance.jensenshannon(q, m))
196
 
197
  def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method="lda", model_names=None):
198
  """
@@ -209,98 +132,50 @@ def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method=
209
  Returns:
210
  dict: Comparison results with topics from both sets and similarity metrics
211
  """
212
- logger.info(f"Starting topic comparison with n_topics={n_topics}, method={method}")
213
-
214
  # Set default model names if not provided
215
  if model_names is None:
216
  model_names = ["Model 1", "Model 2"]
217
 
218
- # Initialize the result structure
219
  result = {
220
  "method": method,
221
  "n_topics": n_topics,
222
- "models": model_names,
223
- "model_topics": {},
224
- "topics": [],
225
- "comparisons": {}
 
 
226
  }
227
 
228
- try:
229
- # Extract topics for each set separately
230
- # For very short texts, try combining all texts from each model
231
- combined_text_1 = " ".join(texts_set_1)
232
- combined_text_2 = " ".join(texts_set_2)
233
-
234
- # Process all texts together to find common topics
235
- all_texts = texts_set_1 + texts_set_2
236
- logger.info(f"Processing {len(all_texts)} total texts")
237
-
238
- # Extract topics from combined corpus
239
- combined_result = extract_topics(all_texts, n_topics, n_top_words, method)
240
-
241
- # Check for errors
242
- if "error" in combined_result:
243
- logger.warning(f"Error in combined topic extraction: {combined_result['error']}")
244
- result["error"] = combined_result["error"]
245
- return result
246
-
247
- # Store topics from combined analysis
248
- result["topics"] = combined_result["topics"]
249
-
250
- # Now process each text set to get their topic distributions
251
- model1_doc_topics = []
252
- model2_doc_topics = []
253
-
254
- # Try to use the same model from combined analysis for consistency
255
- if "document_topics" in combined_result and len(combined_result["document_topics"]) == len(all_texts):
256
- # Get document topics for each model
257
- n_docs_model1 = len(texts_set_1)
258
- for i, doc_topic in enumerate(combined_result["document_topics"]):
259
- if i < n_docs_model1:
260
- model1_doc_topics.append(doc_topic["distribution"])
261
- else:
262
- model2_doc_topics.append(doc_topic["distribution"])
263
- else:
264
- # Fallback: run separate topic modeling for each model
265
- logger.info("Using separate topic modeling for each model")
266
- model1_result = extract_topics([combined_text_1], n_topics, n_top_words, method)
267
- model2_result = extract_topics([combined_text_2], n_topics, n_top_words, method)
268
-
269
- if "document_topics" in model1_result and model1_result["document_topics"]:
270
- model1_doc_topics = [doc["distribution"] for doc in model1_result["document_topics"]]
271
-
272
- if "document_topics" in model2_result and model2_result["document_topics"]:
273
- model2_doc_topics = [doc["distribution"] for doc in model2_result["document_topics"]]
274
-
275
- # Calculate average topic distribution for each model
276
- if model1_doc_topics:
277
- model1_avg_distribution = np.mean(model1_doc_topics, axis=0).tolist()
278
- result["model_topics"][model_names[0]] = model1_avg_distribution
279
-
280
- if model2_doc_topics:
281
- model2_avg_distribution = np.mean(model2_doc_topics, axis=0).tolist()
282
- result["model_topics"][model_names[1]] = model2_avg_distribution
283
-
284
- # Calculate similarity between models' topic distributions
285
- if model_names[0] in result["model_topics"] and model_names[1] in result["model_topics"]:
286
- comparison_key = f"{model_names[0]} vs {model_names[1]}"
287
- dist1 = result["model_topics"][model_names[0]]
288
- dist2 = result["model_topics"][model_names[1]]
289
-
290
- # Calculate Jensen-Shannon divergence (smaller means more similar)
291
- js_div = calculate_jensen_shannon_divergence(dist1, dist2)
292
-
293
- # Create comparison result
294
- result["comparisons"][comparison_key] = {
295
- "js_divergence": js_div
296
- }
297
-
298
- logger.info(f"Topic comparison completed successfully. JS divergence: {js_div:.4f}")
299
- else:
300
- logger.warning("Could not calculate model comparisons due to missing topic distributions")
301
-
302
- except Exception as e:
303
- logger.error(f"Error in compare_topics: {str(e)}")
304
- result["error"] = f"Topic comparison failed: {str(e)}"
305
-
306
- return result
 
1
  """
2
+ Topic modeling processor for comparing text responses
 
3
  """
4
  from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
5
  from sklearn.decomposition import LatentDirichletAllocation, NMF
 
7
  import nltk
8
  from nltk.corpus import stopwords
9
  import re
 
 
 
 
 
 
10
 
11
  def preprocess_text(text):
12
  """
 
18
  Returns:
19
  str: Preprocessed text
20
  """
21
+ # Convert to lowercase
22
+ text = text.lower()
23
+
24
+ # Remove special characters and digits
25
+ text = re.sub(r'[^a-zA-Z\s]', '', text)
26
+
27
+ # Tokenize
28
+ tokens = nltk.word_tokenize(text)
29
+
30
+ # Remove stopwords
31
+ stop_words = set(stopwords.words('english'))
32
+ tokens = [token for token in tokens if token not in stop_words and len(token) > 3]
33
+
34
+ return ' '.join(tokens)
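A quick sketch of what the trimmed-down preprocess_text does to one input string; it assumes nltk's 'punkt' tokenizer and 'stopwords' corpus are already downloaded:

print(preprocess_text("The 3 models disagreed sharply!"))
# expected output along the lines of: "models disagreed sharply"
# (lowercased, digits/punctuation stripped, stopwords and tokens of <= 3 characters dropped)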
35
 
36
  def get_top_words_per_topic(model, feature_names, n_top_words=10):
37
  """
 
70
  Returns:
71
  dict: Topic modeling results with topics and document-topic distributions
72
  """
73
  result = {
74
  "method": method,
75
  "n_topics": n_topics,
 
77
  "document_topics": []
78
  }
79
 
80
+ # Preprocess texts
81
+ preprocessed_texts = [preprocess_text(text) for text in texts]
 
82
 
83
+ # Create document-term matrix
84
+ if method == "nmf":
85
+ # For NMF, use TF-IDF vectorization
86
+ # Adjust min_df and max_df for small document sets
87
+ vectorizer = TfidfVectorizer(max_features=1000, min_df=1, max_df=1.0)
88
+ else:
89
+ # For LDA, use CountVectorizer
90
+ # Adjust min_df and max_df for small document sets
91
+ vectorizer = CountVectorizer(max_features=1000, min_df=1, max_df=1.0)
92
 
93
+ X = vectorizer.fit_transform(preprocessed_texts)
94
+ feature_names = vectorizer.get_feature_names_out()
95
+
96
+ # Apply topic modeling
97
+ if method == "nmf":
98
+ # Non-negative Matrix Factorization
99
+ model = NMF(n_components=n_topics, random_state=42, max_iter=1000)
100
+ else:
101
+ # Latent Dirichlet Allocation
102
+ model = LatentDirichletAllocation(n_components=n_topics, random_state=42, max_iter=20)
103
+
104
+ topic_distribution = model.fit_transform(X)
105
 
106
+ # Get top words for each topic
107
+ result["topics"] = get_top_words_per_topic(model, feature_names, n_top_words)
 
 
 
108
 
109
+ # Get topic distribution for each document
110
+ for i, dist in enumerate(topic_distribution):
111
+ # Normalize for easier comparison
112
+ normalized_dist = dist / np.sum(dist) if np.sum(dist) > 0 else dist
113
+ result["document_topics"].append({
114
+ "document_id": i,
115
+ "distribution": normalized_dist.tolist()
116
+ })
117
+
118
+ return result
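For reference, a sketch of consuming the returned structure; the input texts are placeholders, and with very short documents the vectorizer may raise on an empty vocabulary since this version drops the earlier guards:

res = extract_topics(["first response text ...", "second response text ..."],
                     n_topics=2, n_top_words=5, method="lda")
for topic in res["topics"]:            # each topic exposes at least a "words" list
    print(topic["words"])
for doc in res["document_topics"]:     # per-document normalized topic distributions
    print(doc["document_id"], doc["distribution"])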
119
 
120
  def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method="lda", model_names=None):
121
  """
 
132
  Returns:
133
  dict: Comparison results with topics from both sets and similarity metrics
134
  """
 
 
135
  # Set default model names if not provided
136
  if model_names is None:
137
  model_names = ["Model 1", "Model 2"]
138
 
139
+ # Extract topics for each set
140
+ topics_set_1 = extract_topics(texts_set_1, n_topics, n_top_words, method)
141
+ topics_set_2 = extract_topics(texts_set_2, n_topics, n_top_words, method)
142
+
143
+ # Calculate similarity between topics
144
+ similarity_matrix = []
145
+ for topic1 in topics_set_1["topics"]:
146
+ topic_similarities = []
147
+ words1 = set(topic1["words"])
148
+ for topic2 in topics_set_2["topics"]:
149
+ words2 = set(topic2["words"])
150
+ # Jaccard similarity: intersection over union
151
+ intersection = len(words1.intersection(words2))
152
+ union = len(words1.union(words2))
153
+ similarity = intersection / union if union > 0 else 0
154
+ topic_similarities.append(similarity)
155
+ similarity_matrix.append(topic_similarities)
156
+
157
+ # Find the best matching topic pairs
158
+ matched_topics = []
159
+ for i, similarities in enumerate(similarity_matrix):
160
+ best_match_idx = np.argmax(similarities)
161
+ matched_topics.append({
162
+ "set1_topic_id": i,
163
+ "set1_topic_words": topics_set_1["topics"][i]["words"],
164
+ "set2_topic_id": best_match_idx,
165
+ "set2_topic_words": topics_set_2["topics"][best_match_idx]["words"],
166
+ "similarity": similarities[best_match_idx]
167
+ })
168
+
169
+ # Construct result
170
  result = {
171
  "method": method,
172
  "n_topics": n_topics,
173
+ "set1_topics": topics_set_1["topics"],
174
+ "set2_topics": topics_set_2["topics"],
175
+ "similarity_matrix": similarity_matrix,
176
+ "matched_topics": matched_topics,
177
+ "average_similarity": np.mean([match["similarity"] for match in matched_topics]),
178
+ "models": model_names # Add model names to result
179
  }
180
 
181
+ return result
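The topic-matching step above scores each topic pair with Jaccard similarity over the topics' word sets; a tiny worked example with assumed word lists:

words1 = {"climate", "policy", "energy", "carbon"}     # assumed topic 1 words
words2 = {"energy", "carbon", "tax", "industry"}       # assumed topic 2 words
jaccard = len(words1 & words2) / len(words1 | words2)  # 2 / 6 = 0.333...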
processors/topic_processor.py DELETED
@@ -1,156 +0,0 @@
1
- import gradio as gr
2
- import logging
3
-
4
- # Set up logging
5
- logger = logging.getLogger('gradio_app.processors.topic')
6
-
7
-
8
- def process_topic_modeling(analysis_results, prompt, analyses):
9
- """
10
- Process Topic Modeling analysis and return UI updates
11
-
12
- Args:
13
- analysis_results (dict): Complete analysis results
14
- prompt (str): The prompt being analyzed
15
- analyses (dict): Analysis data for the prompt
16
-
17
- Returns:
18
- tuple: UI component updates
19
- """
20
- topic_results = analyses["topic_modeling"]
21
-
22
- # Check for errors in topic modeling
23
- if "error" in topic_results:
24
- return (
25
- analysis_results,
26
- False, # Don't show raw JSON
27
- False, # Don't show visualization area
28
- gr.update(visible=False),
29
- gr.update(visible=False),
30
- gr.update(visible=False),
31
- gr.update(visible=False),
32
- gr.update(visible=False),
33
- gr.update(visible=False),
34
- gr.update(visible=False),
35
- gr.update(visible=False),
36
- gr.update(visible=False),
37
- True, # Show status message
38
- gr.update(visible=True, value=f"❌ **Topic modeling error:** {topic_results['error']}"),
39
- gr.update(visible=False) # bias_visualizations
40
- )
41
-
42
- visualization_area_visible = True
43
- models = topic_results.get("models", [])
44
- method = topic_results.get("method", "lda").upper()
45
- n_topics = topic_results.get("n_topics", 3)
46
-
47
- if len(models) < 2:
48
- from analysis_runner import default_no_visualization
49
- return default_no_visualization(analysis_results)
50
-
51
- prompt_title_visible = True
52
- prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
53
-
54
- models_compared_visible = True
55
- models_compared_value = f"### Topic Modeling Analysis ({method}, {n_topics} topics)"
56
-
57
- # Initialize component visibility and values
58
- model1_title_visible = False
59
- model1_title_value = ""
60
- model1_words_visible = False
61
- model1_words_value = ""
62
- model2_title_visible = False
63
- model2_title_value = ""
64
- model2_words_visible = False
65
- model2_words_value = ""
66
- similarity_title_visible = False
67
- similarity_metrics_visible = False
68
- similarity_metrics_value = ""
69
-
70
- # Extract and format topic information
71
- topics = topic_results.get("topics", [])
72
-
73
- if topics:
74
- # Format topic info for display
75
- topic_info = []
76
- for topic in topics[:5]: # Show first 5 topics
77
- topic_id = topic.get("id", 0)
78
- words = topic.get("words", [])[:5] # Top 5 words per topic
79
-
80
- if words:
81
- topic_info.append(f"**Topic {topic_id + 1}**: {', '.join(words)}")
82
-
83
- if topic_info:
84
- model1_title_visible = True
85
- model1_title_value = "#### Discovered Topics"
86
- model1_words_visible = True
87
- model1_words_value = "\n".join(topic_info)
88
-
89
- # Get topic distributions for models
90
- model_topics = topic_results.get("model_topics", {})
91
-
92
- if model_topics:
93
- model1_name = models[0]
94
- model2_name = models[1]
95
-
96
- # Format topic distribution info
97
- if model1_name in model_topics and model2_name in model_topics:
98
- model2_title_visible = True
99
- model2_title_value = "#### Topic Distribution"
100
- model2_words_visible = True
101
-
102
- # Simple distribution display
103
- dist1 = model_topics[model1_name]
104
- dist2 = model_topics[model2_name]
105
-
106
- model2_words_value = f"""
107
- **{model1_name}**: {', '.join([f"Topic {i + 1}: {v:.2f}" for i, v in enumerate(dist1[:5])])}
108
-
109
- **{model2_name}**: {', '.join([f"Topic {i + 1}: {v:.2f}" for i, v in enumerate(dist2[:5])])}
110
- """
111
-
112
- # Add similarity metrics if available
113
- comparisons = topic_results.get("comparisons", {})
114
- if comparisons:
115
- comparison_key = f"{model1_name} vs {model2_name}"
116
-
117
- if comparison_key in comparisons:
118
- metrics = comparisons[comparison_key]
119
- js_div = metrics.get("js_divergence", 0)
120
-
121
- # Add interpretation
122
- similarity_text = ""
123
- if js_div < 0.2:
124
- similarity_text = "very similar"
125
- elif js_div < 0.4:
126
- similarity_text = "somewhat similar"
127
- elif js_div < 0.6:
128
- similarity_text = "moderately different"
129
- else:
130
- similarity_text = "very different"
131
-
132
- similarity_title_visible = True
133
- similarity_metrics_visible = True
134
- similarity_metrics_value = f"""
135
- - **Topic Distribution Divergence**: {js_div:.4f}
136
- - The topic distributions between models are **{similarity_text}**
137
- - *Lower divergence values indicate more similar topic distributions*
138
- """
139
-
140
- return (
141
- analysis_results, # analysis_results_state
142
- False, # analysis_output visibility
143
- True, # visualization_area_visible
144
- gr.update(visible=True), # analysis_title
145
- gr.update(visible=prompt_title_visible, value=prompt_title_value), # prompt_title
146
- gr.update(visible=models_compared_visible, value=models_compared_value), # models_compared
147
- gr.update(visible=model1_title_visible, value=model1_title_value), # model1_title
148
- gr.update(visible=model1_words_visible, value=model1_words_value), # model1_words
149
- gr.update(visible=model2_title_visible, value=model2_title_value), # model2_title
150
- gr.update(visible=model2_words_visible, value=model2_words_value), # model2_words
151
- gr.update(visible=similarity_title_visible), # similarity_metrics_title
152
- gr.update(visible=similarity_metrics_visible, value=similarity_metrics_value), # similarity_metrics
153
- False, # status_message_visible
154
- gr.update(visible=False), # status_message
155
- gr.update(visible=False) # bias_visualizations - Not visible for Topic Modeling
156
- )
ui/analysis_screen.py CHANGED
@@ -8,18 +8,13 @@ from processors.ngram_analysis import compare_ngrams
8
  from processors.bow_analysis import compare_bow
9
  from processors.text_classifiers import classify_formality, classify_sentiment, classify_complexity, compare_classifications
10
  from processors.bias_detection import compare_bias
11
- import logging
12
-
13
- # Set up logging
14
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
15
- logger = logging.getLogger('analysis_screen')
16
 
17
  def create_analysis_screen():
18
  """
19
  Create the analysis options screen
20
 
21
  Returns:
22
- tuple: (analysis_options, analysis_params, run_analysis_btn, analysis_output, ngram_n, topic_count)
23
  """
24
  with gr.Column() as analysis_screen:
25
  gr.Markdown("## Analysis Options")
@@ -40,14 +35,25 @@ def create_analysis_screen():
40
  label="Select Analysis Type"
41
  )
42
 
  # Create N-gram parameters accessible at top level
44
  ngram_n = gr.Radio(
45
  choices=["1", "2", "3"], value="2",
46
  label="N-gram Size",
47
  visible=False
48
  )
49
-
50
- # Removed ngram_top slider
 
 
 
51
 
52
  # Create topic modeling parameter accessible at top level
53
  topic_count = gr.Slider(
@@ -55,6 +61,15 @@ def create_analysis_screen():
55
  label="Number of Topics",
56
  visible=False
57
  )
 
59
  # Parameters for each analysis type
60
  with gr.Group() as analysis_params:
@@ -63,16 +78,20 @@ def create_analysis_screen():
63
  gr.Markdown("### Topic Modeling Parameters")
64
  # We'll use the topic_count defined above
65
 
66
- # N-gram parameters group (using external ngram_n, removed ngram_top)
67
  with gr.Group(visible=False) as ngram_params:
68
  gr.Markdown("### N-gram Parameters")
69
- # We're already using ngram_n defined above
70
 
71
- # Bias detection parameters - simplified with no checkboxes
72
  with gr.Group(visible=False) as bias_params:
73
  gr.Markdown("### Bias Detection Parameters")
74
- gr.Markdown("Using partisan leaning bias detection and sentiment analysis")
75
- gr.Markdown("This analysis detects sentiment bias, partisan leaning, and issue framing patterns.")
 
77
  # Classifier parameters
78
  with gr.Group(visible=False) as classifier_params:
@@ -87,7 +106,9 @@ def create_analysis_screen():
87
  bias_params: gr.update(visible=selected == "Bias Detection"),
88
  classifier_params: gr.update(visible=selected == "Classifier"),
89
  ngram_n: gr.update(visible=selected == "N-gram Analysis"),
 
90
  topic_count: gr.update(visible=selected == "Topic Modeling"),
 
91
  }
92
 
93
  # Set up event handler for analysis selection
@@ -100,7 +121,9 @@ def create_analysis_screen():
100
  bias_params,
101
  classifier_params,
102
  ngram_n,
 
103
  topic_count,
 
104
  ]
105
  )
106
 
@@ -110,8 +133,8 @@ def create_analysis_screen():
110
  # Analysis output area - hidden JSON component to store raw results
111
  analysis_output = gr.JSON(label="Analysis Results", visible=False)
112
 
113
- # Return the components needed by app.py, with ngram_top removed
114
- return analysis_options, analysis_params, run_analysis_btn, analysis_output, ngram_n, topic_count
115
 
116
  # Add the implementation of these helper functions
117
  def extract_important_words(text, top_n=20):
@@ -324,10 +347,7 @@ def process_analysis_request(dataset, selected_analysis, parameters):
324
  Returns:
325
  tuple: A tuple containing (analysis_results, visualization_data)
326
  """
327
- logger.info(f"Processing analysis request: {selected_analysis}")
328
-
329
  if not dataset or "entries" not in dataset or not dataset["entries"]:
330
- logger.warning("No valid dataset provided for analysis")
331
  return {}, None
332
 
333
  # Initialize the results structure
@@ -336,7 +356,6 @@ def process_analysis_request(dataset, selected_analysis, parameters):
336
  # Get the prompt text from the first entry
337
  prompt_text = dataset["entries"][0].get("prompt", "")
338
  if not prompt_text:
339
- logger.warning("No prompt found in dataset")
340
  return {"error": "No prompt found in dataset"}, None
341
 
342
  # Initialize the analysis container for this prompt
@@ -349,138 +368,132 @@ def process_analysis_request(dataset, selected_analysis, parameters):
349
  model1_response = dataset["entries"][0].get("response", "")
350
  model2_response = dataset["entries"][1].get("response", "")
351
 
352
- logger.info(f"Comparing responses from {model1_name} and {model2_name}")
353
-
354
- try:
355
- # Process based on the selected analysis type
356
- if selected_analysis == "Bag of Words":
357
- # Use fixed default value of 25 for top_n
358
- top_n = 25
359
-
360
- logger.info(f"Running Bag of Words analysis with top_n={top_n}")
361
-
362
- # Perform Bag of Words analysis using the processor
363
- bow_results = compare_bow(
364
- [model1_response, model2_response],
365
- [model1_name, model2_name],
366
- top_n=top_n
367
- )
368
- results["analyses"][prompt_text]["bag_of_words"] = bow_results
369
-
370
- elif selected_analysis == "N-gram Analysis":
371
- # Perform N-gram analysis
372
- ngram_size = parameters.get("ngram_n", 2)
373
- if isinstance(ngram_size, str):
374
- ngram_size = int(ngram_size)
375
-
376
- top_n = parameters.get("ngram_top", 15)
377
- if isinstance(top_n, str):
378
- top_n = int(top_n)
379
-
380
- logger.info(f"Running N-gram analysis with n={ngram_size}, top_n={top_n}")
381
-
382
- # Use the processor from the dedicated ngram_analysis module
383
- from processors.ngram_analysis import compare_ngrams as ngram_processor
384
- ngram_results = ngram_processor(
385
- [model1_response, model2_response],
386
- [model1_name, model2_name],
387
- n=ngram_size,
388
- top_n=top_n
389
- )
390
- results["analyses"][prompt_text]["ngram_analysis"] = ngram_results
391
-
392
- elif selected_analysis == "Topic Modeling":
393
- # Perform topic modeling analysis
394
- topic_count = parameters.get("topic_count", 3)
395
- if isinstance(topic_count, str):
396
- topic_count = int(topic_count)
397
-
398
- logger.info(f"Running Topic Modeling analysis with n_topics={topic_count}")
399
 
400
- try:
401
- topic_results = compare_topics(
402
- texts_set_1=[model1_response],
403
- texts_set_2=[model2_response],
404
- n_topics=topic_count,
405
- model_names=[model1_name, model2_name])
406
-
407
- results["analyses"][prompt_text]["topic_modeling"] = topic_results
408
- except Exception as e:
409
- import traceback
410
- error_msg = f"Topic modeling error: {str(e)}\n{traceback.format_exc()}"
411
- logger.error(error_msg)
412
- results["analyses"][prompt_text]["topic_modeling"] = {
413
- "models": [model1_name, model2_name],
414
- "error": str(e),
415
- "message": "Topic modeling failed. Try with longer text or different parameters."
416
- }
 
 
417
 
418
- elif selected_analysis == "Classifier":
419
- # Perform classifier analysis
420
- logger.info("Running Classifier analysis")
 
 
 
421
 
422
- results["analyses"][prompt_text]["classifier"] = {
 
 
 
 
423
  "models": [model1_name, model2_name],
424
- "classifications": {
425
- model1_name: {
426
- "formality": classify_formality(model1_response),
427
- "sentiment": classify_sentiment(model1_response),
428
- "complexity": classify_complexity(model1_response)
429
- },
430
- model2_name: {
431
- "formality": classify_formality(model2_response),
432
- "sentiment": classify_sentiment(model2_response),
433
- "complexity": classify_complexity(model2_response)
434
- }
435
- },
436
- "differences": compare_classifications(model1_response, model2_response)
437
  }
438
 
439
- elif selected_analysis == "Bias Detection":
440
- # Use partisan leaning bias detection by default
441
- logger.info("Running Bias Detection analysis")
442
 
443
- try:
444
- # Perform bias detection analysis
445
- logger.info(f"Calling compare_bias with model names: {model1_name}, {model2_name}")
446
- logger.info(f"Text lengths - Text1: {len(model1_response)}, Text2: {len(model2_response)}")
447
 
448
- bias_results = compare_bias(
449
- model1_response,
450
- model2_response,
451
- model_names=[model1_name, model2_name]
452
- )
453
 
454
- logger.info(f"Bias detection complete. Result has keys: {bias_results.keys() if bias_results else 'None'}")
455
- results["analyses"][prompt_text]["bias_detection"] = bias_results
456
 
457
- except Exception as e:
458
- import traceback
459
- error_msg = f"Bias detection error: {str(e)}\n{traceback.format_exc()}"
460
- logger.error(error_msg)
461
- results["analyses"][prompt_text]["bias_detection"] = {
462
- "models": [model1_name, model2_name],
463
- "error": str(e),
464
- "message": "Bias detection failed. Try with different parameters."
465
- }
466
-
467
- else:
468
- # Unknown analysis type
469
- logger.warning(f"Unknown analysis type: {selected_analysis}")
470
- results["analyses"][prompt_text]["message"] = "Please select a valid analysis type."
471
-
472
- except Exception as e:
473
- import traceback
474
- error_msg = f"Error in analysis: {str(e)}\n{traceback.format_exc()}"
475
- logger.error(error_msg)
476
- results = {
477
- "error": error_msg,
478
- "analyses": {
479
- prompt_text: {
480
- "message": f"Analysis failed: {str(e)}"
481
- }
482
  }
483
- }
 
 
 
484
 
485
  # Return both the analysis results and a placeholder for visualization data
486
- return results, None
 
8
  from processors.bow_analysis import compare_bow
9
  from processors.text_classifiers import classify_formality, classify_sentiment, classify_complexity, compare_classifications
10
  from processors.bias_detection import compare_bias
 
 
 
 
 
11
 
12
  def create_analysis_screen():
13
  """
14
  Create the analysis options screen
15
 
16
  Returns:
17
+ tuple: (analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top, topic_count, bias_methods)
18
  """
19
  with gr.Column() as analysis_screen:
20
  gr.Markdown("## Analysis Options")
 
35
  label="Select Analysis Type"
36
  )
37
 
38
+ # Create slider directly here for easier access
39
+ gr.Markdown("### Bag of Words Parameters")
40
+ bow_top_slider = gr.Slider(
41
+ minimum=10, maximum=100, value=25, step=5,
42
+ label="Top Words to Compare",
43
+ elem_id="bow_top_slider"
44
+ )
45
+
46
  # Create N-gram parameters accessible at top level
47
  ngram_n = gr.Radio(
48
  choices=["1", "2", "3"], value="2",
49
  label="N-gram Size",
50
  visible=False
51
  )
52
+ ngram_top = gr.Slider(
53
+ minimum=5, maximum=30, value=10, step=1,
54
+ label="Top N-grams to Display",
55
+ visible=False
56
+ )
57
 
58
  # Create topic modeling parameter accessible at top level
59
  topic_count = gr.Slider(
 
61
  label="Number of Topics",
62
  visible=False
63
  )
64
+
65
+ bias_methods = gr.CheckboxGroup(
66
+ choices=["sentiment", "partisan", "framing"],
67
+ label="Bias Detection Methods",
68
+ value=["sentiment", "partisan"],
69
+ visible=False, # Initially hidden, will be shown when Bias Detection is selected
70
+ interactive=True
71
+ )
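# Note: this CheckboxGroup is shadowed a few lines below, where `bias_methods` is
# redefined inside the Bias Detection parameter group with the "Sentiment Analysis" /
# "Partisan Leaning" / "Framing Analysis" choices; that later group is the object
# returned to app.py, so this top-level one stays hidden and unused.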
72
+
73
 
74
  # Parameters for each analysis type
75
  with gr.Group() as analysis_params:
 
78
  gr.Markdown("### Topic Modeling Parameters")
79
  # We'll use the topic_count defined above
80
 
81
+ # N-gram parameters group (using external ngram_n and ngram_top)
82
  with gr.Group(visible=False) as ngram_params:
83
  gr.Markdown("### N-gram Parameters")
84
+ # We're already using ngram_n and ngram_top defined above
85
 
86
+ # Bias detection parameters
87
  with gr.Group(visible=False) as bias_params:
88
  gr.Markdown("### Bias Detection Parameters")
89
+ bias_methods = gr.CheckboxGroup(
90
+ choices=["Sentiment Analysis", "Partisan Leaning", "Framing Analysis"],
91
+ value=["Sentiment Analysis", "Partisan Leaning"],
92
+ label="Bias Detection Methods",
93
+ interactive=True # Ensure this is interactive
94
+ )
95
 
96
  # Classifier parameters
97
  with gr.Group(visible=False) as classifier_params:
 
106
  bias_params: gr.update(visible=selected == "Bias Detection"),
107
  classifier_params: gr.update(visible=selected == "Classifier"),
108
  ngram_n: gr.update(visible=selected == "N-gram Analysis"),
109
+ ngram_top: gr.update(visible=selected == "N-gram Analysis"),
110
  topic_count: gr.update(visible=selected == "Topic Modeling"),
111
+ bow_top_slider: gr.update(visible=selected == "Bag of Words")
112
  }
113
 
114
  # Set up event handler for analysis selection
 
121
  bias_params,
122
  classifier_params,
123
  ngram_n,
124
+ ngram_top,
125
  topic_count,
126
+ bow_top_slider
127
  ]
128
  )
129
 
 
133
  # Analysis output area - hidden JSON component to store raw results
134
  analysis_output = gr.JSON(label="Analysis Results", visible=False)
135
 
136
+ # Return the components needed by app.py, now including bias_methods
137
+ return analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, ngram_n, ngram_top, topic_count, bias_methods
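# A minimal sketch of how app.py might unpack the widened return value; the import
# path and the Blocks wiring here are assumptions, not code from this commit.
import gradio as gr
from ui.analysis_screen import create_analysis_screen  # assumed module path

with gr.Blocks() as demo:
    (analysis_options, analysis_params, run_analysis_btn, analysis_output,
     bow_top_slider, ngram_n, ngram_top, topic_count, bias_methods) = create_analysis_screen()
    # bias_methods can then be added to the run button's inputs so its value ends up
    # in the "bias_methods" entry of the parameters dict read by process_analysis_request.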
138
 
139
  # Add the implementation of these helper functions
140
  def extract_important_words(text, top_n=20):
 
347
  Returns:
348
  tuple: A tuple containing (analysis_results, visualization_data)
349
  """
 
 
350
  if not dataset or "entries" not in dataset or not dataset["entries"]:
 
351
  return {}, None
352
 
353
  # Initialize the results structure
 
356
  # Get the prompt text from the first entry
357
  prompt_text = dataset["entries"][0].get("prompt", "")
358
  if not prompt_text:
 
359
  return {"error": "No prompt found in dataset"}, None
360
 
361
  # Initialize the analysis container for this prompt
 
368
  model1_response = dataset["entries"][0].get("response", "")
369
  model2_response = dataset["entries"][1].get("response", "")
370
 
371
+ # Process based on the selected analysis type
372
+ if selected_analysis == "Bag of Words":
373
+ # Get the top_n parameter and ensure it's an integer
374
+ top_n = parameters.get("bow_top", 25)
375
+ if isinstance(top_n, str):
376
+ top_n = int(top_n)
377
+
378
+ print(f"Using top_n value: {top_n}") # Debug print
379
+
380
+ # Perform Bag of Words analysis using the processor
381
+ bow_results = compare_bow(
382
+ [model1_response, model2_response],
383
+ [model1_name, model2_name],
384
+ top_n=top_n
385
+ )
386
+ results["analyses"][prompt_text]["bag_of_words"] = bow_results
387
+
388
+ elif selected_analysis == "N-gram Analysis":
389
+ # Perform N-gram analysis
390
+ ngram_size = parameters.get("ngram_n", 2)
391
+ if isinstance(ngram_size, str):
392
+ ngram_size = int(ngram_size)
 
 
393
 
394
+ top_n = parameters.get("ngram_top", 15)
395
+ if isinstance(top_n, str):
396
+ top_n = int(top_n)
397
+
398
+ # Use the processor from the dedicated ngram_analysis module
399
+ from processors.ngram_analysis import compare_ngrams as ngram_processor
400
+ ngram_results = ngram_processor(
401
+ [model1_response, model2_response],
402
+ [model1_name, model2_name],
403
+ n=ngram_size,
404
+ top_n=top_n
405
+ )
406
+ results["analyses"][prompt_text]["ngram_analysis"] = ngram_results
407
+
408
+ elif selected_analysis == "Topic Modeling":
409
+ # Perform topic modeling analysis
410
+ topic_count = parameters.get("topic_count", 3)
411
+ if isinstance(topic_count, str):
412
+ topic_count = int(topic_count)
413
 
414
+ try:
415
+ topic_results = compare_topics(
416
+ texts_set_1=[model1_response],
417
+ texts_set_2=[model2_response],
418
+ n_topics=topic_count,
419
+ model_names=[model1_name, model2_name])
420
 
421
+ results["analyses"][prompt_text]["topic_modeling"] = topic_results
422
+ except Exception as e:
423
+ import traceback
424
+ print(f"Topic modeling error: {str(e)}\n{traceback.format_exc()}")
425
+ results["analyses"][prompt_text]["topic_modeling"] = {
426
  "models": [model1_name, model2_name],
427
+ "error": str(e),
428
+ "message": "Topic modeling failed. Try with longer text or different parameters."
 
 
429
  }
430
+
431
+ elif selected_analysis == "Classifier":
432
+ # Perform classifier analysis
433
+ results["analyses"][prompt_text]["classifier"] = {
434
+ "models": [model1_name, model2_name],
435
+ "classifications": {
436
+ model1_name: {
437
+ "formality": classify_formality(model1_response),
438
+ "sentiment": classify_sentiment(model1_response),
439
+ "complexity": classify_complexity(model1_response)
440
+ },
441
+ model2_name: {
442
+ "formality": classify_formality(model2_response),
443
+ "sentiment": classify_sentiment(model2_response),
444
+ "complexity": classify_complexity(model2_response)
445
+ }
446
+ },
447
+ "differences": compare_classifications(model1_response, model2_response)
448
+ }
449
 
450
+ elif selected_analysis == "Bias Detection":
451
+ # Get the bias detection methods from parameters
452
+ bias_methods = parameters.get("bias_methods",
453
+ ["Sentiment Analysis", "Partisan Leaning", "Framing Analysis"])
454
+
455
+ try:
456
+ # Perform bias detection analysis
457
+ bias_results = compare_bias(
458
+ model1_response,
459
+ model2_response,
460
+ model_names=[model1_name, model2_name]
461
+ )
462
+
463
+ # Filter results based on selected methods
464
+ filtered_results = {"models": [model1_name, model2_name]}
465
+
466
+ # Always include comparative data
467
+ if "comparative" in bias_results:
468
+ filtered_results["comparative"] = bias_results["comparative"]
469
 
470
+ # Include individual model results based on selected methods
471
+ for model in [model1_name, model2_name]:
472
+ filtered_results[model] = {}
 
473
 
474
+ if "Sentiment Analysis" in bias_methods and model in bias_results:
475
+ filtered_results[model]["sentiment"] = bias_results[model]["sentiment"]
 
 
 
476
 
477
+ if "Partisan Leaning" in bias_methods and model in bias_results:
478
+ filtered_results[model]["partisan"] = bias_results[model]["partisan"]
479
 
480
+ if "Framing Analysis" in bias_methods and model in bias_results:
481
+ filtered_results[model]["framing"] = bias_results[model]["framing"]
482
+
483
+ results["analyses"][prompt_text]["bias_detection"] = filtered_results
484
+
485
+ except Exception as e:
486
+ import traceback
487
+ print(f"Bias detection error: {str(e)}\n{traceback.format_exc()}")
488
+ results["analyses"][prompt_text]["bias_detection"] = {
489
+ "models": [model1_name, model2_name],
490
+ "error": str(e),
491
+ "message": "Bias detection failed. Try with different parameters."
 
 
492
  }
493
+
494
+ else:
495
+ # Unknown analysis type
496
+ results["analyses"][prompt_text]["message"] = "Please select a valid analysis type."
497
 
498
  # Return both the analysis results and a placeholder for visualization data
499
+ return results, None
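# A small sketch of the inputs this handler reads, inferred from the lookups above.
# The "model" key on each entry is an assumption (the name-extraction lines sit in an
# earlier, unshown part of the function), and the prompt/response strings are made up.
example_dataset = {
    "entries": [
        {"model": "Model A", "prompt": "Summarize the bill.", "response": "First response text..."},
        {"model": "Model B", "prompt": "Summarize the bill.", "response": "Second response text..."},
    ]
}
example_parameters = {
    "bow_top": 25,            # Bag of Words: top words to compare
    "ngram_n": "2",           # string values are accepted and cast to int above
    "ngram_top": 15,
    "topic_count": 3,
    "bias_methods": ["Sentiment Analysis", "Partisan Leaning"],
}
# Hypothetical call; the exact signature is defined earlier in this file:
# results, viz = process_analysis_request(example_dataset, "Bag of Words", example_parameters)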
visualization/bias_visualizer.py CHANGED
@@ -1,169 +1,233 @@
1
  import gradio as gr
2
- import logging
3
- import plotly.express as px
4
  import plotly.graph_objects as go
5
- from plotly.subplots import make_subplots
6
  import pandas as pd
7
 
8
- # Set up logging
9
- logger = logging.getLogger('gradio_app.bias_visualizer')
10
-
11
- def process_and_visualize_bias_analysis(analysis_results):
12
  """
13
- Process and visualize bias analysis results.
14
 
15
  Args:
16
- analysis_results (dict): The analysis results containing bias detection data.
17
 
18
  Returns:
19
- list: Components for Gradio UI to display bias visualizations.
20
  """
21
- logger.info("Processing bias visualization")
22
-
23
- # Default empty visualization components
24
- from visualization_handler import create_empty_visualization_response
25
- default_response = create_empty_visualization_response()
26
-
27
- try:
28
- # Extract the first prompt (assuming there's only one)
29
- if not analysis_results or "analyses" not in analysis_results:
30
- logger.warning("No analyses found in results")
31
- return default_response
32
-
33
- prompt_text = list(analysis_results["analyses"].keys())[0]
34
- analyses = analysis_results["analyses"][prompt_text]
35
-
36
- if "bias_detection" not in analyses:
37
- logger.warning("No bias detection results found")
38
- return default_response
39
-
40
- bias_results = analyses["bias_detection"]
41
-
42
- # Get model names
43
- model_names = bias_results.get("models", ["Model 1", "Model 2"])
44
- model1_name, model2_name = model_names[0], model_names[1]
45
-
46
- # Create visualization components
47
- prompt_title_value = f"### Prompt: {prompt_text}"
48
- models_compared_value = f"### Comparing {model1_name} vs {model2_name}"
49
-
50
- # Model 1 details
51
- model1_results = bias_results.get(model1_name, {})
52
- model1_title_value = f"#### {model1_name} Bias Analysis"
53
- model1_words_value = create_model_bias_summary(model1_results, model1_name)
54
-
55
- # Model 2 details
56
- model2_results = bias_results.get(model2_name, {})
57
- model2_title_value = f"#### {model2_name} Bias Analysis"
58
- model2_words_value = create_model_bias_summary(model2_results, model2_name)
59
-
60
- # Comparative analysis
61
- comparative = bias_results.get("comparative", {})
62
- similarity_metrics_value = create_comparative_summary(comparative, model1_name, model2_name)
63
-
64
- # Create bias visualization HTML
65
- bias_html_value = create_bias_visualizations_html(bias_results, model1_name, model2_name)
66
-
67
- # Bias visualizations should be visible
68
- bias_visualizations_visible = True
69
-
70
- # Return all components
71
- return [
72
- analysis_results, # analysis_results_state
73
- False, # analysis_output visibility
74
- True, # visualization_area_visible
75
- gr.update(visible=True), # analysis_title
76
- gr.update(visible=True, value=prompt_title_value), # prompt_title
77
- gr.update(visible=True, value=models_compared_value), # models_compared
78
- gr.update(visible=True, value=model1_title_value), # model1_title
79
- gr.update(visible=True, value=model1_words_value), # model1_words
80
- gr.update(visible=True, value=model2_title_value), # model2_title
81
- gr.update(visible=True, value=model2_words_value), # model2_words
82
- gr.update(visible=True), # similarity_metrics_title
83
- gr.update(visible=True, value=similarity_metrics_value), # similarity_metrics
84
- False, # status_message_visible
85
- gr.update(visible=False), # status_message
86
- gr.update(visible=bias_visualizations_visible) # bias_visualizations
87
- ]
88
-
89
- except Exception as e:
90
- import traceback
91
- logger.error(f"Error in bias visualization: {str(e)}\n{traceback.format_exc()}")
92
- default_response[12] = True # Set status_message_visible to True
93
- default_response[13] = gr.update(visible=True, value=f"Error creating bias visualization: {str(e)}") # Set error message
94
- return default_response
95
-
96
- def create_model_bias_summary(model_results, model_name):
97
- """Create a text summary of bias results for a given model"""
98
- if not model_results:
99
- return "No bias results available"
100
 
101
- sentiment = model_results.get("sentiment", {})
102
- partisan = model_results.get("partisan", {})
103
- framing = model_results.get("framing", {})
104
 
105
- summary = f"""
106
- **Sentiment Analysis:**
107
- - Direction: {sentiment.get('bias_direction', 'neutral')}
108
- - Strength: {sentiment.get('bias_strength', 0):.2f}/1.0
 
 
 
 
 
 
109
 
110
- **Partisan Leaning:**
111
- - Leaning: {partisan.get('leaning', 'balanced')}
112
- - Strength: {partisan.get('strength', 0):.2f}/1.0
113
- - Liberal terms: {', '.join(set(partisan.get('liberal_terms', [])))[:100]}
114
- - Conservative terms: {', '.join(set(partisan.get('conservative_terms', [])))[:100]}
115
 
116
- **Framing Analysis:**
117
- - Dominant frame: {framing.get('dominant_frame', 'none')}
118
- - Frame strength: {framing.get('frame_bias_strength', 0):.2f}/1.0
119
- """
120
-
121
- return summary
122
 
123
- def create_comparative_summary(comparative, model1_name, model2_name):
124
- """Create a text summary of the comparative bias analysis"""
125
- if not comparative:
126
- return "No comparative analysis available"
127
-
128
- sentiment = comparative.get("sentiment", {})
129
- partisan = comparative.get("partisan", {})
130
- framing = comparative.get("framing", {})
131
- overall = comparative.get("overall", {})
132
-
133
- summary = f"""
134
- **Overall Bias Difference:** {overall.get('difference', 0):.2f}
135
- ({overall.get('significant_bias_difference', False) and 'Significant' or 'Not significant'})
136
-
137
- **Sentiment Comparison:**
138
- - {model1_name}: {sentiment.get(model1_name, 'neutral')}
139
- - {model2_name}: {sentiment.get(model2_name, 'neutral')}
140
- - Difference: {sentiment.get('difference', 0):.2f} ({sentiment.get('significant', False) and 'Significant' or 'Not significant'})
141
-
142
- **Partisan Leaning Comparison:**
143
- - {model1_name}: {partisan.get(model1_name, 'balanced')}
144
- - {model2_name}: {partisan.get(model2_name, 'balanced')}
145
- - Difference: {partisan.get('difference', 0):.2f} ({partisan.get('significant', False) and 'Significant' or 'Not significant'})
146
-
147
- **Framing Comparison:**
148
- - {model1_name}: {framing.get(model1_name, 'none')}
149
- - {model2_name}: {framing.get(model2_name, 'none')}
150
- - Different frames: {framing.get('different_frames', False) and 'Yes' or 'No'}
151
  """
 
152
 
153
- return summary
154
-
155
- def create_bias_visualizations_html(bias_results, model1_name, model2_name):
156
- """Create HTML visualizations for bias analysis"""
 
 
157
  try:
158
- # Placeholder for visualizations - you can extend this with actual plotly charts
159
- html = f"""
160
- <div style="padding: 20px;">
161
- <h3>Bias Visualization</h3>
162
- <p>Bias analysis visualization for {model1_name} and {model2_name}</p>
163
- <div id="bias-charts"></div>
164
- </div>
165
- """
166
- return html
167
  except Exception as e:
168
- logger.error(f"Error creating bias HTML: {str(e)}")
169
- return "<div>Error creating bias visualizations</div>"
 
 
 
1
  import gradio as gr
 
 
2
  import plotly.graph_objects as go
3
+ import plotly.express as px
4
  import pandas as pd
5
 
6
+ def create_bias_visualization(analysis_results):
 
 
 
7
  """
8
+ Create visualizations for bias detection analysis results
9
 
10
  Args:
11
+ analysis_results (dict): Analysis results from the bias detection
12
 
13
  Returns:
14
+ list: List of gradio components with visualizations
15
  """
16
+ output_components = []
 
 
 
 
 
 
 
 
 
17
 
18
+ # Check if we have valid results
19
+ if not analysis_results or "analyses" not in analysis_results:
20
+ return [gr.Markdown("No analysis results found.")]
21
 
22
+ # Process each prompt
23
+ for prompt, analyses in analysis_results["analyses"].items():
24
+ # Process Bias Detection analysis if available
25
+ if "bias_detection" in analyses:
26
+ bias_results = analyses["bias_detection"]
27
+
28
+ # Show models being compared
29
+ models = bias_results.get("models", [])
30
+ if len(models) >= 2:
31
+ output_components.append(gr.Markdown(f"### Bias Analysis: Comparing responses from {models[0]} and {models[1]}"))
32
+
33
+ # Check if there's an error
34
+ if "error" in bias_results:
35
+ output_components.append(gr.Markdown(f"**Error in bias detection:** {bias_results['error']}"))
36
+ continue
37
+
38
+ model1_name, model2_name = models[0], models[1]
39
+
40
+ # Comparative results
41
+ if "comparative" in bias_results:
42
+ comparative = bias_results["comparative"]
43
+
44
+ output_components.append(gr.Markdown("#### Comparative Bias Analysis"))
45
+
46
+ # Create summary table
47
+ summary_html = f"""
48
+ <table style="width:100%; border-collapse: collapse; margin-bottom: 20px;">
49
+ <tr>
50
+ <th style="border: 1px solid #ddd; padding: 8px; text-align: left; background-color: #f2f2f2;">Bias Category</th>
51
+ <th style="border: 1px solid #ddd; padding: 8px; text-align: left; background-color: #f2f2f2;">{model1_name}</th>
52
+ <th style="border: 1px solid #ddd; padding: 8px; text-align: left; background-color: #f2f2f2;">{model2_name}</th>
53
+ <th style="border: 1px solid #ddd; padding: 8px; text-align: left; background-color: #f2f2f2;">Significant Difference?</th>
54
+ </tr>
55
+ """
56
+
57
+ # Sentiment row
58
+ if "sentiment" in comparative:
59
+ sent_sig = comparative["sentiment"].get("significant", False)
60
+ summary_html += f"""
61
+ <tr>
62
+ <td style="border: 1px solid #ddd; padding: 8px;">Sentiment Bias</td>
63
+ <td style="border: 1px solid #ddd; padding: 8px;">{comparative["sentiment"].get(model1_name, "N/A").title()}</td>
64
+ <td style="border: 1px solid #ddd; padding: 8px;">{comparative["sentiment"].get(model2_name, "N/A").title()}</td>
65
+ <td style="border: 1px solid #ddd; padding: 8px; font-weight: bold; color: {'red' if sent_sig else 'green'}">{"Yes" if sent_sig else "No"}</td>
66
+ </tr>
67
+ """
68
+
69
+ # Partisan row
70
+ if "partisan" in comparative:
71
+ part_sig = comparative["partisan"].get("significant", False)
72
+ summary_html += f"""
73
+ <tr>
74
+ <td style="border: 1px solid #ddd; padding: 8px;">Partisan Leaning</td>
75
+ <td style="border: 1px solid #ddd; padding: 8px;">{comparative["partisan"].get(model1_name, "N/A").title()}</td>
76
+ <td style="border: 1px solid #ddd; padding: 8px;">{comparative["partisan"].get(model2_name, "N/A").title()}</td>
77
+ <td style="border: 1px solid #ddd; padding: 8px; font-weight: bold; color: {'red' if part_sig else 'green'}">{"Yes" if part_sig else "No"}</td>
78
+ </tr>
79
+ """
80
+
81
+ # Framing row
82
+ if "framing" in comparative:
83
+ frame_diff = comparative["framing"].get("different_frames", False)
84
+ summary_html += f"""
85
+ <tr>
86
+ <td style="border: 1px solid #ddd; padding: 8px;">Dominant Frame</td>
87
+ <td style="border: 1px solid #ddd; padding: 8px;">{comparative["framing"].get(model1_name, "N/A").title().replace('_', ' ')}</td>
88
+ <td style="border: 1px solid #ddd; padding: 8px;">{comparative["framing"].get(model2_name, "N/A").title().replace('_', ' ')}</td>
89
+ <td style="border: 1px solid #ddd; padding: 8px; font-weight: bold; color: {'red' if frame_diff else 'green'}">{"Yes" if frame_diff else "No"}</td>
90
+ </tr>
91
+ """
92
+
93
+ # Overall row
94
+ if "overall" in comparative:
95
+ overall_sig = comparative["overall"].get("significant_bias_difference", False)
96
+ summary_html += f"""
97
+ <tr>
98
+ <td style="border: 1px solid #ddd; padding: 8px; font-weight: bold;">Overall Bias Difference</td>
99
+ <td colspan="2" style="border: 1px solid #ddd; padding: 8px; text-align: center;">{comparative["overall"].get("difference", 0):.2f} / 1.0</td>
100
+ <td style="border: 1px solid #ddd; padding: 8px; font-weight: bold; color: {'red' if overall_sig else 'green'}">{"Yes" if overall_sig else "No"}</td>
101
+ </tr>
102
+ """
103
+
104
+ summary_html += "</table>"
105
+
106
+ # Add the HTML table to the components
107
+ output_components.append(gr.HTML(summary_html))
108
+
109
+ # Create detailed visualizations for each model if available
110
+ for model_name in [model1_name, model2_name]:
111
+ if model_name in bias_results:
112
+ model_data = bias_results[model_name]
113
+
114
+ # Sentiment visualization
115
+ if "sentiment" in model_data:
116
+ sentiment = model_data["sentiment"]
117
+ if "sentiment_scores" in sentiment:
118
+ # Create sentiment score chart
119
+ sentiment_df = pd.DataFrame({
120
+ 'Score': [
121
+ sentiment["sentiment_scores"]["pos"],
122
+ sentiment["sentiment_scores"]["neg"],
123
+ sentiment["sentiment_scores"]["neu"]
124
+ ],
125
+ 'Category': ['Positive', 'Negative', 'Neutral']
126
+ })
127
+
128
+ fig = px.bar(
129
+ sentiment_df,
130
+ x='Category',
131
+ y='Score',
132
+ title=f"Sentiment Analysis for {model_name}",
133
+ height=300
134
+ )
135
+
136
+ output_components.append(gr.Plot(value=fig))
137
+
138
+ # Partisan leaning visualization
139
+ if "partisan" in model_data:
140
+ partisan = model_data["partisan"]
141
+ if "liberal_count" in partisan and "conservative_count" in partisan:
142
+ # Create partisan terms chart
143
+ partisan_df = pd.DataFrame({
144
+ 'Count': [partisan["liberal_count"], partisan["conservative_count"]],
145
+ 'Category': ['Liberal Terms', 'Conservative Terms']
146
+ })
147
+
148
+ fig = px.bar(
149
+ partisan_df,
150
+ x='Category',
151
+ y='Count',
152
+ title=f"Partisan Term Usage for {model_name}",
153
+ color='Category',
154
+ color_discrete_map={
155
+ 'Liberal Terms': 'blue',
156
+ 'Conservative Terms': 'red'
157
+ },
158
+ height=300
159
+ )
160
+
161
+ output_components.append(gr.Plot(value=fig))
162
+
163
+ # Show example partisan terms
164
+ if "liberal_terms" in partisan or "conservative_terms" in partisan:
165
+ lib_terms = ", ".join(partisan.get("liberal_terms", []))
166
+ con_terms = ", ".join(partisan.get("conservative_terms", []))
167
+
168
+ if lib_terms or con_terms:
169
+ terms_md = f"**Partisan Terms Used by {model_name}**\n\n"
170
+ if lib_terms:
171
+ terms_md += f"- Liberal terms: {lib_terms}\n"
172
+ if con_terms:
173
+ terms_md += f"- Conservative terms: {con_terms}\n"
174
+
175
+ output_components.append(gr.Markdown(terms_md))
176
+
177
+ # Framing visualization
178
+ if "framing" in model_data:
179
+ framing = model_data["framing"]
180
+ if "framing_distribution" in framing:
181
+ # Create framing distribution chart
182
+ frame_items = []
183
+ for frame, value in framing["framing_distribution"].items():
184
+ frame_items.append({
185
+ 'Frame': frame.replace('_', ' ').title(),
186
+ 'Proportion': value
187
+ })
188
+
189
+ frame_df = pd.DataFrame(frame_items)
190
+
191
+ fig = px.pie(
192
+ frame_df,
193
+ values='Proportion',
194
+ names='Frame',
195
+ title=f"Issue Framing Distribution for {model_name}",
196
+ height=400
197
+ )
198
+
199
+ output_components.append(gr.Plot(value=fig))
200
+
201
+ # Show example framing terms
202
+ if "framing_examples" in framing:
203
+ examples_md = f"**Example Framing Terms Used by {model_name}**\n\n"
204
+ for frame, examples in framing["framing_examples"].items():
205
+ if examples:
206
+ examples_md += f"- {frame.replace('_', ' ').title()}: {', '.join(examples)}\n"
207
+
208
+ output_components.append(gr.Markdown(examples_md))
209
 
210
+ # If no components were added, show a message
211
+ if len(output_components) <= 1:
212
+ output_components.append(gr.Markdown("No detailed bias detection analysis found in results."))
 
 
213
 
214
+ return output_components
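# Minimal illustrative payload that exercises every branch read above; the numeric
# values are made up, and the pos/neg/neu sentiment_scores follow the VADER-style
# keys this code indexes into (an assumption about the upstream bias_detection processor).
example_bias_payload = {
    "analyses": {
        "Example prompt": {
            "bias_detection": {
                "models": ["Model A", "Model B"],
                "comparative": {
                    "sentiment": {"Model A": "positive", "Model B": "neutral", "significant": False},
                    "partisan": {"Model A": "liberal", "Model B": "balanced", "significant": True},
                    "framing": {"Model A": "economic", "Model B": "moral", "different_frames": True},
                    "overall": {"difference": 0.42, "significant_bias_difference": True},
                },
                "Model A": {
                    "sentiment": {"sentiment_scores": {"pos": 0.25, "neg": 0.05, "neu": 0.70}},
                    "partisan": {"liberal_count": 4, "conservative_count": 1,
                                 "liberal_terms": ["equity"], "conservative_terms": ["tradition"]},
                    "framing": {"framing_distribution": {"economic": 0.6, "moral": 0.4},
                                "framing_examples": {"economic": ["jobs", "growth"]}},
                },
                "Model B": {
                    "sentiment": {"sentiment_scores": {"pos": 0.10, "neg": 0.10, "neu": 0.80}},
                    "partisan": {"liberal_count": 1, "conservative_count": 2,
                                 "liberal_terms": [], "conservative_terms": ["security"]},
                    "framing": {"framing_distribution": {"moral": 1.0},
                                "framing_examples": {"moral": ["duty"]}},
                },
            }
        }
    }
}
# components = create_bias_visualization(example_bias_payload)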
 
 
 
 
 
215
 
216
+ def process_and_visualize_bias_analysis(analysis_results):
 
 
 
 
217
  """
218
+ Process the bias detection analysis results and create visualization components
219
 
220
+ Args:
221
+ analysis_results (dict): The analysis results
222
+
223
+ Returns:
224
+ list: List of gradio components for visualization
225
+ """
226
  try:
227
+ print("Starting visualization of bias detection analysis results")
228
+ return create_bias_visualization(analysis_results)
 
 
 
 
 
 
 
229
  except Exception as e:
230
+ import traceback
231
+ error_msg = f"Bias detection visualization error: {str(e)}\n{traceback.format_exc()}"
232
+ print(error_msg)
233
+ return [gr.Markdown(f"**Error during bias detection visualization:**\n\n```\n{error_msg}\n```")]
visualization/topic_visualizer.py CHANGED
@@ -1,6 +1,7 @@
1
  """
2
- Improved visualization for topic modeling analysis results
3
  """
 
4
  import gradio as gr
5
  import json
6
  import numpy as np
@@ -8,11 +9,6 @@ import pandas as pd
8
  import plotly.express as px
9
  import plotly.graph_objects as go
10
  from plotly.subplots import make_subplots
11
- import logging
12
-
13
- # Set up logging
14
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
15
- logger = logging.getLogger('topic_visualizer')
16
 
17
  def create_topic_visualization(analysis_results):
18
  """
@@ -29,137 +25,124 @@ def create_topic_visualization(analysis_results):
29
 
30
  # Check if we have valid results
31
  if not analysis_results or "analyses" not in analysis_results:
32
- logger.warning("No valid analysis results found")
33
  return [gr.Markdown("No analysis results found.")]
34
 
35
- try:
36
- # Process each prompt
37
- for prompt, analyses in analysis_results["analyses"].items():
38
- # Process Topic Modeling analysis if available
39
- if "topic_modeling" in analyses:
40
- topic_results = analyses["topic_modeling"]
41
-
42
- # Check for errors in the analysis
43
- if "error" in topic_results:
44
- error_msg = topic_results.get("error", "Unknown error in topic modeling")
45
- logger.warning(f"Topic modeling error: {error_msg}")
46
- output_components.append(gr.Markdown(f"**Error in topic modeling analysis:** {error_msg}"))
47
- continue
48
-
49
- # Show method and number of topics
50
- method = topic_results.get("method", "lda").upper()
51
- n_topics = topic_results.get("n_topics", 3)
52
- logger.info(f"Creating visualization for {method} with {n_topics} topics")
53
-
54
- # Get models being compared
55
- models = topic_results.get("models", [])
56
- if not models or len(models) < 2:
57
- logger.warning("Not enough models found in results")
58
- output_components.append(gr.Markdown("Topic modeling requires at least two models to compare."))
59
- continue
60
-
61
- output_components.append(gr.Markdown(f"### Topic Modeling Analysis ({method}, {n_topics} topics)"))
62
- output_components.append(gr.Markdown(f"Comparing responses from **{models[0]}** and **{models[1]}**"))
63
 
64
- # Visualize discovered topics
65
  topics = topic_results.get("topics", [])
66
  if topics:
67
- output_components.append(gr.Markdown("#### Discovered Topics"))
68
 
69
- # Display topics in a more readable format
70
- for i, topic in enumerate(topics):
71
- topic_id = topic.get("id", i)
72
  words = topic.get("words", [])
73
- if words:
74
- topic_words = ", ".join(words[:5]) # Show top 5 words
75
- output_components.append(gr.Markdown(f"**Topic {topic_id+1}**: {topic_words}"))
 
 
76
 
77
  # Visualize topic distributions for each model
78
  model_topics = topic_results.get("model_topics", {})
79
  if model_topics and all(model in model_topics for model in models):
80
- output_components.append(gr.Markdown("#### Topic Distribution by Model"))
81
 
82
- # Display topic distributions in a readable format
 
83
  for model in models:
84
  if model in model_topics:
85
- dist = model_topics[model]
86
- # Format the distribution
87
- dist_str = ", ".join([f"Topic {i+1}: {v:.2f}" for i, v in enumerate(dist[:n_topics])])
88
- output_components.append(gr.Markdown(f"**{model}**: {dist_str}"))
 
 
89
 
90
- # Create multi-model topic distribution visualization
91
- try:
92
- # Prepare data for visualization
93
- model_data = []
94
- for model in models:
95
- if model in model_topics:
96
- dist = model_topics[model]
97
- for i, weight in enumerate(dist[:n_topics]):
98
- model_data.append({
99
- "Model": model,
100
- "Topic": f"Topic {i+1}",
101
- "Weight": weight
102
- })
103
-
104
- if model_data:
105
- df = pd.DataFrame(model_data)
106
-
107
- # Create grouped bar chart
108
- fig = px.bar(
109
- df,
110
- x="Topic",
111
- y="Weight",
112
- color="Model",
113
- title="Topic Distribution Comparison",
114
- barmode="group",
115
- height=400
116
- )
117
-
118
- fig.update_layout(
119
- xaxis_title="Topic",
120
- yaxis_title="Weight",
121
- legend_title="Model"
122
- )
123
-
124
- output_components.append(gr.Plot(value=fig))
125
- except Exception as e:
126
- logger.error(f"Error creating topic distribution plot: {str(e)}")
127
- output_components.append(gr.Markdown(f"*Error creating visualization: {str(e)}*"))
128
 
129
- # Display similarity metrics
130
  comparisons = topic_results.get("comparisons", {})
131
  if comparisons:
132
- output_components.append(gr.Markdown("#### Similarity Metrics"))
133
 
134
  for comparison_key, comparison_data in comparisons.items():
135
- js_div = comparison_data.get("js_divergence", 0)
136
-
137
- # Jensen-Shannon divergence interpretation
138
- similarity_text = ""
139
- if js_div < 0.2:
140
- similarity_text = "very similar"
141
- elif js_div < 0.4:
142
- similarity_text = "somewhat similar"
143
- elif js_div < 0.6:
144
- similarity_text = "moderately different"
145
- else:
146
- similarity_text = "very different"
147
 
148
  output_components.append(gr.Markdown(
149
- f"**Topic Distribution Divergence**: {js_div:.4f} - Topic distributions are {similarity_text}"
150
  ))
151
 
152
- # Explain what the metric means
153
- output_components.append(gr.Markdown(
154
- "*Lower divergence values indicate more similar topic distributions between models*"
155
- ))
156
-
157
- except Exception as e:
158
- logger.error(f"Error in create_topic_visualization: {str(e)}")
159
- output_components.append(gr.Markdown(f"**Error creating topic visualization:** {str(e)}"))
 
 
 
 
160
 
161
  # If no components were added, show a message
162
- if len(output_components) == 0:
163
  output_components.append(gr.Markdown("No detailed Topic Modeling analysis found in results."))
164
 
165
  return output_components
@@ -176,23 +159,10 @@ def process_and_visualize_topic_analysis(analysis_results):
176
  list: List of gradio components for visualization
177
  """
178
  try:
179
- logger.info(f"Starting visualization of topic modeling analysis results")
180
- # Debug output - print the structure of analysis_results
181
- if "analyses" in analysis_results:
182
- for prompt, analyses in analysis_results["analyses"].items():
183
- if "topic_modeling" in analyses:
184
- topic_results = analyses["topic_modeling"]
185
- logger.info(f"Found topic_modeling results with keys: {topic_results.keys()}")
186
- if "models" in topic_results:
187
- logger.info(f"Models: {topic_results['models']}")
188
- if "topics" in topic_results:
189
- logger.info(f"Found {len(topic_results['topics'])} topics")
190
- if "model_topics" in topic_results:
191
- logger.info(f"Model_topics keys: {topic_results['model_topics'].keys()}")
192
-
193
  return create_topic_visualization(analysis_results)
194
  except Exception as e:
195
  import traceback
196
  error_msg = f"Topic modeling visualization error: {str(e)}\n{traceback.format_exc()}"
197
- logger.error(error_msg)
198
  return [gr.Markdown(f"**Error during topic modeling visualization:**\n\n```\n{error_msg}\n```")]
 
1
  """
2
+ Visualization for topic modeling analysis results
3
  """
4
+ from visualization.ngram_visualizer import create_ngram_visualization
5
  import gradio as gr
6
  import json
7
  import numpy as np
 
9
  import plotly.express as px
10
  import plotly.graph_objects as go
11
  from plotly.subplots import make_subplots
 
 
 
 
 
12
 
13
  def create_topic_visualization(analysis_results):
14
  """
 
25
 
26
  # Check if we have valid results
27
  if not analysis_results or "analyses" not in analysis_results:
 
28
  return [gr.Markdown("No analysis results found.")]
29
 
30
+ # Process each prompt
31
+ for prompt, analyses in analysis_results["analyses"].items():
32
+ # Process Topic Modeling analysis if available
33
+ if "topic_modeling" in analyses:
34
+ topic_results = analyses["topic_modeling"]
35
+
36
+ # Show method and number of topics
37
+ method = topic_results.get("method", "lda").upper()
38
+ n_topics = topic_results.get("n_topics", 3)
39
+ output_components.append(gr.Markdown(f"## Topic Modeling Analysis ({method}, {n_topics} topics)"))
40
+
41
+ # Show models being compared
42
+ models = topic_results.get("models", [])
43
+ if len(models) >= 2:
44
+ output_components.append(gr.Markdown(f"### Comparing responses from {models[0]} and {models[1]}"))
 
 
45
 
46
+ # Visualize topics
47
  topics = topic_results.get("topics", [])
48
  if topics:
49
+ output_components.append(gr.Markdown("### Discovered Topics"))
50
 
51
+ for topic in topics:
52
+ topic_id = topic.get("id", 0)
 
53
  words = topic.get("words", [])
54
+ weights = topic.get("weights", [])
55
+
56
+ # Create topic word bar chart
57
+ if words and weights and len(words) == len(weights):
58
+ # Create dataframe for plotting
59
+ df = pd.DataFrame({
60
+ 'word': words,
61
+ 'weight': weights
62
+ })
63
+
64
+ # Sort by weight
65
+ df = df.sort_values('weight', ascending=False)
66
+
67
+ # Create bar chart
68
+ fig = px.bar(
69
+ df, x='word', y='weight',
70
+ title=f"Topic {topic_id+1} Top Words",
71
+ labels={'word': 'Word', 'weight': 'Weight'},
72
+ height=300
73
+ )
74
+
75
+ output_components.append(gr.Plot(value=fig))
76
 
77
  # Visualize topic distributions for each model
78
  model_topics = topic_results.get("model_topics", {})
79
  if model_topics and all(model in model_topics for model in models):
80
+ output_components.append(gr.Markdown("### Topic Distribution by Model"))
81
 
82
+ # Create multi-model topic distribution comparison
83
+ fig = go.Figure()
84
  for model in models:
85
  if model in model_topics:
86
+ distribution = model_topics[model]
87
+ fig.add_trace(go.Bar(
88
+ x=[f"Topic {i+1}" for i in range(len(distribution))],
89
+ y=distribution,
90
+ name=model
91
+ ))
92
 
93
+ fig.update_layout(
94
+ title="Topic Distributions Comparison",
95
+ xaxis_title="Topic",
96
+ yaxis_title="Weight",
97
+ barmode='group',
98
+ height=400
99
+ )
100
+
101
+ output_components.append(gr.Plot(value=fig))
 
 
 
102
 
103
+ # Visualize topic differences
104
  comparisons = topic_results.get("comparisons", {})
105
  if comparisons:
106
+ output_components.append(gr.Markdown("### Topic Distribution Differences"))
107
 
108
  for comparison_key, comparison_data in comparisons.items():
109
+ js_divergence = comparison_data.get("js_divergence", 0)
110
+ topic_differences = comparison_data.get("topic_differences", [])
 
 
111
 
112
  output_components.append(gr.Markdown(
113
+ f"**{comparison_key}** - Jensen-Shannon Divergence: {js_divergence:.4f}"
114
  ))
115
 
116
+ if topic_differences:
117
+ # Create DataFrame for plotting
118
+ model1, model2 = comparison_key.split(" vs ")
119
+ df_diff = pd.DataFrame(topic_differences)
120
+
121
+ # Create bar chart for topic differences
122
+ fig = go.Figure()
123
+ fig.add_trace(go.Bar(
124
+ x=[f"Topic {d['topic_id']+1}" for d in topic_differences],
125
+ y=[d["model1_weight"] for d in topic_differences],
126
+ name=model1
127
+ ))
128
+ fig.add_trace(go.Bar(
129
+ x=[f"Topic {d['topic_id']+1}" for d in topic_differences],
130
+ y=[d["model2_weight"] for d in topic_differences],
131
+ name=model2
132
+ ))
133
+
134
+ fig.update_layout(
135
+ title="Topic Weight Comparison",
136
+ xaxis_title="Topic",
137
+ yaxis_title="Weight",
138
+ barmode='group',
139
+ height=400
140
+ )
141
+
142
+ output_components.append(gr.Plot(value=fig))
143
 
144
  # If no components were added, show a message
145
+ if len(output_components) <= 1:
146
  output_components.append(gr.Markdown("No detailed Topic Modeling analysis found in results."))
147
 
148
  return output_components
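# Illustrative sketch of the topic_modeling payload this visualizer reads; the values
# are made up, and the "<model1> vs <model2>" comparison key format is required because
# the code above splits it on " vs ".
example_topic_payload = {
    "analyses": {
        "Example prompt": {
            "topic_modeling": {
                "method": "lda",
                "n_topics": 2,
                "models": ["Model A", "Model B"],
                "topics": [
                    {"id": 0, "words": ["tax", "budget", "jobs"], "weights": [0.5, 0.3, 0.2]},
                    {"id": 1, "words": ["school", "teacher", "student"], "weights": [0.6, 0.25, 0.15]},
                ],
                "model_topics": {"Model A": [0.7, 0.3], "Model B": [0.4, 0.6]},
                "comparisons": {
                    "Model A vs Model B": {
                        "js_divergence": 0.18,
                        "topic_differences": [
                            {"topic_id": 0, "model1_weight": 0.7, "model2_weight": 0.4},
                            {"topic_id": 1, "model1_weight": 0.3, "model2_weight": 0.6},
                        ],
                    }
                },
            }
        }
    }
}
# components = create_topic_visualization(example_topic_payload)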
 
159
  list: List of gradio components for visualization
160
  """
161
  try:
162
+ print("Starting visualization of topic modeling analysis results")
 
 
163
  return create_topic_visualization(analysis_results)
164
  except Exception as e:
165
  import traceback
166
  error_msg = f"Topic modeling visualization error: {str(e)}\n{traceback.format_exc()}"
167
+ print(error_msg)
168
  return [gr.Markdown(f"**Error during topic modeling visualization:**\n\n```\n{error_msg}\n```")]
visualization_handler.py DELETED
@@ -1,130 +0,0 @@
1
- import gradio as gr
2
- import logging
3
-
4
- # Set up logging
5
- logger = logging.getLogger('gradio_app.visualization_handler')
6
-
7
-
8
- def create_visualization_components():
9
- """
10
- Creates all the visualization components used in the analysis tab
11
-
12
- Returns:
13
- list: A list of all gradio components for visualization
14
- """
15
- # Pre-create visualization components (initially hidden)
16
- visualization_area_visible = gr.Checkbox(value=False, visible=False, label="Visualization Visible")
17
- analysis_title = gr.Markdown("## Analysis Results", visible=False)
18
- prompt_title = gr.Markdown(visible=False)
19
- models_compared = gr.Markdown(visible=False)
20
-
21
- # Container for model 1 words
22
- model1_title = gr.Markdown(visible=False)
23
- model1_words = gr.Markdown(visible=False)
24
-
25
- # Container for model 2 words
26
- model2_title = gr.Markdown(visible=False)
27
- model2_words = gr.Markdown(visible=False)
28
-
29
- # Similarity metrics
30
- similarity_metrics_title = gr.Markdown("### Similarity Metrics", visible=False)
31
- similarity_metrics = gr.Markdown(visible=False)
32
-
33
- # Status or error message area
34
- status_message_visible = gr.Checkbox(value=False, visible=False, label="Status Message Visible")
35
- status_message = gr.Markdown(visible=False)
36
-
37
- # Create bias visualization container (initially hidden)
38
- with gr.Column(visible=False) as bias_visualizations:
39
- gr.Markdown("### Bias Analysis Visualizations")
40
- bias_html = gr.HTML(visible=True) # Add this line for bias HTML content
41
-
42
- # Return all components as a list
43
- return [
44
- analysis_results_state := gr.State({}),
45
- analysis_output := gr.JSON(visible=False),
46
- visualization_area_visible,
47
- analysis_title,
48
- prompt_title,
49
- models_compared,
50
- model1_title,
51
- model1_words,
52
- model2_title,
53
- model2_words,
54
- similarity_metrics_title,
55
- similarity_metrics,
56
- status_message_visible,
57
- status_message,
58
- bias_visualizations,
59
- bias_html # Add this line to include in the returned components
60
- ]
61
-
62
-
63
- def create_empty_visualization_response():
64
- """
65
- Creates an empty visualization response with the correct structure.
66
- This is useful for error handling and default responses.
67
-
68
- Returns:
69
- list: A list with empty values for all visualization components
70
- """
71
- return [
72
- {}, # analysis_results_state
73
- {}, # analysis_output
74
- False, # visualization_area_visible
75
- "", # analysis_title
76
- "", # prompt_title
77
- "", # models_compared
78
- "", # model1_title
79
- "", # model1_words
80
- "", # model2_title
81
- "", # model2_words
82
- "", # similarity_metrics_title
83
- "", # similarity_metrics
84
- False, # status_message_visible
85
- "", # status_message
86
- gr.update(visible=False), # bias_visualizations
87
- "" # bias_html - Add this line
88
- ]
89
-
90
-
91
- def process_and_visualize_bias_analysis(analysis_results):
92
- """
93
- Wrapper for bias visualization function from visualization.bias_visualizer
94
-
95
- Args:
96
- analysis_results (dict): The analysis results
97
-
98
- Returns:
99
- list: Components for bias visualization
100
- """
101
- from visualization.bias_visualizer import process_and_visualize_bias_analysis
102
- return process_and_visualize_bias_analysis(analysis_results)
103
-
104
-
105
- def process_and_visualize_ngram_analysis(analysis_results):
106
- """
107
- Wrapper for n-gram visualization function from visualization.ngram_visualizer
108
-
109
- Args:
110
- analysis_results (dict): The analysis results
111
-
112
- Returns:
113
- list: Components for n-gram visualization
114
- """
115
- from visualization.ngram_visualizer import process_and_visualize_ngram_analysis
116
- return process_and_visualize_ngram_analysis(analysis_results)
117
-
118
-
119
- def process_and_visualize_topic_analysis(analysis_results):
120
- """
121
- Wrapper for topic modeling visualization function from visualization.topic_visualizer
122
-
123
- Args:
124
- analysis_results (dict): The analysis results
125
-
126
- Returns:
127
- list: Components for topic visualization
128
- """
129
- from visualization.topic_visualizer import process_and_visualize_topic_analysis
130
- return process_and_visualize_topic_analysis(analysis_results)