Ryan committed on
Commit
769095a
·
1 Parent(s): 0071ad3
Files changed (3) hide show
  1. README.md +17 -0
  2. app.py +88 -43
  3. requirements.txt +1 -0
README.md CHANGED
@@ -47,6 +47,22 @@ Once you have loaded a dataset, you now have four options:
47
  - Bias Detection
48
  - Classifier
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  ## RoBERTa Sentiment
51
 
52
 
@@ -212,6 +228,7 @@ Limitations:
212
  ## Bias Detection
213
 
214
 
 
215
  # Contributions
216
 
217
 
 
47
  - Bias Detection
48
  - Classifier
49
 
50
+ ### Bag of Words
51
+
52
+
53
+
54
+ ### N-grams
55
+
56
+
57
+
58
+ ### Bias Detection
59
+
60
+
61
+
62
+ ### Classifier
63
+
64
+
65
+
66
  ## RoBERTa Sentiment
67
 
68
 
 
228
  ## Bias Detection
229
 
230
 
231
+
232
  # Contributions
233
 
234
 
app.py CHANGED
@@ -11,6 +11,7 @@ import matplotlib.pyplot as plt
11
  import io
12
  import base64
13
  import datetime
 
14
 
15
  # Download necessary NLTK resources function remains unchanged
16
  def download_nltk_resources():
@@ -135,12 +136,11 @@ def create_app():
135
  status_message = gr.Markdown(visible=False)
136
 
137
  # Define a helper function to extract parameter values and run the analysis
138
- def run_analysis(dataset, selected_analysis, ngram_n, topic_count, existing_log):
139
  try:
140
  if not dataset or "entries" not in dataset or not dataset["entries"]:
141
  return (
142
  {}, # analysis_results_state
143
- existing_log, # no changes to user_analysis_log
144
  False, # analysis_output visibility
145
  False, # visualization_area_visible
146
  gr.update(visible=False), # analysis_title
@@ -169,44 +169,10 @@ def create_app():
169
  # Process the analysis request - passing selected_analysis as a string
170
  analysis_results, _ = process_analysis_request(dataset, selected_analysis, parameters)
171
 
172
- # NEW: Store the results in the user_analysis_log
173
- updated_log = existing_log.copy() if existing_log else {}
174
-
175
- # Get the prompt text for identifying this analysis
176
- prompt_text = None
177
- if analysis_results and "analyses" in analysis_results:
178
- prompt_text = list(analysis_results["analyses"].keys())[0] if analysis_results["analyses"] else None
179
-
180
- if prompt_text:
181
- # Initialize this prompt in the log if it doesn't exist
182
- if prompt_text not in updated_log:
183
- updated_log[prompt_text] = {}
184
-
185
- # Store the results for this analysis type
186
- if selected_analysis in ["Bag of Words", "N-gram Analysis", "Bias Detection", "Classifier"]:
187
- # Only store if the analysis was actually performed and has results
188
- analyses = analysis_results["analyses"][prompt_text]
189
-
190
- # Map the selected analysis to its key in the analyses dict
191
- analysis_key_map = {
192
- "Bag of Words": "bag_of_words",
193
- "N-gram Analysis": "ngram_analysis",
194
- "Bias Detection": "bias_detection",
195
- "Classifier": "classifier"
196
- }
197
-
198
- if analysis_key_map[selected_analysis] in analyses:
199
- # Store the specific analysis result
200
- updated_log[prompt_text][selected_analysis] = {
201
- "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
202
- "result": analyses[analysis_key_map[selected_analysis]]
203
- }
204
-
205
  # If there's an error or no results
206
  if not analysis_results or "analyses" not in analysis_results or not analysis_results["analyses"]:
207
  return (
208
  analysis_results,
209
- updated_log, # Return the updated log
210
  False,
211
  False,
212
  gr.update(visible=False),
@@ -251,7 +217,6 @@ def create_app():
251
  if "message" in analyses:
252
  return (
253
  analysis_results,
254
- updated_log, # Return the updated log
255
  False,
256
  False,
257
  gr.update(visible=False),
@@ -349,7 +314,13 @@ def create_app():
349
  model1_title_visible = True
350
  model1_title_value = f"#### Top {size_name} Used by {model1_name}"
351
 
352
- ngram_list = [f"**{item['ngram']}** ({item['count']})" for item in important_ngrams[model1_name][:10]]
 
 
 
 
 
 
353
  model1_words_visible = True
354
  model1_words_value = ", ".join(ngram_list)
355
 
@@ -357,7 +328,13 @@ def create_app():
357
  model2_title_visible = True
358
  model2_title_value = f"#### Top {size_name} Used by {model2_name}"
359
 
360
- ngram_list = [f"**{item['ngram']}** ({item['count']})" for item in important_ngrams[model2_name][:10]]
 
 
 
 
 
 
361
  model2_words_visible = True
362
  model2_words_value = ", ".join(ngram_list)
363
 
@@ -374,6 +351,78 @@ def create_app():
374
  similarity_metrics_value = f"""
375
  - **Common {size_name}**: {common_count} {size_name.lower()} appear in both responses
376
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
 
378
  # Check for Topic Modeling analysis
379
  elif selected_analysis == "Topic Modeling" and "topic_modeling" in analyses:
@@ -575,7 +624,6 @@ def create_app():
575
  if not visualization_area_visible:
576
  return (
577
  analysis_results,
578
- updated_log, # Return the updated log
579
  False,
580
  False,
581
  gr.update(visible=False),
@@ -586,7 +634,6 @@ def create_app():
586
  gr.update(visible=False),
587
  gr.update(visible=False),
588
  gr.update(visible=False),
589
- gr.update(visible=False),
590
  True, # status_message_visible
591
  gr.update(visible=True, value="❌ **No visualization data found.** Make sure to select a valid analysis option.")
592
  )
@@ -594,7 +641,6 @@ def create_app():
594
  # Return all updated component values
595
  return (
596
  analysis_results, # analysis_results_state
597
- updated_log, # Return the updated log
598
  False, # analysis_output visibility
599
  True, # visualization_area_visible
600
  gr.update(visible=True), # analysis_title
@@ -617,7 +663,6 @@ def create_app():
617
 
618
  return (
619
  {"error": error_msg}, # analysis_results_state
620
- existing_log, # Return unchanged log
621
  True, # analysis_output visibility (show raw JSON for debugging)
622
  False, # visualization_area_visible
623
  gr.update(visible=False),
 
11
  import io
12
  import base64
13
  import datetime
14
+ from PIL import Image
15
 
16
  # Download necessary NLTK resources function remains unchanged
17
  def download_nltk_resources():
 
136
  status_message = gr.Markdown(visible=False)
137
 
138
  # Define a helper function to extract parameter values and run the analysis
139
+ def run_analysis(dataset, selected_analysis, ngram_n, topic_count):
140
  try:
141
  if not dataset or "entries" not in dataset or not dataset["entries"]:
142
  return (
143
  {}, # analysis_results_state
 
144
  False, # analysis_output visibility
145
  False, # visualization_area_visible
146
  gr.update(visible=False), # analysis_title
 
169
  # Process the analysis request - passing selected_analysis as a string
170
  analysis_results, _ = process_analysis_request(dataset, selected_analysis, parameters)
171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  # If there's an error or no results
173
  if not analysis_results or "analyses" not in analysis_results or not analysis_results["analyses"]:
174
  return (
175
  analysis_results,
 
176
  False,
177
  False,
178
  gr.update(visible=False),
 
217
  if "message" in analyses:
218
  return (
219
  analysis_results,
 
220
  False,
221
  False,
222
  gr.update(visible=False),
 
314
  model1_title_visible = True
315
  model1_title_value = f"#### Top {size_name} Used by {model1_name}"
316
 
317
+ # Create a better formatted list of n-grams
318
+ ngram_list = []
319
+ for item in important_ngrams[model1_name][:10]:
320
+ ngram_text = item['ngram']
321
+ ngram_count = item['count']
322
+ ngram_list.append(f"**{ngram_text}** ({ngram_count})")
323
+
324
  model1_words_visible = True
325
  model1_words_value = ", ".join(ngram_list)
326
 
 
328
  model2_title_visible = True
329
  model2_title_value = f"#### Top {size_name} Used by {model2_name}"
330
 
331
+ # Create a better formatted list of n-grams
332
+ ngram_list = []
333
+ for item in important_ngrams[model2_name][:10]:
334
+ ngram_text = item['ngram']
335
+ ngram_count = item['count']
336
+ ngram_list.append(f"**{ngram_text}** ({ngram_count})")
337
+
338
  model2_words_visible = True
339
  model2_words_value = ", ".join(ngram_list)
340
 
 
351
  similarity_metrics_value = f"""
352
  - **Common {size_name}**: {common_count} {size_name.lower()} appear in both responses
353
  """
354
+
355
+ # Create visualization using matplotlib (similar to Visuals tab)
356
+ import matplotlib.pyplot as plt
357
+ import io
358
+ from PIL import Image
359
+
360
+ # Create a new function to generate N-gram visualizations
361
+ def generate_ngram_visualization(important_ngrams, model1_name, model2_name):
362
+ plt.figure(figsize=(12, 6))
363
+
364
+ # Process data for model 1
365
+ model1_data = {}
366
+ if model1_name in important_ngrams:
367
+ for item in important_ngrams[model1_name][:10]:
368
+ model1_data[item['ngram']] = item['count']
369
+
370
+ # Process data for model 2
371
+ model2_data = {}
372
+ if model2_name in important_ngrams:
373
+ for item in important_ngrams[model2_name][:10]:
374
+ model2_data[item['ngram']] = item['count']
375
+
376
+ # Plot for the first model
377
+ plt.subplot(1, 2, 1)
378
+ sorted_data1 = sorted(model1_data.items(), key=lambda x: x[1], reverse=True)[:10]
379
+ terms1, counts1 = zip(*sorted_data1) if sorted_data1 else ([], [])
380
+
381
+ # Create horizontal bar chart
382
+ plt.barh([t[:20] + '...' if len(t) > 20 else t for t in terms1[::-1]], counts1[::-1])
383
+ plt.xlabel('Frequency')
384
+ plt.title(f'Top {size_name} Used by {model1_name}')
385
+ plt.tight_layout()
386
+
387
+ # Plot for the second model
388
+ plt.subplot(1, 2, 2)
389
+ sorted_data2 = sorted(model2_data.items(), key=lambda x: x[1], reverse=True)[:10]
390
+ terms2, counts2 = zip(*sorted_data2) if sorted_data2 else ([], [])
391
+
392
+ # Create horizontal bar chart
393
+ plt.barh([t[:20] + '...' if len(t) > 20 else t for t in terms2[::-1]], counts2[::-1])
394
+ plt.xlabel('Frequency')
395
+ plt.title(f'Top {size_name} Used by {model2_name}')
396
+ plt.tight_layout()
397
+
398
+ # Save the plot to a bytes buffer
399
+ buf = io.BytesIO()
400
+ plt.savefig(buf, format='png', dpi=100)
401
+ buf.seek(0)
402
+
403
+ # Convert to PIL Image
404
+ image = Image.open(buf)
405
+ return image
406
+
407
+ # Create the visualization
408
+ try:
409
+ viz_image = generate_ngram_visualization(important_ngrams, model1_name, model2_name)
410
+
411
+ # Convert the image to a base64 string for embedding
412
+ buffered = io.BytesIO()
413
+ viz_image.save(buffered, format="PNG")
414
+ img_str = base64.b64encode(buffered.getvalue()).decode()
415
+
416
+ # Append the image to the metrics_value
417
+ similarity_metrics_value += f"""
418
+ <div style="margin-top: 20px;">
419
+ <img src="data:image/png;base64,{img_str}" alt="N-gram visualization" style="max-width: 100%;">
420
+ </div>
421
+ """
422
+ similarity_metrics_visible = True
423
+ except Exception as viz_error:
424
+ print(f"Visualization error: {viz_error}")
425
+ # Handle the error gracefully - continue without the visualization
426
 
427
  # Check for Topic Modeling analysis
428
  elif selected_analysis == "Topic Modeling" and "topic_modeling" in analyses:
 
624
  if not visualization_area_visible:
625
  return (
626
  analysis_results,
 
627
  False,
628
  False,
629
  gr.update(visible=False),
 
634
  gr.update(visible=False),
635
  gr.update(visible=False),
636
  gr.update(visible=False),
 
637
  True, # status_message_visible
638
  gr.update(visible=True, value="❌ **No visualization data found.** Make sure to select a valid analysis option.")
639
  )
 
641
  # Return all updated component values
642
  return (
643
  analysis_results, # analysis_results_state
 
644
  False, # analysis_output visibility
645
  True, # visualization_area_visible
646
  gr.update(visible=True), # analysis_title
 
663
 
664
  return (
665
  {"error": error_msg}, # analysis_results_state
 
666
  True, # analysis_output visibility (show raw JSON for debugging)
667
  False, # visualization_area_visible
668
  gr.update(visible=False),
requirements.txt CHANGED
@@ -7,3 +7,4 @@ plotly>=5.3.0
7
  matplotlib>=3.4.0
8
  transformers>=4.15.0
9
  torch>=1.9.0
 
 
7
  matplotlib>=3.4.0
8
  transformers>=4.15.0
9
  torch>=1.9.0
10
+ pillow>=9.0.0