Spaces:
Build error
Build error
Ryan
committed on
Commit
·
769095a
1
Parent(s):
0071ad3
update
Browse files- README.md +17 -0
- app.py +88 -43
- requirements.txt +1 -0
README.md
CHANGED
|
@@ -47,6 +47,22 @@ Once you have loaded a dataset, you now have four options:
|
|
| 47 |
- Bias Detection
|
| 48 |
- Classifier
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
## RoBERTa Sentiment
|
| 51 |
|
| 52 |
|
|
@@ -212,6 +228,7 @@ Limitations:
|
|
| 212 |
## Bias Detection
|
| 213 |
|
| 214 |
|
|
|
|
| 215 |
# Contributions
|
| 216 |
|
| 217 |
|
|
|
|
| 47 |
- Bias Detection
|
| 48 |
- Classifier
|
| 49 |
|
| 50 |
+
### Bag of Words
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
### N-grams
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
### Bias Detection
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
### Classifier
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
|
| 66 |
## RoBERTa Sentiment
|
| 67 |
|
| 68 |
|
|
|
|
| 228 |
## Bias Detection
|
| 229 |
|
| 230 |
|
| 231 |
+
|
| 232 |
# Contributions
|
| 233 |
|
| 234 |
|
app.py
CHANGED
|
@@ -11,6 +11,7 @@ import matplotlib.pyplot as plt
|
|
| 11 |
import io
|
| 12 |
import base64
|
| 13 |
import datetime
|
|
|
|
| 14 |
|
| 15 |
# Download necessary NLTK resources function remains unchanged
|
| 16 |
def download_nltk_resources():
|
|
@@ -135,12 +136,11 @@ def create_app():
|
|
| 135 |
status_message = gr.Markdown(visible=False)
|
| 136 |
|
| 137 |
# Define a helper function to extract parameter values and run the analysis
|
| 138 |
-
def run_analysis(dataset, selected_analysis, ngram_n, topic_count
|
| 139 |
try:
|
| 140 |
if not dataset or "entries" not in dataset or not dataset["entries"]:
|
| 141 |
return (
|
| 142 |
{}, # analysis_results_state
|
| 143 |
-
existing_log, # no changes to user_analysis_log
|
| 144 |
False, # analysis_output visibility
|
| 145 |
False, # visualization_area_visible
|
| 146 |
gr.update(visible=False), # analysis_title
|
|
@@ -169,44 +169,10 @@ def create_app():
|
|
| 169 |
# Process the analysis request - passing selected_analysis as a string
|
| 170 |
analysis_results, _ = process_analysis_request(dataset, selected_analysis, parameters)
|
| 171 |
|
| 172 |
-
# NEW: Store the results in the user_analysis_log
|
| 173 |
-
updated_log = existing_log.copy() if existing_log else {}
|
| 174 |
-
|
| 175 |
-
# Get the prompt text for identifying this analysis
|
| 176 |
-
prompt_text = None
|
| 177 |
-
if analysis_results and "analyses" in analysis_results:
|
| 178 |
-
prompt_text = list(analysis_results["analyses"].keys())[0] if analysis_results["analyses"] else None
|
| 179 |
-
|
| 180 |
-
if prompt_text:
|
| 181 |
-
# Initialize this prompt in the log if it doesn't exist
|
| 182 |
-
if prompt_text not in updated_log:
|
| 183 |
-
updated_log[prompt_text] = {}
|
| 184 |
-
|
| 185 |
-
# Store the results for this analysis type
|
| 186 |
-
if selected_analysis in ["Bag of Words", "N-gram Analysis", "Bias Detection", "Classifier"]:
|
| 187 |
-
# Only store if the analysis was actually performed and has results
|
| 188 |
-
analyses = analysis_results["analyses"][prompt_text]
|
| 189 |
-
|
| 190 |
-
# Map the selected analysis to its key in the analyses dict
|
| 191 |
-
analysis_key_map = {
|
| 192 |
-
"Bag of Words": "bag_of_words",
|
| 193 |
-
"N-gram Analysis": "ngram_analysis",
|
| 194 |
-
"Bias Detection": "bias_detection",
|
| 195 |
-
"Classifier": "classifier"
|
| 196 |
-
}
|
| 197 |
-
|
| 198 |
-
if analysis_key_map[selected_analysis] in analyses:
|
| 199 |
-
# Store the specific analysis result
|
| 200 |
-
updated_log[prompt_text][selected_analysis] = {
|
| 201 |
-
"timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
| 202 |
-
"result": analyses[analysis_key_map[selected_analysis]]
|
| 203 |
-
}
|
| 204 |
-
|
| 205 |
# If there's an error or no results
|
| 206 |
if not analysis_results or "analyses" not in analysis_results or not analysis_results["analyses"]:
|
| 207 |
return (
|
| 208 |
analysis_results,
|
| 209 |
-
updated_log, # Return the updated log
|
| 210 |
False,
|
| 211 |
False,
|
| 212 |
gr.update(visible=False),
|
|
@@ -251,7 +217,6 @@ def create_app():
|
|
| 251 |
if "message" in analyses:
|
| 252 |
return (
|
| 253 |
analysis_results,
|
| 254 |
-
updated_log, # Return the updated log
|
| 255 |
False,
|
| 256 |
False,
|
| 257 |
gr.update(visible=False),
|
|
@@ -349,7 +314,13 @@ def create_app():
|
|
| 349 |
model1_title_visible = True
|
| 350 |
model1_title_value = f"#### Top {size_name} Used by {model1_name}"
|
| 351 |
|
| 352 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
model1_words_visible = True
|
| 354 |
model1_words_value = ", ".join(ngram_list)
|
| 355 |
|
|
@@ -357,7 +328,13 @@ def create_app():
|
|
| 357 |
model2_title_visible = True
|
| 358 |
model2_title_value = f"#### Top {size_name} Used by {model2_name}"
|
| 359 |
|
| 360 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
model2_words_visible = True
|
| 362 |
model2_words_value = ", ".join(ngram_list)
|
| 363 |
|
|
@@ -374,6 +351,78 @@ def create_app():
|
|
| 374 |
similarity_metrics_value = f"""
|
| 375 |
- **Common {size_name}**: {common_count} {size_name.lower()} appear in both responses
|
| 376 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
|
| 378 |
# Check for Topic Modeling analysis
|
| 379 |
elif selected_analysis == "Topic Modeling" and "topic_modeling" in analyses:
|
|
@@ -575,7 +624,6 @@ def create_app():
|
|
| 575 |
if not visualization_area_visible:
|
| 576 |
return (
|
| 577 |
analysis_results,
|
| 578 |
-
updated_log, # Return the updated log
|
| 579 |
False,
|
| 580 |
False,
|
| 581 |
gr.update(visible=False),
|
|
@@ -586,7 +634,6 @@ def create_app():
|
|
| 586 |
gr.update(visible=False),
|
| 587 |
gr.update(visible=False),
|
| 588 |
gr.update(visible=False),
|
| 589 |
-
gr.update(visible=False),
|
| 590 |
True, # status_message_visible
|
| 591 |
gr.update(visible=True, value="❌ **No visualization data found.** Make sure to select a valid analysis option.")
|
| 592 |
)
|
|
@@ -594,7 +641,6 @@ def create_app():
|
|
| 594 |
# Return all updated component values
|
| 595 |
return (
|
| 596 |
analysis_results, # analysis_results_state
|
| 597 |
-
updated_log, # Return the updated log
|
| 598 |
False, # analysis_output visibility
|
| 599 |
True, # visualization_area_visible
|
| 600 |
gr.update(visible=True), # analysis_title
|
|
@@ -617,7 +663,6 @@ def create_app():
|
|
| 617 |
|
| 618 |
return (
|
| 619 |
{"error": error_msg}, # analysis_results_state
|
| 620 |
-
existing_log, # Return unchanged log
|
| 621 |
True, # analysis_output visibility (show raw JSON for debugging)
|
| 622 |
False, # visualization_area_visible
|
| 623 |
gr.update(visible=False),
|
|
|
|
| 11 |
import io
|
| 12 |
import base64
|
| 13 |
import datetime
|
| 14 |
+
from PIL import Image
|
| 15 |
|
| 16 |
# Download necessary NLTK resources function remains unchanged
|
| 17 |
def download_nltk_resources():
|
|
|
|
| 136 |
status_message = gr.Markdown(visible=False)
|
| 137 |
|
| 138 |
# Define a helper function to extract parameter values and run the analysis
|
| 139 |
+
def run_analysis(dataset, selected_analysis, ngram_n, topic_count):
|
| 140 |
try:
|
| 141 |
if not dataset or "entries" not in dataset or not dataset["entries"]:
|
| 142 |
return (
|
| 143 |
{}, # analysis_results_state
|
|
|
|
| 144 |
False, # analysis_output visibility
|
| 145 |
False, # visualization_area_visible
|
| 146 |
gr.update(visible=False), # analysis_title
|
|
|
|
| 169 |
# Process the analysis request - passing selected_analysis as a string
|
| 170 |
analysis_results, _ = process_analysis_request(dataset, selected_analysis, parameters)
|
| 171 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
# If there's an error or no results
|
| 173 |
if not analysis_results or "analyses" not in analysis_results or not analysis_results["analyses"]:
|
| 174 |
return (
|
| 175 |
analysis_results,
|
|
|
|
| 176 |
False,
|
| 177 |
False,
|
| 178 |
gr.update(visible=False),
|
|
|
|
| 217 |
if "message" in analyses:
|
| 218 |
return (
|
| 219 |
analysis_results,
|
|
|
|
| 220 |
False,
|
| 221 |
False,
|
| 222 |
gr.update(visible=False),
|
|
|
|
| 314 |
model1_title_visible = True
|
| 315 |
model1_title_value = f"#### Top {size_name} Used by {model1_name}"
|
| 316 |
|
| 317 |
+
# Create a better formatted list of n-grams
|
| 318 |
+
ngram_list = []
|
| 319 |
+
for item in important_ngrams[model1_name][:10]:
|
| 320 |
+
ngram_text = item['ngram']
|
| 321 |
+
ngram_count = item['count']
|
| 322 |
+
ngram_list.append(f"**{ngram_text}** ({ngram_count})")
|
| 323 |
+
|
| 324 |
model1_words_visible = True
|
| 325 |
model1_words_value = ", ".join(ngram_list)
|
| 326 |
|
|
|
|
| 328 |
model2_title_visible = True
|
| 329 |
model2_title_value = f"#### Top {size_name} Used by {model2_name}"
|
| 330 |
|
| 331 |
+
# Create a better formatted list of n-grams
|
| 332 |
+
ngram_list = []
|
| 333 |
+
for item in important_ngrams[model2_name][:10]:
|
| 334 |
+
ngram_text = item['ngram']
|
| 335 |
+
ngram_count = item['count']
|
| 336 |
+
ngram_list.append(f"**{ngram_text}** ({ngram_count})")
|
| 337 |
+
|
| 338 |
model2_words_visible = True
|
| 339 |
model2_words_value = ", ".join(ngram_list)
|
| 340 |
|
|
|
|
| 351 |
similarity_metrics_value = f"""
|
| 352 |
- **Common {size_name}**: {common_count} {size_name.lower()} appear in both responses
|
| 353 |
"""
|
| 354 |
+
|
| 355 |
+
# Create visualization using matplotlib (similar to Visuals tab)
|
| 356 |
+
import matplotlib.pyplot as plt
|
| 357 |
+
import io
|
| 358 |
+
from PIL import Image
|
| 359 |
+
|
| 360 |
+
# Create a new function to generate N-gram visualizations
|
| 361 |
+
def generate_ngram_visualization(important_ngrams, model1_name, model2_name):
|
| 362 |
+
plt.figure(figsize=(12, 6))
|
| 363 |
+
|
| 364 |
+
# Process data for model 1
|
| 365 |
+
model1_data = {}
|
| 366 |
+
if model1_name in important_ngrams:
|
| 367 |
+
for item in important_ngrams[model1_name][:10]:
|
| 368 |
+
model1_data[item['ngram']] = item['count']
|
| 369 |
+
|
| 370 |
+
# Process data for model 2
|
| 371 |
+
model2_data = {}
|
| 372 |
+
if model2_name in important_ngrams:
|
| 373 |
+
for item in important_ngrams[model2_name][:10]:
|
| 374 |
+
model2_data[item['ngram']] = item['count']
|
| 375 |
+
|
| 376 |
+
# Plot for the first model
|
| 377 |
+
plt.subplot(1, 2, 1)
|
| 378 |
+
sorted_data1 = sorted(model1_data.items(), key=lambda x: x[1], reverse=True)[:10]
|
| 379 |
+
terms1, counts1 = zip(*sorted_data1) if sorted_data1 else ([], [])
|
| 380 |
+
|
| 381 |
+
# Create horizontal bar chart
|
| 382 |
+
plt.barh([t[:20] + '...' if len(t) > 20 else t for t in terms1[::-1]], counts1[::-1])
|
| 383 |
+
plt.xlabel('Frequency')
|
| 384 |
+
plt.title(f'Top {size_name} Used by {model1_name}')
|
| 385 |
+
plt.tight_layout()
|
| 386 |
+
|
| 387 |
+
# Plot for the second model
|
| 388 |
+
plt.subplot(1, 2, 2)
|
| 389 |
+
sorted_data2 = sorted(model2_data.items(), key=lambda x: x[1], reverse=True)[:10]
|
| 390 |
+
terms2, counts2 = zip(*sorted_data2) if sorted_data2 else ([], [])
|
| 391 |
+
|
| 392 |
+
# Create horizontal bar chart
|
| 393 |
+
plt.barh([t[:20] + '...' if len(t) > 20 else t for t in terms2[::-1]], counts2[::-1])
|
| 394 |
+
plt.xlabel('Frequency')
|
| 395 |
+
plt.title(f'Top {size_name} Used by {model2_name}')
|
| 396 |
+
plt.tight_layout()
|
| 397 |
+
|
| 398 |
+
# Save the plot to a bytes buffer
|
| 399 |
+
buf = io.BytesIO()
|
| 400 |
+
plt.savefig(buf, format='png', dpi=100)
|
| 401 |
+
buf.seek(0)
|
| 402 |
+
|
| 403 |
+
# Convert to PIL Image
|
| 404 |
+
image = Image.open(buf)
|
| 405 |
+
return image
|
| 406 |
+
|
| 407 |
+
# Create the visualization
|
| 408 |
+
try:
|
| 409 |
+
viz_image = generate_ngram_visualization(important_ngrams, model1_name, model2_name)
|
| 410 |
+
|
| 411 |
+
# Convert the image to a base64 string for embedding
|
| 412 |
+
buffered = io.BytesIO()
|
| 413 |
+
viz_image.save(buffered, format="PNG")
|
| 414 |
+
img_str = base64.b64encode(buffered.getvalue()).decode()
|
| 415 |
+
|
| 416 |
+
# Append the image to the metrics_value
|
| 417 |
+
similarity_metrics_value += f"""
|
| 418 |
+
<div style="margin-top: 20px;">
|
| 419 |
+
<img src="data:image/png;base64,{img_str}" alt="N-gram visualization" style="max-width: 100%;">
|
| 420 |
+
</div>
|
| 421 |
+
"""
|
| 422 |
+
similarity_metrics_visible = True
|
| 423 |
+
except Exception as viz_error:
|
| 424 |
+
print(f"Visualization error: {viz_error}")
|
| 425 |
+
# Handle the error gracefully - continue without the visualization
|
| 426 |
|
| 427 |
# Check for Topic Modeling analysis
|
| 428 |
elif selected_analysis == "Topic Modeling" and "topic_modeling" in analyses:
|
|
|
|
| 624 |
if not visualization_area_visible:
|
| 625 |
return (
|
| 626 |
analysis_results,
|
|
|
|
| 627 |
False,
|
| 628 |
False,
|
| 629 |
gr.update(visible=False),
|
|
|
|
| 634 |
gr.update(visible=False),
|
| 635 |
gr.update(visible=False),
|
| 636 |
gr.update(visible=False),
|
|
|
|
| 637 |
True, # status_message_visible
|
| 638 |
gr.update(visible=True, value="❌ **No visualization data found.** Make sure to select a valid analysis option.")
|
| 639 |
)
|
|
|
|
| 641 |
# Return all updated component values
|
| 642 |
return (
|
| 643 |
analysis_results, # analysis_results_state
|
|
|
|
| 644 |
False, # analysis_output visibility
|
| 645 |
True, # visualization_area_visible
|
| 646 |
gr.update(visible=True), # analysis_title
|
|
|
|
| 663 |
|
| 664 |
return (
|
| 665 |
{"error": error_msg}, # analysis_results_state
|
|
|
|
| 666 |
True, # analysis_output visibility (show raw JSON for debugging)
|
| 667 |
False, # visualization_area_visible
|
| 668 |
gr.update(visible=False),
|
requirements.txt
CHANGED
|
@@ -7,3 +7,4 @@ plotly>=5.3.0
|
|
| 7 |
matplotlib>=3.4.0
|
| 8 |
transformers>=4.15.0
|
| 9 |
torch>=1.9.0
|
|
|
|
|
|
| 7 |
matplotlib>=3.4.0
|
| 8 |
transformers>=4.15.0
|
| 9 |
torch>=1.9.0
|
| 10 |
+
pillow>=9.0.0
|