Spaces:
Running
Running
Merge pull request #1 from Christina-A-Pan/hf-refactor
Browse files- audit_utils.py +90 -0
- server.py +22 -0
audit_utils.py
CHANGED
|
@@ -23,6 +23,7 @@ import time
|
|
| 23 |
from sentence_transformers import SentenceTransformer, util
|
| 24 |
import torch
|
| 25 |
from bertopic import BERTopic
|
|
|
|
| 26 |
|
| 27 |
########################################
|
| 28 |
# PRE-LOADING
|
|
@@ -314,6 +315,95 @@ def get_grp_model_labels(n_label_per_bin, score_bins, grp_ids):
|
|
| 314 |
|
| 315 |
return ratings_grp
|
| 316 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
########################################
|
| 318 |
# GET_PERSONALIZED_MODEL utils
|
| 319 |
def fetch_existing_data(user, model_name):
|
|
|
|
| 23 |
from sentence_transformers import SentenceTransformer, util
|
| 24 |
import torch
|
| 25 |
from bertopic import BERTopic
|
| 26 |
+
from datetime import date
|
| 27 |
|
| 28 |
########################################
|
| 29 |
# PRE-LOADING
|
|
|
|
| 315 |
|
| 316 |
return ratings_grp
|
| 317 |
|
| 318 |
+
########################################
|
| 319 |
+
# SAVE_REPORT utils
|
| 320 |
+
# Convert indielabel json to AVID json format.
|
| 321 |
+
# See the AVID format in https://avidml.org/avidtools/reference/report
|
| 322 |
+
#
|
| 323 |
+
# Important mappings:
|
| 324 |
+
# IndieLabel Attribute AVID Attribute Example
|
| 325 |
+
# text_entry description "I think the Perspective API
|
| 326 |
+
# is too sensitive. Here are some examples."
|
| 327 |
+
# topic feature 0_shes_woman_lady_face
|
| 328 |
+
# persp_score model_score 0.94
|
| 329 |
+
# comment ori_input "She looks beautiful"
|
| 330 |
+
# user_rating personal_model_score 0.92
|
| 331 |
+
# user_decision user_decision "Non-toxic"
|
| 332 |
+
# Note that this is at the individual report level.
|
| 333 |
+
def convert_indie_label_json_to_avid_json(indie_label_json):
    """Convert a single IndieLabel report into an AVID report JSON string.

    Mapping (IndieLabel -> AVID):
        text_entry    -> description.value
        topic         -> metrics[i].results.feature
        persp_score   -> metrics[i].results.model_score (stringified)
        comment       -> metrics[i].results.ori_input
        user_rating   -> metrics[i].results.personal_model_score (stringified)
        user_decision -> metrics[i].results.user_decision

    Args:
        indie_label_json: dict for one report. Reads the "text_entry" key and
            the "evidence" key (an iterable of dicts, each with "topic",
            "persp_score", "comment", "user_rating" and "user_decision").
            A missing or empty "evidence" yields an empty "metrics" list.

    Returns:
        A JSON string (produced by json.dumps) holding the AVID-shaped report.

    Raises:
        KeyError: if an evidence entry lacks one of the keys listed above.
    """
    # Each evidence entry (one comment) becomes one AVID metric.
    metrics = [
        {
            "name": "Perspective API",
            "detection_method": {
                "type": "Detection",
                "name": "Individual Example from End User Audit",
            },
            "results": {
                "feature": e["topic"],
                # Scores are stringified to avoid a Float type error with the DB.
                "model_score": str(e["persp_score"]),
                "ori_input": e["comment"],
                "personal_model_score": str(e["user_rating"]),
                "user_decision": e["user_decision"],
            },
        }
        for e in (indie_label_json.get("evidence") or [])
    ]

    avid_json_dict = {
        "data_type": "AVID",
        "data_version": None,
        "metadata": None,
        "affects": {
            "developer": [],
            "deployer": ["Hugging Face"],
            # TODO: Make artifacts malleable during modularity work
            "artifacts": [
                {
                    "type": "Model",
                    "name": "Perspective API",
                },
            ],
        },
        "problemtype": {
            # None of the other ClassEnums appear applicable. Link:
            # https://avidml.org/avidtools/_modules/avidtools/datamodels/enums#ClassEnum
            "classof": "Undefined",
            "type": "Detection",
            "description": {
                "lang": "eng",  # TODO: Make language selectable
                "value": "This report contains results from an end user audit conducted on Hugging Face.",
            },
        },
        # Note: For the end users use case, each comment is an example.
        "metrics": metrics,
        "references": [],
        "description": {
            "lang": "eng",  # TODO: Make language selectable
            # The user's free-text report goes in "value" so that the
            # {"lang", "value"} structure is kept rather than being replaced
            # by a bare string.
            "value": indie_label_json.get("text_entry", ""),
        },
        "impact": {
            "avid": {
                "risk_domain": ["Ethics"],
                "sep_view": ["E0101: Group fairness"],
                "lifecycle_view": ["L05: Evaluation"],
                "taxonomy_version": "0.2",
            },
        },
        "credit": None,
        "reported_date": str(date.today()),
    }

    return json.dumps(avid_json_dict)
|
| 406 |
+
|
| 407 |
########################################
|
| 408 |
# GET_PERSONALIZED_MODEL utils
|
| 409 |
def fetch_existing_data(user, model_name):
|
server.py
CHANGED
|
@@ -21,6 +21,9 @@ import friendlywords as fw
|
|
| 21 |
|
| 22 |
import audit_utils as utils
|
| 23 |
|
|
|
|
|
|
|
|
|
|
| 24 |
app = Flask(__name__)
|
| 25 |
DEBUG = False # Debug flag for development; set to False for production
|
| 26 |
|
|
@@ -635,6 +638,21 @@ def get_prompts_scaffold():
|
|
| 635 |
},
|
| 636 |
]
|
| 637 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 638 |
########################################
|
| 639 |
# ROUTE: /SAVE_REPORTS
|
| 640 |
@app.route("/save_reports")
|
|
@@ -645,6 +663,9 @@ def save_reports():
|
|
| 645 |
scaffold_method = request.args.get("scaffold_method")
|
| 646 |
model = request.args.get("model")
|
| 647 |
|
|
|
|
|
|
|
|
|
|
| 648 |
# Save reports for current user to file
|
| 649 |
reports_file = utils.get_reports_file(cur_user, model)
|
| 650 |
with open(reports_file, "w", encoding ='utf8') as f:
|
|
@@ -653,6 +674,7 @@ def save_reports():
|
|
| 653 |
results = {
|
| 654 |
"status": "success",
|
| 655 |
}
|
|
|
|
| 656 |
return json.dumps(results)
|
| 657 |
|
| 658 |
########################################
|
|
|
|
| 21 |
|
| 22 |
import audit_utils as utils
|
| 23 |
|
| 24 |
+
import requests
|
| 25 |
+
|
| 26 |
+
|
| 27 |
app = Flask(__name__)
|
| 28 |
DEBUG = False # Debug flag for development; set to False for production
|
| 29 |
|
|
|
|
| 638 |
},
|
| 639 |
]
|
| 640 |
|
| 641 |
+
# Submit all reports to AVID
|
| 642 |
+
# Logs the responses
|
| 643 |
+
def submit_reports_to_AVID(reports):
    """Submit each report to the AVID API and print the responses.

    Reads the endpoint root from the AVID_API_URL environment variable and the
    key sent in the Authorization header from AVID_API_KEY. Submission is
    best-effort: a missing URL, a malformed report, or a failed request is
    printed and skipped instead of raised, because the caller invokes this
    before writing the reports to the local file.

    Args:
        reports: iterable of IndieLabel report dicts; each is converted with
            utils.convert_indie_label_json_to_avid_json before being posted.
    """
    # Set up the connection to AVID
    root = environ.get('AVID_API_URL')
    api_key = environ.get('AVID_API_KEY')
    if not root:
        # Without a URL, root + "submit" would raise a TypeError.
        print("AVID submission skipped: AVID_API_URL is not set")
        return

    # Yields the same URL as root + "submit" when root ends with "/", and a
    # well-formed one when it does not.
    url = root.rstrip("/") + "/submit"
    key = {"Authorization": api_key}

    for r in reports or []:
        try:
            new_report = utils.convert_indie_label_json_to_avid_json(r)
            # The loads ensures type compliance; the timeout keeps a stalled
            # AVID endpoint from hanging the request indefinitely.
            response = requests.post(
                url,
                json=json.loads(new_report),
                headers=key,
                timeout=30,
            )
        except (KeyError, TypeError, requests.exceptions.RequestException) as err:
            print("AVID submission failed:", err)
            continue

        try:
            uuid = response.json()
        except ValueError:
            # Non-JSON body (e.g. an HTML error page): log the raw text.
            uuid = response.text
        print("AVID API response:", response, uuid)
|
| 655 |
+
|
| 656 |
########################################
|
| 657 |
# ROUTE: /SAVE_REPORTS
|
| 658 |
@app.route("/save_reports")
|
|
|
|
| 663 |
scaffold_method = request.args.get("scaffold_method")
|
| 664 |
model = request.args.get("model")
|
| 665 |
|
| 666 |
+
# Submit reports to AVID
|
| 667 |
+
submit_reports_to_AVID(reports)
|
| 668 |
+
|
| 669 |
# Save reports for current user to file
|
| 670 |
reports_file = utils.get_reports_file(cur_user, model)
|
| 671 |
with open(reports_file, "w", encoding ='utf8') as f:
|
|
|
|
| 674 |
results = {
|
| 675 |
"status": "success",
|
| 676 |
}
|
| 677 |
+
print(results)
|
| 678 |
return json.dumps(results)
|
| 679 |
|
| 680 |
########################################
|