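"""Utilities for AutoTrain text classification: metric computation, model card
generation, and Hugging Face Inference Endpoint management."""
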
import os
import numpy as np
import requests
from sklearn import metrics
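
# Keys from the trainer's evaluate() output that are kept for the model card;
# create_model_card filters the eval dict down to these.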
BINARY_CLASSIFICATION_EVAL_METRICS = (
"eval_loss",
"eval_accuracy",
"eval_f1",
"eval_auc",
"eval_precision",
"eval_recall",
)

MULTI_CLASS_CLASSIFICATION_EVAL_METRICS = (
"eval_loss",
"eval_accuracy",
"eval_f1_macro",
"eval_f1_micro",
"eval_f1_weighted",
"eval_precision_macro",
"eval_precision_micro",
"eval_precision_weighted",
"eval_recall_macro",
"eval_recall_micro",
"eval_recall_weighted",
)

MODEL_CARD = """
---
library_name: transformers
tags:
- autotrain
- text-classification{base_model}
widget:
- text: "I love AutoTrain"{dataset_tag}
---

# Model Trained Using AutoTrain

- Problem type: Text Classification

## Validation Metrics

{validation_metrics}
"""
def _binary_classification_metrics(pred):
"""
Calculate various binary classification metrics.
Args:
pred (tuple): A tuple containing raw predictions and true labels.
- raw_predictions (numpy.ndarray): The raw prediction scores from the model.
- labels (numpy.ndarray): The true labels.
Returns:
dict: A dictionary containing the following metrics:
- "f1" (float): The F1 score.
- "precision" (float): The precision score.
- "recall" (float): The recall score.
- "auc" (float): The Area Under the ROC Curve (AUC) score.
- "accuracy" (float): The accuracy score.
"""
raw_predictions, labels = pred
predictions = np.argmax(raw_predictions, axis=1)
result = {
"f1": metrics.f1_score(labels, predictions),
"precision": metrics.precision_score(labels, predictions),
"recall": metrics.recall_score(labels, predictions),
"auc": metrics.roc_auc_score(labels, raw_predictions[:, 1]),
"accuracy": metrics.accuracy_score(labels, predictions),
}
return result
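

# Illustrative sketch (not part of the original module): how the binary metric
# helper is typically invoked. The dummy logits and labels below are made-up
# values, not real model output.
def _example_binary_metrics_usage():
    dummy_logits = np.array([[2.0, -1.0], [0.5, 1.5], [-0.3, 0.8], [1.2, 0.1]])
    dummy_labels = np.array([0, 1, 1, 0])
    # Returns {"f1": ..., "precision": ..., "recall": ..., "auc": ..., "accuracy": ...}
    return _binary_classification_metrics((dummy_logits, dummy_labels))
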
def _multi_class_classification_metrics(pred):
"""
Compute various classification metrics for multi-class classification.
Args:
pred (tuple): A tuple containing raw predictions and true labels.
- raw_predictions (numpy.ndarray): The raw prediction scores for each class.
- labels (numpy.ndarray): The true labels.
Returns:
dict: A dictionary containing the following metrics:
- "f1_macro": F1 score with macro averaging.
- "f1_micro": F1 score with micro averaging.
- "f1_weighted": F1 score with weighted averaging.
- "precision_macro": Precision score with macro averaging.
- "precision_micro": Precision score with micro averaging.
- "precision_weighted": Precision score with weighted averaging.
- "recall_macro": Recall score with macro averaging.
- "recall_micro": Recall score with micro averaging.
- "recall_weighted": Recall score with weighted averaging.
- "accuracy": Accuracy score.
"""
raw_predictions, labels = pred
predictions = np.argmax(raw_predictions, axis=1)
results = {
"f1_macro": metrics.f1_score(labels, predictions, average="macro"),
"f1_micro": metrics.f1_score(labels, predictions, average="micro"),
"f1_weighted": metrics.f1_score(labels, predictions, average="weighted"),
"precision_macro": metrics.precision_score(labels, predictions, average="macro"),
"precision_micro": metrics.precision_score(labels, predictions, average="micro"),
"precision_weighted": metrics.precision_score(labels, predictions, average="weighted"),
"recall_macro": metrics.recall_score(labels, predictions, average="macro"),
"recall_micro": metrics.recall_score(labels, predictions, average="micro"),
"recall_weighted": metrics.recall_score(labels, predictions, average="weighted"),
"accuracy": metrics.accuracy_score(labels, predictions),
}
return results
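

# Illustrative sketch: the multi-class helper with three classes. The arrays
# below are assumptions for demonstration only.
def _example_multi_class_metrics_usage():
    dummy_logits = np.array([[3.0, 0.1, -1.0], [0.2, 2.5, 0.3], [0.1, 0.4, 1.9]])
    dummy_labels = np.array([0, 1, 2])
    return _multi_class_classification_metrics((dummy_logits, dummy_labels))
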
def create_model_card(config, trainer, num_classes):
"""
Generates a model card for a text classification model.
Args:
config (object): Configuration object containing various settings and paths.
trainer (object): Trainer object used for evaluating the model.
num_classes (int): Number of classes in the classification task.
Returns:
str: A formatted string representing the model card.
"""
if config.valid_split is not None:
eval_scores = trainer.evaluate()
valid_metrics = (
BINARY_CLASSIFICATION_EVAL_METRICS if num_classes == 2 else MULTI_CLASS_CLASSIFICATION_EVAL_METRICS
)
eval_scores = [f"{k[len('eval_'):]}: {v}" for k, v in eval_scores.items() if k in valid_metrics]
eval_scores = "\n\n".join(eval_scores)
else:
eval_scores = "No validation metrics available"
if config.data_path == f"{config.project_name}/autotrain-data" or os.path.isdir(config.data_path):
dataset_tag = ""
else:
dataset_tag = f"\ndatasets:\n- {config.data_path}"
if os.path.isdir(config.model):
base_model = ""
else:
base_model = f"\nbase_model: {config.model}"
model_card = MODEL_CARD.format(
dataset_tag=dataset_tag,
validation_metrics=eval_scores,
base_model=base_model,
)
return model_card
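

# Illustrative sketch: create_model_card only needs a handful of config
# attributes plus trainer.evaluate(). The stub objects and values below are
# assumptions for demonstration, not part of AutoTrain's API.
def _example_create_model_card():
    from types import SimpleNamespace

    class _StubTrainer:
        def evaluate(self):
            return {"eval_loss": 0.21, "eval_accuracy": 0.93, "eval_f1": 0.92}

    config = SimpleNamespace(
        valid_split="validation",
        data_path="imdb",  # hypothetical dataset id
        project_name="my-project",  # hypothetical project name
        model="bert-base-uncased",  # hypothetical base model
    )
    return create_model_card(config, _StubTrainer(), num_classes=2)
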
def pause_endpoint(params):
"""
Pauses a Hugging Face endpoint using the provided parameters.
This function constructs an API URL using the endpoint ID from the environment
variables, and sends a POST request to pause the specified endpoint.
Args:
params (object): An object containing the following attribute:
- token (str): The authorization token required to authenticate the API request.
Returns:
dict: The JSON response from the API call.
"""
endpoint_id = os.environ["ENDPOINT_ID"]
username = endpoint_id.split("/")[0]
project_name = endpoint_id.split("/")[1]
api_url = f"https://api.endpoints.huggingface.cloud/v2/endpoint/{username}/{project_name}/pause"
headers = {"Authorization": f"Bearer {params.token}"}
r = requests.post(api_url, headers=headers)
return r.json()
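

# Illustrative sketch: pause_endpoint expects ENDPOINT_ID in the environment
# (formatted as "username/project-name") and any object exposing a .token
# attribute. The values below are placeholders, not real credentials; running
# this would hit the live Inference Endpoints API.
def _example_pause_endpoint():
    from types import SimpleNamespace

    os.environ["ENDPOINT_ID"] = "username/my-endpoint"  # placeholder
    return pause_endpoint(SimpleNamespace(token="hf_xxx"))  # placeholder token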