Update Space (evaluate main: 4ca8eed5)
Browse files- README.md +2 -1
- matthews_correlation.py +41 -4
- requirements.txt +1 -1
README.md
CHANGED
|
@@ -48,9 +48,10 @@ At minimum, this metric requires a list of predictions and a list of references:
|
|
| 48 |
- **`predictions`** (`list` of `int`s): Predicted class labels.
|
| 49 |
- **`references`** (`list` of `int`s): Ground truth labels.
|
| 50 |
- **`sample_weight`** (`list` of `int`s, `float`s, or `bool`s): Sample weights. Defaults to `None`.
|
|
|
|
| 51 |
|
| 52 |
### Output Values
|
| 53 |
-
- **`matthews_correlation`** (`float`): Matthews correlation coefficient.
|
| 54 |
|
| 55 |
The metric output takes the following form:
|
| 56 |
```python
|
|
|
|
| 48 |
- **`predictions`** (`list` of `int`s): Predicted class labels.
|
| 49 |
- **`references`** (`list` of `int`s): Ground truth labels.
|
| 50 |
- **`sample_weight`** (`list` of `int`s, `float`s, or `bool`s): Sample weights. Defaults to `None`.
|
| 51 |
+
- **`average`** (`None` or `'macro'`): For the multilabel case, whether to return one correlation coefficient per feature (`average=None`) or their unweighted mean (`average='macro'`). Defaults to `None`.
|
| 52 |
|
| 53 |
### Output Values
|
| 54 |
+
- **`matthews_correlation`** (`float` or `list` of `float`s): Matthews correlation coefficient, or list of them in the multilabel case without averaging.
|
| 55 |
|
| 56 |
The metric output takes the following form:
|
| 57 |
```python
|
matthews_correlation.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
| 14 |
"""Matthews Correlation metric."""
|
| 15 |
|
| 16 |
import datasets
|
|
|
|
| 17 |
from sklearn.metrics import matthews_corrcoef
|
| 18 |
|
| 19 |
import evaluate
|
|
@@ -36,6 +37,9 @@ _KWARGS_DESCRIPTION = """
|
|
| 36 |
Args:
|
| 37 |
predictions (list of int): Predicted labels, as returned by a model.
|
| 38 |
references (list of int): Ground truth labels.
|
|
|
|
|
|
|
|
|
|
| 39 |
sample_weight (list of int, float, or bool): Sample weights. Defaults to `None`.
|
| 40 |
Returns:
|
| 41 |
matthews_correlation (dict containing float): Matthews correlation.
|
|
@@ -62,6 +66,21 @@ Examples:
|
|
| 62 |
... sample_weight=[0.5, 1, 0, 0, 0, 1])
|
| 63 |
>>> print(round(results['matthews_correlation'], 2))
|
| 64 |
-0.25
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
"""
|
| 66 |
|
| 67 |
_CITATION = """\
|
|
@@ -88,6 +107,11 @@ class MatthewsCorrelation(evaluate.Metric):
|
|
| 88 |
inputs_description=_KWARGS_DESCRIPTION,
|
| 89 |
features=datasets.Features(
|
| 90 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
"predictions": datasets.Value("int32"),
|
| 92 |
"references": datasets.Value("int32"),
|
| 93 |
}
|
|
@@ -97,7 +121,20 @@ class MatthewsCorrelation(evaluate.Metric):
|
|
| 97 |
],
|
| 98 |
)
|
| 99 |
|
| 100 |
-
def _compute(self, predictions, references, sample_weight=None):
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
"""Matthews Correlation metric."""
|
| 15 |
|
| 16 |
import datasets
|
| 17 |
+
import numpy as np
|
| 18 |
from sklearn.metrics import matthews_corrcoef
|
| 19 |
|
| 20 |
import evaluate
|
|
|
|
| 37 |
Args:
|
| 38 |
predictions (list of int): Predicted labels, as returned by a model.
|
| 39 |
references (list of int): Ground truth labels.
|
| 40 |
+
average (`string`): This parameter is used for multilabel configs. Defaults to `None`.
|
| 41 |
+
- None (default): Returns an array of Matthews correlation coefficients, one for each feature
|
| 42 |
+
- 'macro': Calculate metrics for each feature, and find their unweighted mean.
|
| 43 |
sample_weight (list of int, float, or bool): Sample weights. Defaults to `None`.
|
| 44 |
Returns:
|
| 45 |
matthews_correlation (dict containing float, or list of float for the multilabel config without averaging): Matthews correlation.
|
|
|
|
| 66 |
... sample_weight=[0.5, 1, 0, 0, 0, 1])
|
| 67 |
>>> print(round(results['matthews_correlation'], 2))
|
| 68 |
-0.25
|
| 69 |
+
|
| 70 |
+
Example 4, Multi-label without averaging:
|
| 71 |
+
>>> matthews_metric = evaluate.load("matthews_correlation", config_name="multilabel")
|
| 72 |
+
>>> results = matthews_metric.compute(references=[[0,1], [1,0], [1,1]],
|
| 73 |
+
... predictions=[[0,1], [1,1], [0,1]])
|
| 74 |
+
>>> print(results['matthews_correlation'])
|
| 75 |
+
[0.5, 0.0]
|
| 76 |
+
|
| 77 |
+
Example 5, Multi-label with averaging:
|
| 78 |
+
>>> matthews_metric = evaluate.load("matthews_correlation", config_name="multilabel")
|
| 79 |
+
>>> results = matthews_metric.compute(references=[[0,1], [1,0], [1,1]],
|
| 80 |
+
... predictions=[[0,1], [1,1], [0,1]],
|
| 81 |
+
... average='macro')
|
| 82 |
+
>>> print(round(results['matthews_correlation'], 2))
|
| 83 |
+
0.25
|
| 84 |
"""
|
| 85 |
|
| 86 |
_CITATION = """\
|
|
|
|
| 107 |
inputs_description=_KWARGS_DESCRIPTION,
|
| 108 |
features=datasets.Features(
|
| 109 |
{
|
| 110 |
+
"predictions": datasets.Sequence(datasets.Value("int32")),
|
| 111 |
+
"references": datasets.Sequence(datasets.Value("int32")),
|
| 112 |
+
}
|
| 113 |
+
if self.config_name == "multilabel"
|
| 114 |
+
else {
|
| 115 |
"predictions": datasets.Value("int32"),
|
| 116 |
"references": datasets.Value("int32"),
|
| 117 |
}
|
|
|
|
| 121 |
],
|
| 122 |
)
|
| 123 |
|
| 124 |
+
def _compute(self, predictions, references, sample_weight=None, average=None):
    """Compute the Matthews correlation coefficient.

    Args:
        predictions: Predicted labels. A flat list of ints for the default
            config; a 2-D list (one row per sample, one column per feature)
            when ``self.config_name == "multilabel"``.
        references: Ground-truth labels, same layout as ``predictions``.
        sample_weight: Optional per-sample weights forwarded unchanged to
            ``matthews_corrcoef``.
        average: Multilabel config only. ``None`` returns one coefficient
            per column; ``"macro"`` returns their unweighted mean. The value
            is ignored for the default (non-multilabel) config.

    Returns:
        ``{"matthews_correlation": value}`` where ``value`` is a built-in
        ``float``, or a list of built-in ``float`` for the multilabel config
        with ``average=None``.

    Raises:
        ValueError: In the multilabel config, if either input is not
            2-dimensional, if the two inputs differ in shape, or if
            ``average`` is neither ``None`` nor ``"macro"``.
    """
    if self.config_name != "multilabel":
        score = matthews_corrcoef(references, predictions, sample_weight=sample_weight)
        return {"matthews_correlation": float(score)}

    references = np.asarray(references)
    predictions = np.asarray(predictions)
    if not (references.ndim == 2 and predictions.ndim == 2):
        raise ValueError("For multi-label inputs, both references and predictions should be 2-dimensional")
    # Column-wise scoring below indexes both arrays with the same column
    # range, so a shape mismatch would either drop columns or raise IndexError.
    if references.shape != predictions.shape:
        raise ValueError(
            "For multi-label inputs, references and predictions should have the same shape, "
            f"got {references.shape} and {predictions.shape}"
        )
    # Reject an unsupported `average` before doing any per-column work.
    if average is not None and average != "macro":
        raise ValueError("Invalid `average`: expected `macro`, or None ")

    # Cast each score to a built-in float so the result matches the
    # single-label branch and prints/serialises as plain numbers.
    per_column = [
        float(matthews_corrcoef(references[:, col], predictions[:, col], sample_weight=sample_weight))
        for col in range(references.shape[1])
    ]
    if average == "macro":
        return {"matthews_correlation": float(np.mean(per_column))}
    return {"matthews_correlation": per_column}
|
requirements.txt
CHANGED
|
@@ -1,2 +1,2 @@
|
|
| 1 |
-
git+https://github.com/huggingface/evaluate@
|
| 2 |
scikit-learn
|
|
|
|
| 1 |
+
git+https://github.com/huggingface/evaluate@4ca8eed54000a52e542145f2d8d6201032423acb
|
| 2 |
scikit-learn
|