from typing import Callable, Optional, Union
from dataclasses import dataclass

from datasets import Dataset
from datasets.features import Features, Value
from evaluate.evaluation_suite import EvaluationSuite
from evaluate.module import EvaluationModuleInfo
@dataclass
class SubTask:
    """Configuration for a single sub-task run by the evaluation suite."""

    model_or_pipeline: Optional[Union[str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel"]] = None
    data: Optional[Union[str, Dataset]] = None
    subset: Optional[str] = None
    split: Optional[str] = None
    data_preprocessor: Optional[Callable] = None
    args_for_task: Optional[dict] = None
class Suite(EvaluationSuite):
    def _info(self):
        return EvaluationModuleInfo(
            description="dummy metric for tests",
            citation="insert citation here",
            features=Features({"predictions": Value("int64"), "references": Value("int64")}),
        )

    def __init__(self, name):
        # EvaluationSuite subclasses are instantiated with the suite name.
        super().__init__(name)
        self.preprocessor = None  # e.g. lambda x: {"text": x["text"].lower()}
        self.suite = [
            SubTask(
                data="imdb",
                split="test",
                data_preprocessor=self.preprocessor,
                args_for_task={
                    "metric": "accuracy",
                    "input_column": "text",
                    "label_column": "label",
                    "label_mapping": {
                        "LABEL_0": 0.0,
                        "LABEL_1": 1.0,
                    },
                },
            ),
            SubTask(
                data="sst2",
                split="test[:10]",
                data_preprocessor=self.preprocessor,
                args_for_task={
                    "metric": "accuracy",
                    "input_column": "sentence",
                    "label_column": "label",
                    "label_mapping": {
                        "LABEL_0": 0.0,
                        "LABEL_1": 1.0,
                    },
                },
            ),
        ]
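

# Illustrative usage sketch, not part of the suite definition above. It assumes the
# SubTask fields defined here line up with what `EvaluationSuite.run()` expects, and
# the sentiment checkpoint name is only an example.
if __name__ == "__main__":
    suite = Suite("sentiment-suite")
    # `run()` evaluates the given model or pipeline on every SubTask in `self.suite`
    # and returns one result dict per task.
    results = suite.run("distilbert-base-uncased-finetuned-sst-2-english")
    print(results)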