Upload dataset.py with huggingface_hub
Browse files- dataset.py +17 -8
dataset.py
CHANGED
|
@@ -27,46 +27,55 @@ from .metrics import __file__ as _
|
|
| 27 |
from .normalizers import __file__ as _
|
| 28 |
from .operator import __file__ as _
|
| 29 |
from .operators import __file__ as _
|
|
|
|
| 30 |
from .processors import __file__ as _
|
| 31 |
from .random_utils import __file__ as _
|
| 32 |
from .recipe import __file__ as _
|
| 33 |
from .register import __file__ as _
|
| 34 |
from .schema import __file__ as _
|
| 35 |
-
from .serializers import __file__ as _
|
| 36 |
from .settings_utils import __file__ as _
|
|
|
|
|
|
|
| 37 |
from .split_utils import __file__ as _
|
| 38 |
from .splitters import __file__ as _
|
| 39 |
from .standard import __file__ as _
|
| 40 |
from .stream import __file__ as _
|
|
|
|
|
|
|
| 41 |
from .task import __file__ as _
|
| 42 |
from .templates import __file__ as _
|
| 43 |
from .text_utils import __file__ as _
|
| 44 |
from .type_utils import __file__ as _
|
| 45 |
from .utils import __file__ as _
|
|
|
|
| 46 |
from .validate import __file__ as _
|
| 47 |
from .version import __file__ as _
|
| 48 |
from .version import version
|
| 49 |
|
| 50 |
logger = get_logger()
|
|
|
|
| 51 |
|
| 52 |
|
| 53 |
class Dataset(datasets.GeneratorBasedBuilder):
|
| 54 |
"""TODO: Short description of my dataset."""
|
| 55 |
|
| 56 |
-
VERSION =
|
| 57 |
|
| 58 |
@property
|
| 59 |
def generators(self):
|
| 60 |
if not hasattr(self, "_generators") or self._generators is None:
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
from unitxt.dataset_utils import \
|
| 63 |
get_dataset_artifact as get_dataset_artifact_installed
|
| 64 |
|
| 65 |
-
unitxt_installed = True
|
| 66 |
-
except ImportError:
|
| 67 |
-
unitxt_installed = False
|
| 68 |
-
|
| 69 |
-
if unitxt_installed:
|
| 70 |
logger.info("Loading with installed unitxt library...")
|
| 71 |
dataset = get_dataset_artifact_installed(self.config.name)
|
| 72 |
else:
|
|
|
|
| 27 |
from .normalizers import __file__ as _
|
| 28 |
from .operator import __file__ as _
|
| 29 |
from .operators import __file__ as _
|
| 30 |
+
from .parsing_utils import __file__ as _
|
| 31 |
from .processors import __file__ as _
|
| 32 |
from .random_utils import __file__ as _
|
| 33 |
from .recipe import __file__ as _
|
| 34 |
from .register import __file__ as _
|
| 35 |
from .schema import __file__ as _
|
|
|
|
| 36 |
from .settings_utils import __file__ as _
|
| 37 |
+
from .settings_utils import get_constants
|
| 38 |
+
from .span_lableing_operators import __file__ as _
|
| 39 |
from .split_utils import __file__ as _
|
| 40 |
from .splitters import __file__ as _
|
| 41 |
from .standard import __file__ as _
|
| 42 |
from .stream import __file__ as _
|
| 43 |
+
from .struct_data_operators import __file__ as _
|
| 44 |
+
from .system_prompts import __file__ as _
|
| 45 |
from .task import __file__ as _
|
| 46 |
from .templates import __file__ as _
|
| 47 |
from .text_utils import __file__ as _
|
| 48 |
from .type_utils import __file__ as _
|
| 49 |
from .utils import __file__ as _
|
| 50 |
+
from .utils import is_package_installed
|
| 51 |
from .validate import __file__ as _
|
| 52 |
from .version import __file__ as _
|
| 53 |
from .version import version
|
| 54 |
|
| 55 |
logger = get_logger()
|
| 56 |
+
constants = get_constants()
|
| 57 |
|
| 58 |
|
| 59 |
class Dataset(datasets.GeneratorBasedBuilder):
|
| 60 |
"""TODO: Short description of my dataset."""
|
| 61 |
|
| 62 |
+
VERSION = constants.version
|
| 63 |
|
| 64 |
@property
|
| 65 |
def generators(self):
|
| 66 |
if not hasattr(self, "_generators") or self._generators is None:
|
| 67 |
+
if is_package_installed("unitxt"):
|
| 68 |
+
from unitxt.settings_utils import \
|
| 69 |
+
get_constants as installed_get_constants
|
| 70 |
+
|
| 71 |
+
installed_package_constants = installed_get_constants()
|
| 72 |
+
if installed_package_constants.version != self.VERSION:
|
| 73 |
+
raise ValueError(
|
| 74 |
+
f"Located installed unitxt version {installed_package_constants.version} that is different than unitxt dataset version {self.VERSION}. Please make sure the installed version is identical to the dataset version."
|
| 75 |
+
)
|
| 76 |
from unitxt.dataset_utils import \
|
| 77 |
get_dataset_artifact as get_dataset_artifact_installed
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
logger.info("Loading with installed unitxt library...")
|
| 80 |
dataset = get_dataset_artifact_installed(self.config.name)
|
| 81 |
else:
|