Upload artifact.py with huggingface_hub
Browse files- artifact.py +50 -22
artifact.py
CHANGED
|
@@ -6,10 +6,13 @@ import pkgutil
|
|
| 6 |
from abc import abstractmethod
|
| 7 |
from copy import deepcopy
|
| 8 |
from functools import lru_cache
|
| 9 |
-
from typing import Dict, List, Union, final
|
| 10 |
|
| 11 |
from .dataclass import AbstractField, Dataclass, Field, InternalField, fields
|
| 12 |
from .logging_utils import get_logger
|
|
|
|
|
|
|
|
|
|
| 13 |
from .settings_utils import get_settings
|
| 14 |
from .text_utils import camel_to_snake_case, is_camel_case
|
| 15 |
from .type_utils import issubtype
|
|
@@ -28,10 +31,19 @@ class Artifactories:
|
|
| 28 |
return cls.instance
|
| 29 |
|
| 30 |
def __iter__(self):
|
| 31 |
-
|
|
|
|
| 32 |
|
| 33 |
def __next__(self):
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
def register(self, artifactory):
|
| 37 |
assert isinstance(
|
|
@@ -173,31 +185,33 @@ class Artifact(Dataclass):
|
|
| 173 |
return clz in set(cls._class_register.values())
|
| 174 |
|
| 175 |
@classmethod
|
| 176 |
-
def _recursive_load(cls,
|
| 177 |
-
if isinstance(
|
| 178 |
new_d = {}
|
| 179 |
-
for key, value in
|
| 180 |
new_d[key] = cls._recursive_load(value)
|
| 181 |
-
|
| 182 |
-
elif isinstance(
|
| 183 |
-
|
| 184 |
else:
|
| 185 |
pass
|
| 186 |
-
if cls.is_artifact_dict(
|
| 187 |
-
cls.verify_artifact_dict(
|
| 188 |
-
return cls._class_register[
|
| 189 |
|
| 190 |
-
return
|
| 191 |
|
| 192 |
@classmethod
|
| 193 |
-
def from_dict(cls, d):
|
|
|
|
|
|
|
| 194 |
cls.verify_artifact_dict(d)
|
| 195 |
return cls._recursive_load(d)
|
| 196 |
|
| 197 |
@classmethod
|
| 198 |
-
def load(cls, path, artifact_identifier=None):
|
| 199 |
d = load_json(path)
|
| 200 |
-
new_artifact = cls.from_dict(d)
|
| 201 |
new_artifact.artifact_identifier = artifact_identifier
|
| 202 |
return new_artifact
|
| 203 |
|
|
@@ -267,6 +281,10 @@ class Artifactory(Artifact):
|
|
| 267 |
def __getitem__(self, name) -> Artifact:
|
| 268 |
pass
|
| 269 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
|
| 271 |
class UnitxtArtifactNotFoundError(Exception):
|
| 272 |
def __init__(self, name, artifactories):
|
|
@@ -285,14 +303,24 @@ def fetch_artifact(name):
|
|
| 285 |
if Artifact.is_artifact_file(name):
|
| 286 |
return Artifact.load(name), None
|
| 287 |
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
if name in artifactory:
|
| 293 |
-
return artifactory
|
| 294 |
|
| 295 |
-
raise UnitxtArtifactNotFoundError(name,
|
| 296 |
|
| 297 |
|
| 298 |
@lru_cache(maxsize=None)
|
|
|
|
| 6 |
from abc import abstractmethod
|
| 7 |
from copy import deepcopy
|
| 8 |
from functools import lru_cache
|
| 9 |
+
from typing import Dict, List, Optional, Union, final
|
| 10 |
|
| 11 |
from .dataclass import AbstractField, Dataclass, Field, InternalField, fields
|
| 12 |
from .logging_utils import get_logger
|
| 13 |
+
from .parsing_utils import (
|
| 14 |
+
separate_inside_and_outside_square_brackets,
|
| 15 |
+
)
|
| 16 |
from .settings_utils import get_settings
|
| 17 |
from .text_utils import camel_to_snake_case, is_camel_case
|
| 18 |
from .type_utils import issubtype
|
|
|
|
| 31 |
return cls.instance
|
| 32 |
|
| 33 |
def __iter__(self):
    """Restart iteration and return this object as its own iterator.

    The cursor is stored on the instance (``self._index``), so calling
    ``iter()`` again rewinds any iteration already in progress on the
    same object.
    """
    self._index = 0
    return self
|
| 36 |
|
| 37 |
def __next__(self):
    """Return the next registered artifactory that passes the locality filter.

    Advances ``self._index`` through ``self.artifactories``. When
    ``settings.use_only_local_catalogs`` is truthy, artifactories whose
    ``is_local`` attribute is falsy are skipped.

    Raises:
        StopIteration: when no further artifactory qualifies.
    """
    while self._index < len(self.artifactories):
        candidate = self.artifactories[self._index]
        # Advance before deciding so a skipped entry is never revisited.
        self._index += 1
        # ``is_local`` is only consulted when the local-only setting is on.
        if not settings.use_only_local_catalogs or candidate.is_local:
            return candidate
    raise StopIteration
|
| 47 |
|
| 48 |
def register(self, artifactory):
|
| 49 |
assert isinstance(
|
|
|
|
| 185 |
return clz in set(cls._class_register.values())
|
| 186 |
|
| 187 |
@classmethod
def _recursive_load(cls, obj):
    """Rebuild ``obj`` bottom-up, instantiating every artifact dict found.

    Dicts and lists are traversed recursively; any other value is returned
    unchanged. After its children are processed, a value accepted by
    ``cls.is_artifact_dict`` is checked with ``cls.verify_artifact_dict``
    and replaced by an instance of the class registered in
    ``cls._class_register`` under its ``"type"`` key, built from the
    remaining keys as keyword arguments.
    """
    if isinstance(obj, dict):
        # A fresh dict is built here, so the ``pop`` below never mutates
        # the dict that was passed in.
        obj = {key: cls._recursive_load(value) for key, value in obj.items()}
    elif isinstance(obj, list):
        obj = [cls._recursive_load(item) for item in obj]

    if not cls.is_artifact_dict(obj):
        return obj

    cls.verify_artifact_dict(obj)
    artifact_class = cls._class_register[obj.pop("type")]
    return artifact_class(**obj)
|
| 203 |
|
| 204 |
@classmethod
def from_dict(cls, d, overwrite_args=None):
    """Build an artifact from its dict form, optionally overriding entries.

    Args:
        d: Dict describing the artifact; it is not modified.
        overwrite_args: Optional mapping whose entries replace or extend
            the top-level keys of ``d`` (shallow merge) before the result
            is verified and loaded.

    Returns:
        Whatever ``cls._recursive_load`` produces for the merged dict.
    """
    source = d if overwrite_args is None else {**d, **overwrite_args}
    cls.verify_artifact_dict(source)
    return cls._recursive_load(source)
|
| 210 |
|
| 211 |
@classmethod
def load(cls, path, artifact_identifier=None, overwrite_args=None):
    """Load an artifact from the JSON file at ``path``.

    Args:
        path: Location handed to ``load_json``.
        artifact_identifier: Value stored on the result's
            ``artifact_identifier`` attribute (``None`` by default).
        overwrite_args: Forwarded to ``from_dict`` as overrides for the
            loaded dict.

    Returns:
        The object produced by ``from_dict``, tagged with the identifier.
    """
    loaded = cls.from_dict(load_json(path), overwrite_args=overwrite_args)
    loaded.artifact_identifier = artifact_identifier
    return loaded
|
| 217 |
|
|
|
|
| 281 |
def __getitem__(self, name) -> Artifact:
|
| 282 |
pass
|
| 283 |
|
| 284 |
+
@abstractmethod
def get_with_overwrite(self, name, overwrite_args) -> Artifact:
    """Return the artifact stored under ``name`` with overrides applied.

    Abstract: concrete artifactories supply the implementation.
    NOTE(review): presumably ``overwrite_args`` is merged into the stored
    artifact definition before instantiation -- confirm against the
    implementing subclasses.
    """
|
| 287 |
+
|
| 288 |
|
| 289 |
class UnitxtArtifactNotFoundError(Exception):
|
| 290 |
def __init__(self, name, artifactories):
|
|
|
|
| 303 |
if Artifact.is_artifact_file(name):
|
| 304 |
return Artifact.load(name), None
|
| 305 |
|
| 306 |
+
artifactory, name, args = get_artifactory_name_and_args(name=name)
|
| 307 |
+
|
| 308 |
+
return artifactory.get_with_overwrite(name, overwrite_args=args), artifactory
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
def get_artifactory_name_and_args(
    name: str, artifactories: Optional[List[Artifactory]] = None
):
    """Locate the first artifactory that contains the named artifact.

    ``name`` is first split by ``separate_inside_and_outside_square_brackets``
    into the bare artifact name and its arguments.
    NOTE(review): presumably the form is ``"name[args]"`` -- confirm against
    the parsing helper.

    Args:
        name: Artifact reference, possibly carrying bracketed arguments.
        artifactories: Artifactories to search in order; defaults to every
            artifactory yielded by iterating ``Artifactories()``.

    Returns:
        A tuple ``(artifactory, name, args)`` with the bare name and the
        parsed arguments.

    Raises:
        UnitxtArtifactNotFoundError: if no artifactory contains the name.
    """
    name, args = separate_inside_and_outside_square_brackets(name)

    candidates = list(Artifactories()) if artifactories is None else artifactories

    # First artifactory reporting membership wins; None marks "no match".
    owner = next((candidate for candidate in candidates if name in candidate), None)
    if owner is None:
        raise UnitxtArtifactNotFoundError(name, candidates)
    return owner, name, args
|
| 324 |
|
| 325 |
|
| 326 |
@lru_cache(maxsize=None)
|