Upload validate.py with huggingface_hub
Browse files- validate.py +13 -7
validate.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
from abc import ABC
|
| 2 |
from dataclasses import field
|
| 3 |
-
from typing import Any, Dict
|
| 4 |
|
| 5 |
-
from datasets import
|
| 6 |
|
| 7 |
from .operator import StreamInstanceOperator
|
| 8 |
|
|
@@ -15,14 +15,20 @@ class ValidateSchema(Validator, StreamInstanceOperator):
|
|
| 15 |
schema: Features = None
|
| 16 |
|
| 17 |
def verify(self):
|
| 18 |
-
assert isinstance(
|
|
|
|
|
|
|
| 19 |
assert self.schema is not None, "Schema must be specified"
|
| 20 |
|
| 21 |
def verify_first_instance(self, instance):
|
| 22 |
-
for
|
| 23 |
-
assert
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
return instance
|
| 27 |
|
| 28 |
|
|
|
|
| 1 |
from abc import ABC
|
| 2 |
from dataclasses import field
|
| 3 |
+
from typing import Any, Dict, Optional
|
| 4 |
|
| 5 |
+
from datasets import Features, Sequence, Value
|
| 6 |
|
| 7 |
from .operator import StreamInstanceOperator
|
| 8 |
|
|
|
|
| 15 |
schema: Features = None
|
| 16 |
|
| 17 |
def verify(self):
    """Check that a usable schema has been configured on this operator.

    Raises:
        AssertionError: if ``self.schema`` is ``None`` ("Schema must be
            specified") or is not a ``Features`` instance ("Schema must be
            an instance of Features").
    """
    # Test for None first: isinstance(None, Features) is False, so with the
    # checks in the opposite order a missing schema was reported as a type
    # problem and the "must be specified" message could never be reached.
    assert self.schema is not None, "Schema must be specified"
    assert isinstance(
        self.schema, Features
    ), "Schema must be an instance of Features"
|
| 22 |
|
| 23 |
def verify_first_instance(self, instance):
    """Assert that each entry of ``self.standart_fields`` is in ``instance``.

    Fields are checked in the iteration order of ``self.standart_fields``;
    the first one that fails the ``in`` test raises ``AssertionError`` with
    a message naming that field. Nothing is returned.
    """
    # NOTE(review): ``standart_fields`` (sic) is defined outside this view;
    # the spelling here has to match that definition.
    for required_name in self.standart_fields:
        assert required_name in instance, (
            f'Field "{required_name}" is missing in the first instance'
        )
|
| 28 |
+
|
| 29 |
+
def process(
    self,
    instance: Dict[str, Any],
    stream_name: Optional[str] = None,
) -> Dict[str, Any]:
    """Return ``instance`` unchanged; ``stream_name`` is accepted but not used."""
    return instance
|
| 33 |
|
| 34 |
|