Upload processors.py with huggingface_hub
Browse files- processors.py +32 -2
processors.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
import json
|
| 2 |
import re
|
| 3 |
-
from
|
|
|
|
| 4 |
|
| 5 |
-
from .operators import FieldOperator
|
| 6 |
|
| 7 |
|
| 8 |
class ToString(FieldOperator):
|
|
@@ -117,6 +118,28 @@ class Capitalize(FieldOperator):
|
|
| 117 |
return text.capitalize()
|
| 118 |
|
| 119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
class Substring(FieldOperator):
|
| 121 |
begin: int = 0
|
| 122 |
end: int = None
|
|
@@ -152,6 +175,13 @@ class YesNoToInt(FieldOperator):
|
|
| 152 |
return text
|
| 153 |
|
| 154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
class StrToFloatFormat(FieldOperator):
|
| 156 |
def process_value(self, text: Any) -> Any:
|
| 157 |
try:
|
|
|
|
| 1 |
import json
|
| 2 |
import re
|
| 3 |
+
from difflib import get_close_matches
|
| 4 |
+
from typing import Any, Dict
|
| 5 |
|
| 6 |
+
from .operators import FieldOperator, InstanceFieldOperator
|
| 7 |
|
| 8 |
|
| 9 |
class ToString(FieldOperator):
|
|
|
|
| 118 |
return text.capitalize()
|
| 119 |
|
| 120 |
|
| 121 |
+
class GetStringAfter(FieldOperator):
    """Keep only the part of a value that follows the first ``substring``.

    The value is split once on ``substring`` and the last piece is returned
    with surrounding whitespace stripped. When ``substring`` does not occur,
    the single piece produced by the split is the whole value, so the
    stripped input is returned.
    """

    # Marker text; everything up to and including its first occurrence is dropped.
    substring: str

    def process_value(self, text: Any) -> Any:
        # maxsplit=1 so that only the first occurrence acts as the cut point.
        pieces = text.split(self.substring, 1)
        tail = pieces[-1]
        return tail.strip()
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
class MatchClosestOption(InstanceFieldOperator):
    """Replace a value with the most similar entry from the instance's options.

    Similarity is computed by ``difflib.get_close_matches`` with a cutoff of
    0, so any non-empty option list yields a result.
    """

    # Name of the key under which the candidate options are stored.
    options_field: str = "options"

    def process_instance_value(self, value: Any, instance: Dict[str, Any]):
        """Return the option closest to ``value``, or ``None`` if there are none.

        The options are read from ``instance[self.options_field]`` when that
        key exists; otherwise from
        ``instance["task_data"][self.options_field]``.

        Raises:
            KeyError: if the options are found in neither location.
        """
        # This method used to be defined twice in the class body, so the
        # ``task_data`` variant was silently shadowed by the top-level one.
        # Both lookups are kept here; the top-level key wins so the behavior
        # that was actually in effect is preserved.
        # NOTE(review): presumably newer instances keep options under
        # "task_data" -- confirm which layout callers really use.
        if self.options_field in instance:
            options = instance[self.options_field]
        else:
            options = instance["task_data"][self.options_field]

        # n=1 asks for the single best candidate; cutoff=0.0 disables the
        # minimum-similarity filter.
        closest_match = get_close_matches(value, options, n=1, cutoff=0.0)
        return closest_match[0] if closest_match else None
|
| 141 |
+
|
| 142 |
+
|
| 143 |
class Substring(FieldOperator):
|
| 144 |
begin: int = 0
|
| 145 |
end: int = None
|
|
|
|
| 175 |
return text
|
| 176 |
|
| 177 |
|
| 178 |
+
class YesToOneElseZero(FieldOperator):
    """Map the exact value ``"yes"`` to ``"1"`` and every other value to ``"0"``."""

    def process_value(self, text: Any) -> Any:
        # The comparison is exact: no case folding or whitespace trimming.
        return "1" if text == "yes" else "0"
|
| 183 |
+
|
| 184 |
+
|
| 185 |
class StrToFloatFormat(FieldOperator):
|
| 186 |
def process_value(self, text: Any) -> Any:
|
| 187 |
try:
|