File size: 10,390 Bytes
9d5b4c0
058c80a
6dd8cc1
058c80a
7cdc7d0
fe70438
 
5901c08
3776396
fd7aa63
b6719ff
 
fd7aa63
4c76401
118aaef
3776396
4c76401
118aaef
740f26d
fd7aa63
 
 
fe70438
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
740f26d
 
 
fd7aa63
 
 
 
 
 
 
 
3776396
6dd8cc1
 
5901c08
 
 
 
 
6dd8cc1
 
 
 
 
 
 
 
 
 
 
 
058c80a
6dd8cc1
058c80a
6dd8cc1
8ae904e
 
058c80a
 
 
 
8ae904e
 
 
 
 
 
 
 
 
24278cc
 
8ae904e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6719ff
 
7cdc7d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6719ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23cdef0
7cdc7d0
 
 
b6719ff
 
23cdef0
b6719ff
058c80a
 
 
 
 
 
 
 
 
 
 
 
9d5b4c0
 
cc5f321
 
 
 
 
 
 
 
 
9d5b4c0
cc5f321
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe70438
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
import copy
import importlib.util
import json
import os
import re
import threading
from collections import OrderedDict, defaultdict
from functools import lru_cache
from typing import Any, Dict

from .text_utils import is_made_of_sub_strings


class Singleton(type):
    """Metaclass that hands out a single shared instance per class.

    The first call to a class using this metaclass constructs the instance with
    the given arguments; every later call returns that same instance and
    ignores its arguments.
    """

    # Maps each class to the one instance created for it.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        registry = cls._instances
        if cls in registry:
            return registry[cls]
        instance = super().__call__(*args, **kwargs)
        registry[cls] = instance
        return instance


class LRUCache:
    """An LRU (Least Recently Used) cache that stores a limited number of items.

    This cache automatically removes the least recently used item when it
    exceeds its max size. It behaves similarly to a dictionary, allowing
    items to be added and accessed using `[]` syntax.

    This implementation is thread-safe, using a lock to ensure that only one
    thread can modify or access the cache at any time.

    Reading an item (`[]` or `get`) and writing an item (`[]=` or `set`) both
    mark it as most recently used; membership tests (`in`) do not.

    Attributes:
        max_size (int): The maximum number of items to store in the cache.
            Items exceeding this limit are automatically removed based on least
            recent usage. A value of zero or less keeps the cache empty.
    """

    def __init__(self, max_size=10):
        self._max_size = max_size
        self._cache = OrderedDict()
        # Non-reentrant lock: the private helpers below never acquire it
        # themselves and must only be called while it is already held.
        self._lock = threading.Lock()

    def _evict_overflow(self):
        """Drop least recently used items until the size limit is met (lock must be held)."""
        # The emptiness check prevents popitem() from raising KeyError when
        # max_size is negative and the cache has already been drained.
        while self._cache and len(self._cache) > self._max_size:
            self._cache.popitem(last=False)

    def _store(self, key, value):
        """Insert or refresh ``key`` as the most recently used item (lock must be held)."""
        self._cache[key] = value
        # Assigning to an existing key keeps its old position, so move it explicitly.
        self._cache.move_to_end(key)
        self._evict_overflow()

    @property
    def max_size(self):
        with self._lock:
            return self._max_size

    @max_size.setter
    def max_size(self, size):
        with self._lock:
            self._max_size = size
            # Adjust the cache if the new size is smaller than the current number of items
            self._evict_overflow()

    def __setitem__(self, key, value):
        with self._lock:
            self._store(key, value)

    def __getitem__(self, key):
        with self._lock:
            if key not in self._cache:
                raise KeyError(f"{key} not found in cache")
            # Mark the accessed item as most recently used
            self._cache.move_to_end(key)
            return self._cache[key]

    def set(self, key, value):
        """Sets a key-value pair in the cache."""
        with self._lock:
            self._store(key, value)

    def get(self, key, default=None):
        """Gets a value from the cache by key, returning `default` if the key is not found."""
        with self._lock:
            if key not in self._cache:
                return default
            # Mark the accessed item as most recently used
            self._cache.move_to_end(key)
            return self._cache[key]

    def __contains__(self, key):
        with self._lock:
            return key in self._cache

    def __len__(self):
        with self._lock:
            return len(self._cache)

    def __repr__(self):
        with self._lock:
            return f"LRUCache(max_size={self._max_size}, items={list(self._cache.items())})"


def flatten_dict(
    d: Dict[str, Any], parent_key: str = "", sep: str = "_"
) -> Dict[str, Any]:
    """Collapse a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined to their ancestors' keys with
    ``sep``. Nested dictionaries that are empty contribute no entries.

    Args:
        d: The (possibly nested) dictionary to flatten.
        parent_key: Prefix prepended (with ``sep``) to every key; when empty,
            top-level keys are used unchanged.
        sep: Separator placed between the parts of a combined key.

    Returns:
        A new flat dictionary mapping combined keys to the leaf values.
    """
    flat: Dict[str, Any] = {}
    for name, value in d.items():
        path = parent_key + sep + name if parent_key else name
        if not isinstance(value, dict):
            flat[path] = value
            continue
        # Recurse into the nested mapping, carrying the accumulated key path.
        flat.update(flatten_dict(value, path, sep=sep))

    return flat


@lru_cache(maxsize=None)
def artifacts_json_cache(artifact_path):
    """Load the JSON file at ``artifact_path``, memoizing the result per path.

    The cache is unbounded: each distinct path is parsed once and later calls
    with the same path return the very same cached object.
    """
    parsed = load_json(artifact_path)
    return parsed


def load_json(path):
    """Read and parse the JSON file at ``path``.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON value.

    Raises:
        RuntimeError: If the file content is not valid JSON. The message
            includes the path and the exact file content; the original
            ``json.JSONDecodeError`` is attached as the cause.
    """
    # Read once so the very same text is used for both parsing and the error
    # report (no second open, and no doubled newlines in the reported content).
    with open(path) as f:
        file_content = f.read()

    try:
        return json.loads(file_content)
    except json.JSONDecodeError as e:
        raise RuntimeError(
            f"Failed to decode json file at '{path}' with file content:\n{file_content}"
        ) from e


def save_to_file(path, data):
    """Write ``data`` followed by a newline to ``path``, replacing existing content.

    Args:
        path: Destination file path; the file is opened in text write mode.
        data: The text to write.
    """
    with open(path, "w") as out:
        out.writelines((data, "\n"))


def json_dump(data):
    """Serialize ``data`` to a JSON string.

    The output is indented by 4 spaces and non-ASCII characters are kept as-is
    rather than escaped.
    """
    serialized = json.dumps(data, ensure_ascii=False, indent=4)
    return serialized


def is_package_installed(package_name):
    """Check if a package is installed.

    The check looks up the import spec of ``package_name``. For a dotted name
    the parent package has to be imported to perform the lookup; when that
    import fails the package is reported as not installed.

    Parameters:
    - package_name (str): The name of the package to check.

    Returns:
    - bool: True if the package is installed, False otherwise.
    """
    try:
        spec = importlib.util.find_spec(package_name)
    except ImportError:
        # find_spec imports the parent of a dotted name and raises
        # ModuleNotFoundError (an ImportError) when that parent is missing.
        return False
    return spec is not None


def is_module_available(module_name):
    """Report whether ``module_name`` can be imported in this environment.

    The module is actually imported as part of the check.

    Parameters:
    - module_name (str): The name of the module to check.

    Returns:
    - bool: True if the import succeeds, False if it raises ImportError.
    """
    try:
        __import__(module_name)
    except ImportError:
        return False
    return True


def remove_numerics_and_quoted_texts(input_str):
    """Strip numeric literals and quoted string literals from ``input_str``.

    Floats (``1.5``) and integers are removed first, then quoted texts:
    triple-double-quoted strings (which may span several lines), double-quoted
    strings and single-quoted strings (both limited to a single line).

    Args:
        input_str (str): The text to clean.

    Returns:
        str: ``input_str`` without the numbers and quoted texts.
    """
    # Remove floats first to avoid leaving stray periods
    input_str = re.sub(r"\d+\.\d+", "", input_str)

    # Remove integers
    input_str = re.sub(r"\d+", "", input_str)

    # Remove all quoted strings in a single left-to-right pass. Triple quotes
    # must be tried before plain double quotes, otherwise the `""` pairs of a
    # triple-quoted string are consumed as empty strings and multi-line content
    # is left behind. A single pass also keeps an apostrophe inside a
    # double-quoted string from being paired with a later single quote.
    return re.sub(r'"""[\s\S]*?"""|".*?"|\'.*?\'', "", input_str)


def safe_eval(expression: str, context: dict, allowed_tokens: list) -> Any:
    """Evaluates a given expression in a restricted environment, allowing only specified tokens and context variables.

    Before evaluation, numbers and quoted texts are stripped from the
    expression and the remainder is validated with `is_made_of_sub_strings`
    against the context keys plus `allowed_tokens`.

    Args:
        expression (str): The expression to evaluate.
        context (dict): A dictionary mapping variable names to their values, which
                        can be used in the expression.
        allowed_tokens (list): An iterable of strings representing allowed tokens (such as
                               operators, function names, etc.) that can be used in the expression.

    Returns:
        Any: The result of evaluating the expression.

    Raises:
        ValueError: If the expression contains tokens not in the allowed list or context keys.

    Note:
        This function should be used carefully, as it employs `eval`, which can
        execute arbitrary code. The function attempts to mitigate security risks
        by restricting the available tokens and not exposing built-in functions.
    """
    # list(...) lets callers pass any iterable of tokens (tuple, set, ...),
    # not only a list, without changing the result for list inputs.
    allowed_sub_strings = list(context.keys()) + list(allowed_tokens)

    stripped_expression = remove_numerics_and_quoted_texts(expression)
    if not is_made_of_sub_strings(stripped_expression, allowed_sub_strings):
        raise ValueError(
            f"The expression '{expression}' can not be evaluated because it contains tokens outside the allowed list of {allowed_sub_strings}."
        )

    # NOTE(review): emptying `__builtins__` hides the built-in functions but is
    # not a sandbox on its own; the token check above is the effective guard.
    # Keep `allowed_tokens` minimal when expressions come from untrusted input.
    return eval(expression, {"__builtins__": {}}, context)


def import_module_from_file(file_path):
    """Import a Python source file as a module and return the module object.

    The module is named after the file (base name without extension) and is
    executed immediately. It is not registered in ``sys.modules`` by this
    function.

    Args:
        file_path: Path of the Python file to import.

    Returns:
        The loaded module object.

    Raises:
        ImportError: If no import loader can handle ``file_path`` (for example
            when the file does not have a Python module extension).
    """
    # Get the module name (file name without extension)
    module_name = os.path.splitext(os.path.basename(file_path))[0]
    # Create a module specification
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    # spec_from_file_location returns None when no loader matches the file;
    # fail with a clear message instead of an AttributeError further down.
    if spec is None or spec.loader is None:
        raise ImportError(
            f"Cannot import module from '{file_path}': no suitable loader found."
        )
    # Create a new module based on the specification
    module = importlib.util.module_from_spec(spec)
    # Load the module
    spec.loader.exec_module(module)
    return module


def deep_copy(obj):
    """Return a fully independent copy of ``obj`` via ``copy.deepcopy``.

    Args:
        obj: The object to duplicate.

    Returns:
        A deep copy of ``obj``; nested objects are copied as well.
    """
    clone = copy.deepcopy(obj)
    return clone


def shallow_copy(obj):
    """Return a one-level copy of ``obj`` via ``copy.copy``.

    Args:
        obj: The object to duplicate.

    Returns:
        A shallow copy of ``obj``; nested objects are shared with the original.
    """
    clone = copy.copy(obj)
    return clone


def recursive_copy(obj, internal_copy=None):
    """Recursively copies an object with a selective copy method.

    For `dict`, `list`, and `tuple` types (including named tuples and
    subclasses), a new container of the same type is built and its contents
    are copied recursively. Dictionary keys are reused as is; only the values
    are copied. Any other object is passed to `internal_copy`.

    Args:
        obj: The object to be copied.
        internal_copy (callable, optional): The copy function to use for non-container objects.
            If `None`, non-container objects are returned as is (not copied).

    Returns:
        The recursively copied object.
    """
    # Handle dictionaries
    if isinstance(obj, dict):
        copied = {
            key: recursive_copy(value, internal_copy) for key, value in obj.items()
        }
        # `defaultdict(mapping)` raises TypeError because its first positional
        # argument is the default factory, so rebuild it explicitly. The exact
        # type check is deliberate: subclasses keep the generic constructor
        # call below, exactly as before.
        if type(obj) is defaultdict:
            return defaultdict(obj.default_factory, copied)
        return type(obj)(copied)

    # Handle named tuples (their constructor takes the fields as separate arguments)
    if isinstance(obj, tuple) and hasattr(obj, "_fields"):
        return type(obj)(*(recursive_copy(item, internal_copy) for item in obj))

    # Handle tuples and lists
    if isinstance(obj, (tuple, list)):
        return type(obj)(recursive_copy(item, internal_copy) for item in obj)

    if internal_copy is None:
        return obj

    return internal_copy(obj)


def recursive_deep_copy(obj):
    """Copy ``obj`` recursively, deep-copying every non-container object.

    Delegates to `recursive_copy` with `deep_copy` as the copy function
    applied to non-container objects.

    Args:
        obj: The object to be copied.

    Returns:
        The copy produced by `recursive_copy`.
    """
    return recursive_copy(obj, internal_copy=deep_copy)


def recursive_shallow_copy(obj):
    """Copy ``obj`` recursively, shallow-copying every non-container object.

    Delegates to `recursive_copy` with `shallow_copy` as the copy function
    applied to non-container objects.

    Args:
        obj: The object to be copied.

    Returns:
        The copy produced by `recursive_copy`.
    """
    return recursive_copy(obj, internal_copy=shallow_copy)


class LongString(str):
    """A ``str`` whose ``repr()`` can be replaced by a custom text.

    When ``repr_str`` is given, ``repr()`` of the instance returns it verbatim;
    otherwise the regular string representation is used. The string value
    itself is unaffected.
    """

    def __new__(cls, value, *, repr_str=None):
        instance = super().__new__(cls, value)
        # Stored on the instance; None means "use the default str repr".
        instance._repr_str = repr_str
        return instance

    def __repr__(self):
        custom = self._repr_str
        if custom is None:
            return super().__repr__()
        return custom