Spaces:
Running
Running
File size: 4,745 Bytes
85b7206 c1390d7 85b7206 c1390d7 85b7206 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
"""Various utilities (Serialize objects, time benchmark, args dump, numerical and stats info)"""
from pathlib import Path
from typing import Mapping
import numpy as np
from constants import app_logger
def serialize(obj: any, include_none: bool = False):
    """
    Convert the input object into a serializable representation.

    Public entry point: the conversion itself is delegated to `_serialize`.

    Args:
        obj: Object to convert
        include_none: when True, keys whose value is None are kept during dict serialization

    Returns:
        the serialized counterpart of the input object
    """
    serialized = _serialize(obj, include_none)
    return serialized
def _serialize(obj: any, include_none: bool):
from numpy import ndarray as np_ndarray, floating as np_floating, integer as np_integer
primitive = (int, float, str, bool)
# print(type(obj))
try:
if obj is None:
return None
elif isinstance(obj, np_integer):
return int(obj)
elif isinstance(obj, np_floating):
return float(obj)
elif isinstance(obj, np_ndarray):
return obj.tolist()
elif isinstance(obj, primitive):
return obj
elif type(obj) is list:
return _serialize_list(obj, include_none)
elif type(obj) is tuple:
return list(obj)
elif type(obj) is bytes:
return _serialize_bytes(obj)
elif isinstance(obj, Exception):
return _serialize_exception(obj)
# elif isinstance(obj, object):
# return _serialize_object(obj, include_none)
else:
return _serialize_object(obj, include_none)
except Exception as e_serialize:
app_logger.error(f"e_serialize::{e_serialize}, type_obj:{type(obj)}, obj:{obj}.")
return f"object_name:{str(obj)}__object_type_str:{str(type(obj))}."
def _serialize_object(obj: Mapping[any, object], include_none: bool) -> dict[any]:
res = {}
if type(obj) is not dict:
keys = [i for i in obj.__dict__.keys() if (getattr(obj, i) is not None) or include_none]
else:
keys = [i for i in obj.keys() if (obj[i] is not None) or include_none]
for key in keys:
if type(obj) is not dict:
res[key] = _serialize(getattr(obj, key), include_none)
else:
res[key] = _serialize(obj[key], include_none)
return res
def _serialize_list(ls: list, include_none: bool) -> list:
return [_serialize(elem, include_none) for elem in ls]
def _serialize_bytes(b: bytes) -> dict[str, str]:
import base64
encoded = base64.b64encode(b)
return {"value": encoded.decode('ascii'), "type": "bytes"}
def _serialize_exception(e: Exception) -> dict[str, str]:
return {"msg": str(e), "type": str(type(e)), **e.__dict__}
def hash_calculate(arr_or_path: np.ndarray | str | Path, is_file: bool, read_mode: str = "rb") -> str | bytes:
"""
Return computed hash from input variable (typically a numpy array).
Args:
arr_or_path: variable to hash (numpy array, string, Path-like object, dict, bytes)
is_file: read the variable from a file
read_mode: used when is_file is True to read the file in binary or text mode
Returns:
computed hash from input variable
"""
from hashlib import sha256
from base64 import b64encode
from numpy import ndarray as np_ndarray
if is_file:
with open(arr_or_path, read_mode) as file_to_check:
# read contents of the file
arr_or_path = file_to_check.read()
# # pipe contents of the file through
# try:
# return hashlib.sha256(data).hexdigest()
# except TypeError:
# app_logger.warning(
# f"TypeError, re-try encoding arg:{arr_or_path},type:{type(arr_or_path)}."
# )
# return hashlib.sha256(data.encode("utf-8")).hexdigest()
if isinstance(arr_or_path, np_ndarray):
hash_fn = sha256(arr_or_path.data)
elif isinstance(arr_or_path, dict):
import json
serialized = serialize(arr_or_path)
variable_to_hash = json.dumps(serialized, sort_keys=True).encode("utf-8")
hash_fn = sha256(variable_to_hash)
elif isinstance(arr_or_path, str):
try:
hash_fn = sha256(arr_or_path)
except TypeError:
app_logger.error(
f"TypeError, re-try encoding arg:{arr_or_path},type:{type(arr_or_path)}."
)
hash_fn = sha256(arr_or_path.encode("utf-8"))
elif isinstance(arr_or_path, bytes):
hash_fn = sha256(arr_or_path)
else:
raise ValueError(
f"variable 'arr':{arr_or_path} of type '{type(arr_or_path)}' not yet handled."
)
return b64encode(hash_fn.digest())
|