Spaces:
Running
Running
alessandro trinca tornidor
feat: port whisper and faster-whisper support from https://github.com/Thiagohgl/ai-pronunciation-trainer
85b7206
"""Various utilities (Serialize objects, time benchmark, args dump, numerical and stats info)""" | |
from pathlib import Path | |
from typing import Mapping | |
import numpy as np | |
from constants import app_logger | |
def serialize(obj: any, include_none: bool = False):
    """
    Convert an arbitrary object into its serializable counterpart.

    Public entry point: the conversion itself is delegated to `_serialize`.

    Args:
        obj: object to convert
        include_none: when True, keys holding None values are kept during
            dict serialization; when False they are left out

    Returns:
        the serializable version of the input object
    """
    serializable = _serialize(obj, include_none)
    return serializable
def _serialize(obj: any, include_none: bool): | |
from numpy import ndarray as np_ndarray, floating as np_floating, integer as np_integer | |
primitive = (int, float, str, bool) | |
# print(type(obj)) | |
try: | |
if obj is None: | |
return None | |
elif isinstance(obj, np_integer): | |
return int(obj) | |
elif isinstance(obj, np_floating): | |
return float(obj) | |
elif isinstance(obj, np_ndarray): | |
return obj.tolist() | |
elif isinstance(obj, primitive): | |
return obj | |
elif type(obj) is list: | |
return _serialize_list(obj, include_none) | |
elif type(obj) is tuple: | |
return list(obj) | |
elif type(obj) is bytes: | |
return _serialize_bytes(obj) | |
elif isinstance(obj, Exception): | |
return _serialize_exception(obj) | |
# elif isinstance(obj, object): | |
# return _serialize_object(obj, include_none) | |
else: | |
return _serialize_object(obj, include_none) | |
except Exception as e_serialize: | |
app_logger.error(f"e_serialize::{e_serialize}, type_obj:{type(obj)}, obj:{obj}.") | |
return f"object_name:{str(obj)}__object_type_str:{str(type(obj))}." | |
def _serialize_object(obj: Mapping[any, object], include_none: bool) -> dict[any]: | |
res = {} | |
if type(obj) is not dict: | |
keys = [i for i in obj.__dict__.keys() if (getattr(obj, i) is not None) or include_none] | |
else: | |
keys = [i for i in obj.keys() if (obj[i] is not None) or include_none] | |
for key in keys: | |
if type(obj) is not dict: | |
res[key] = _serialize(getattr(obj, key), include_none) | |
else: | |
res[key] = _serialize(obj[key], include_none) | |
return res | |
def _serialize_list(ls: list, include_none: bool) -> list: | |
return [_serialize(elem, include_none) for elem in ls] | |
def _serialize_bytes(b: bytes) -> dict[str, str]: | |
import base64 | |
encoded = base64.b64encode(b) | |
return {"value": encoded.decode('ascii'), "type": "bytes"} | |
def _serialize_exception(e: Exception) -> dict[str, str]: | |
return {"msg": str(e), "type": str(type(e)), **e.__dict__} | |
def hash_calculate(arr_or_path: np.ndarray | str | Path, is_file: bool, read_mode: str = "rb") -> str | bytes: | |
""" | |
Return computed hash from input variable (typically a numpy array). | |
Args: | |
arr_or_path: variable to hash (numpy array, string, Path-like object, dict, bytes) | |
is_file: read the variable from a file | |
read_mode: used when is_file is True to read the file in binary or text mode | |
Returns: | |
computed hash from input variable | |
""" | |
from hashlib import sha256 | |
from base64 import b64encode | |
from numpy import ndarray as np_ndarray | |
if is_file: | |
with open(arr_or_path, read_mode) as file_to_check: | |
# read contents of the file | |
arr_or_path = file_to_check.read() | |
# # pipe contents of the file through | |
# try: | |
# return hashlib.sha256(data).hexdigest() | |
# except TypeError: | |
# app_logger.warning( | |
# f"TypeError, re-try encoding arg:{arr_or_path},type:{type(arr_or_path)}." | |
# ) | |
# return hashlib.sha256(data.encode("utf-8")).hexdigest() | |
if isinstance(arr_or_path, np_ndarray): | |
hash_fn = sha256(arr_or_path.data) | |
elif isinstance(arr_or_path, dict): | |
import json | |
serialized = serialize(arr_or_path) | |
variable_to_hash = json.dumps(serialized, sort_keys=True).encode("utf-8") | |
hash_fn = sha256(variable_to_hash) | |
elif isinstance(arr_or_path, str): | |
try: | |
hash_fn = sha256(arr_or_path) | |
except TypeError: | |
app_logger.error( | |
f"TypeError, re-try encoding arg:{arr_or_path},type:{type(arr_or_path)}." | |
) | |
hash_fn = sha256(arr_or_path.encode("utf-8")) | |
elif isinstance(arr_or_path, bytes): | |
hash_fn = sha256(arr_or_path) | |
else: | |
raise ValueError( | |
f"variable 'arr':{arr_or_path} of type '{type(arr_or_path)}' not yet handled." | |
) | |
return b64encode(hash_fn.digest()) | |