Spaces:
Sleeping
Sleeping
Commit
·
7399708
1
Parent(s):
c12b4db
bring dvats & requirements & entrypoint
Browse files- dvats/.ipynb_checkpoints/__init__-checkpoint.py +1 -0
- dvats/.ipynb_checkpoints/__init__.py +0 -0
- dvats/.ipynb_checkpoints/_modidx-checkpoint.py +14 -0
- dvats/.ipynb_checkpoints/_nbdev-checkpoint.py +39 -0
- dvats/.ipynb_checkpoints/all-checkpoint.py +8 -0
- dvats/.ipynb_checkpoints/dr-checkpoint.py +166 -0
- dvats/.ipynb_checkpoints/encoder-checkpoint.py +153 -0
- dvats/.ipynb_checkpoints/imports-checkpoint.py +24 -0
- dvats/.ipynb_checkpoints/load-checkpoint.py +166 -0
- dvats/.ipynb_checkpoints/utils-checkpoint.py +134 -0
- dvats/__init__.py +1 -0
- dvats/__pycache__/__init__.cpython-310.pyc +0 -0
- dvats/__pycache__/all.cpython-310.pyc +0 -0
- dvats/__pycache__/dr.cpython-310.pyc +0 -0
- dvats/__pycache__/encoder.cpython-310.pyc +0 -0
- dvats/__pycache__/imports.cpython-310.pyc +0 -0
- dvats/__pycache__/load.cpython-310.pyc +0 -0
- dvats/__pycache__/utils.cpython-310.pyc +0 -0
- dvats/__pycache__/visualization.cpython-310.pyc +0 -0
- dvats/__pycache__/xai.cpython-310.pyc +0 -0
- dvats/_modidx.py +105 -0
- dvats/_nbdev.py +39 -0
- dvats/all.py +8 -0
- dvats/dr.py +166 -0
- dvats/encoder.py +301 -0
- dvats/imports.py +24 -0
- dvats/load.py +168 -0
- dvats/utils.py +245 -0
- dvats/visualization.py +63 -0
- dvats/xai.py +964 -0
- entrypoint-rstudio.sh +25 -0
- requirements.txt +1 -3
dvats/.ipynb_checkpoints/__init__-checkpoint.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
__version__ = "0.0.1"
|
dvats/.ipynb_checkpoints/__init__.py
ADDED
|
File without changes
|
dvats/.ipynb_checkpoints/_modidx-checkpoint.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Autogenerated by nbdev
|
| 2 |
+
|
| 3 |
+
d = { 'settings': { 'branch': 'master',
|
| 4 |
+
'doc_baseurl': '/dvats/',
|
| 5 |
+
'doc_host': 'https://vrodriguezf.github.io',
|
| 6 |
+
'git_url': 'https://github.com/vrodriguezf/deepvats',
|
| 7 |
+
'lib_path': 'dvats'},
|
| 8 |
+
'syms': { 'dvats.all': {},
|
| 9 |
+
'dvats.dr': {},
|
| 10 |
+
'dvats.encoder': {},
|
| 11 |
+
'dvats.imports': {},
|
| 12 |
+
'dvats.load': {},
|
| 13 |
+
'dvats.utils': {},
|
| 14 |
+
'dvats.visualization': {}}}
|
dvats/.ipynb_checkpoints/_nbdev-checkpoint.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AUTOGENERATED BY NBDEV! DO NOT EDIT!
|
| 2 |
+
|
| 3 |
+
__all__ = ["index", "modules", "custom_doc_links", "git_url"]
|
| 4 |
+
|
| 5 |
+
index = {"check_compatibility": "dr.ipynb",
|
| 6 |
+
"get_UMAP_prjs": "dr.ipynb",
|
| 7 |
+
"get_PCA_prjs": "dr.ipynb",
|
| 8 |
+
"get_TSNE_prjs": "dr.ipynb",
|
| 9 |
+
"DCAE_torch": "encoder.ipynb",
|
| 10 |
+
"ENCODER_EMBS_MODULE_NAME": "encoder.ipynb",
|
| 11 |
+
"get_enc_embs": "encoder.ipynb",
|
| 12 |
+
"TSArtifact": "load.ipynb",
|
| 13 |
+
"wandb.apis.public.Artifact.to_df": "load.ipynb",
|
| 14 |
+
"wandb.apis.public.Artifact.to_tsartifact": "load.ipynb",
|
| 15 |
+
"infer_or_inject_freq": "load.ipynb",
|
| 16 |
+
"generate_TS_df": "utils.ipynb",
|
| 17 |
+
"normalize_columns": "utils.ipynb",
|
| 18 |
+
"remove_constant_columns": "utils.ipynb",
|
| 19 |
+
"ReferenceArtifact": "utils.ipynb",
|
| 20 |
+
"wandb.apis.public.Artifact.to_obj": "utils.ipynb",
|
| 21 |
+
"PrintLayer": "utils.ipynb",
|
| 22 |
+
"Learner.export_and_get": "utils.ipynb",
|
| 23 |
+
"get_wandb_artifacts": "utils.ipynb",
|
| 24 |
+
"get_pickle_artifact": "utils.ipynb",
|
| 25 |
+
"plot_TS": "visualization.ipynb",
|
| 26 |
+
"plot_validation_ts_ae": "visualization.ipynb",
|
| 27 |
+
"plot_mask": "visualization.ipynb"}
|
| 28 |
+
|
| 29 |
+
modules = ["dr.py",
|
| 30 |
+
"encoder.py",
|
| 31 |
+
"load.py",
|
| 32 |
+
"utils.py",
|
| 33 |
+
"visualization.py"]
|
| 34 |
+
|
| 35 |
+
doc_url = "https://vrodriguezf.github.io/tchub/"
|
| 36 |
+
|
| 37 |
+
git_url = "https://gitlab.geist.re/pml/x_timecluster_extension/tree/master/"
|
| 38 |
+
|
| 39 |
+
def custom_doc_links(name): return None
|
dvats/.ipynb_checkpoints/all-checkpoint.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import dvats
|
| 2 |
+
from .imports import *
|
| 3 |
+
from .load import *
|
| 4 |
+
from .utils import *
|
| 5 |
+
from .dr import *
|
| 6 |
+
from .encoder import *
|
| 7 |
+
from .visualization import *
|
| 8 |
+
from .xai import *
|
dvats/.ipynb_checkpoints/dr-checkpoint.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/dr.ipynb.
|
| 2 |
+
|
| 3 |
+
# %% auto 0
|
| 4 |
+
__all__ = ['get_gpu_memory', 'color_for_percentage', 'create_bar', 'gpu_memory_status', 'check_compatibility', 'get_UMAP_prjs',
|
| 5 |
+
'get_PCA_prjs', 'get_TSNE_prjs', 'cluster_score']
|
| 6 |
+
|
| 7 |
+
# %% ../nbs/dr.ipynb 2
|
| 8 |
+
import subprocess
|
| 9 |
+
def get_gpu_memory(device = 0):
|
| 10 |
+
total_memory = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.total", "--format=csv,noheader,nounits", "--id=" + str(device)])
|
| 11 |
+
total_memory = int(total_memory.decode().split('\n')[0])
|
| 12 |
+
used_memory = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.used", "--format=csv,noheader,nounits", "--id=" + str(device)])
|
| 13 |
+
used_memory = int(used_memory.decode().split('\n')[0])
|
| 14 |
+
|
| 15 |
+
percentage = round((used_memory / total_memory) * 100)
|
| 16 |
+
return used_memory, total_memory, percentage
|
| 17 |
+
|
| 18 |
+
def color_for_percentage(percentage):
|
| 19 |
+
if percentage < 20:
|
| 20 |
+
return "\033[90m" # Gray
|
| 21 |
+
elif percentage < 40:
|
| 22 |
+
return "\033[94m" # Blue
|
| 23 |
+
elif percentage < 60:
|
| 24 |
+
return "\033[92m" # Green
|
| 25 |
+
elif percentage < 80:
|
| 26 |
+
return "\033[93m" # Orange
|
| 27 |
+
else:
|
| 28 |
+
return "\033[91m" # Red
|
| 29 |
+
|
| 30 |
+
def create_bar(percentage, color_code, length=20):
|
| 31 |
+
filled_length = int(length * percentage // 100)
|
| 32 |
+
bar = "█" * filled_length + "-" * (length - filled_length)
|
| 33 |
+
return color_code + bar + "\033[0m" # Apply color and reset after bar
|
| 34 |
+
|
| 35 |
+
def gpu_memory_status(device=0):
|
| 36 |
+
used, total, percentage = get_gpu_memory(device)
|
| 37 |
+
color_code = color_for_percentage(percentage)
|
| 38 |
+
bar = create_bar(percentage, color_code)
|
| 39 |
+
print(f"Used mem: {used}")
|
| 40 |
+
print(f"Used mem: {total}")
|
| 41 |
+
print(f"Memory Usage: [{bar}] {color_code}{percentage}%\033[0m")
|
| 42 |
+
|
| 43 |
+
# %% ../nbs/dr.ipynb 4
|
| 44 |
+
import umap
|
| 45 |
+
import cudf
|
| 46 |
+
import cuml
|
| 47 |
+
import pandas as pd
|
| 48 |
+
import numpy as np
|
| 49 |
+
from fastcore.all import *
|
| 50 |
+
from .imports import *
|
| 51 |
+
from .load import TSArtifact
|
| 52 |
+
|
| 53 |
+
# %% ../nbs/dr.ipynb 5
|
| 54 |
+
def check_compatibility(dr_ar:TSArtifact, enc_ar:TSArtifact):
|
| 55 |
+
"Function to check that the artifact used by the encoder model and the artifact that is \
|
| 56 |
+
going to be passed through the DR are compatible"
|
| 57 |
+
try:
|
| 58 |
+
# Check that both artifacts have the same variables
|
| 59 |
+
chk_vars = dr_ar.metadata['TS']['vars'] == enc_ar.metadata['TS']['vars']
|
| 60 |
+
# Check that both artifacts have the same freq
|
| 61 |
+
chk_freq = dr_ar.metadata['TS']['freq'] == enc_ar.metadata['TS']['freq']
|
| 62 |
+
# Check that the dr artifact is not normalized (not normalized data has not the key normalization)
|
| 63 |
+
chk_norm = dr_ar.metadata['TS'].get('normalization') is None
|
| 64 |
+
# Check that the dr artifact has not missing values
|
| 65 |
+
chk_miss = dr_ar.metadata['TS']['has_missing_values'] == "False"
|
| 66 |
+
# Check all logical vars.
|
| 67 |
+
if chk_vars and chk_freq and chk_norm and chk_miss:
|
| 68 |
+
print("Artifacts are compatible.")
|
| 69 |
+
else:
|
| 70 |
+
raise Exception
|
| 71 |
+
except Exception as e:
|
| 72 |
+
print("Artifacts are not compatible.")
|
| 73 |
+
raise e
|
| 74 |
+
return None
|
| 75 |
+
|
| 76 |
+
# %% ../nbs/dr.ipynb 7
|
| 77 |
+
#Comment this part after 4_seconds debugged
|
| 78 |
+
import hashlib
|
| 79 |
+
|
| 80 |
+
# %% ../nbs/dr.ipynb 8
|
| 81 |
+
import warnings
|
| 82 |
+
import sys
|
| 83 |
+
from numba.core.errors import NumbaPerformanceWarning
|
| 84 |
+
@delegates(cuml.UMAP)
|
| 85 |
+
def get_UMAP_prjs(
|
| 86 |
+
input_data,
|
| 87 |
+
cpu=True,
|
| 88 |
+
print_flag = False,
|
| 89 |
+
check_memory_usage = True,
|
| 90 |
+
**kwargs
|
| 91 |
+
):
|
| 92 |
+
"Compute the projections of `input_data` using UMAP, with a configuration contained in `**kwargs`."
|
| 93 |
+
if print_flag:
|
| 94 |
+
print("--> get_UMAP_prjs")
|
| 95 |
+
print("kwargs: ", kwargs)
|
| 96 |
+
sys.stdout.flush()
|
| 97 |
+
####
|
| 98 |
+
checksum = hashlib.md5(input_data.tobytes()).hexdigest()
|
| 99 |
+
print(checksum)
|
| 100 |
+
####
|
| 101 |
+
|
| 102 |
+
if check_memory_usage: gpu_memory_status()
|
| 103 |
+
|
| 104 |
+
warnings.filterwarnings("ignore", category=NumbaPerformanceWarning) # silence NumbaPerformanceWarning
|
| 105 |
+
|
| 106 |
+
#reducer = umap.UMAP(**kwargs) if cpu else cuml.UMAP(**kwargs)
|
| 107 |
+
if cpu:
|
| 108 |
+
print("-- umap.UMAP --", cpu)
|
| 109 |
+
sys.stdout.flush()
|
| 110 |
+
reducer = umap.UMAP(**kwargs)
|
| 111 |
+
else:
|
| 112 |
+
print("-- cuml.UMAP --", cpu)
|
| 113 |
+
sys.stdout.flush()
|
| 114 |
+
if 'random_state' in kwargs:
|
| 115 |
+
kwargs['random_state'] = np.uint64(kwargs['random_state'])
|
| 116 |
+
reducer = cuml.UMAP(**kwargs)
|
| 117 |
+
|
| 118 |
+
if print_flag:
|
| 119 |
+
print("------- reducer --------")
|
| 120 |
+
print(reducer)
|
| 121 |
+
print(reducer.get_params())
|
| 122 |
+
print("------- reducer --------")
|
| 123 |
+
sys.stdout.flush()
|
| 124 |
+
|
| 125 |
+
projections = reducer.fit_transform(input_data)
|
| 126 |
+
|
| 127 |
+
if check_memory_usage: gpu_memory_status()
|
| 128 |
+
if print_flag:
|
| 129 |
+
checksum = hashlib.md5(projections.tobytes()).hexdigest()
|
| 130 |
+
print("prjs checksum ", checksum)
|
| 131 |
+
print("get_UMAP_prjs -->")
|
| 132 |
+
sys.stdout.flush()
|
| 133 |
+
return projections
|
| 134 |
+
|
| 135 |
+
# %% ../nbs/dr.ipynb 13
|
| 136 |
+
@delegates(cuml.PCA)
|
| 137 |
+
def get_PCA_prjs(X, cpu=False, **kwargs):
|
| 138 |
+
r"""
|
| 139 |
+
Computes PCA projections of X
|
| 140 |
+
"""
|
| 141 |
+
if cpu:
|
| 142 |
+
raise NotImplementedError
|
| 143 |
+
else:
|
| 144 |
+
reducer = cuml.PCA(**kwargs)
|
| 145 |
+
projections = reducer.fit_transform(X)
|
| 146 |
+
return projections
|
| 147 |
+
|
| 148 |
+
# %% ../nbs/dr.ipynb 15
|
| 149 |
+
@delegates(cuml.TSNE)
|
| 150 |
+
def get_TSNE_prjs(X, cpu=False, **kwargs):
|
| 151 |
+
r"""
|
| 152 |
+
Computes TSNE projections of X
|
| 153 |
+
"""
|
| 154 |
+
if cpu:
|
| 155 |
+
raise NotImplementedError
|
| 156 |
+
else:
|
| 157 |
+
reducer = cuml.TSNE(**kwargs)
|
| 158 |
+
projections = reducer.fit_transform(X)
|
| 159 |
+
return projections
|
| 160 |
+
|
| 161 |
+
# %% ../nbs/dr.ipynb 18
|
| 162 |
+
from sklearn.metrics import silhouette_score
|
| 163 |
+
def cluster_score(prjs, clusters_labels, print_flag):
|
| 164 |
+
score = silhouette_score(prjs, clusters_labels)
|
| 165 |
+
if print_flag: print("Silhouette_score:", score)
|
| 166 |
+
return score
|
dvats/.ipynb_checkpoints/encoder-checkpoint.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""encoder.ipynb
|
| 3 |
+
|
| 4 |
+
Automatically generated.
|
| 5 |
+
|
| 6 |
+
Original file is located at:
|
| 7 |
+
/home/macu/work/nbs/encoder.ipynb
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
#default_exp encoder
|
| 11 |
+
|
| 12 |
+
#hide
|
| 13 |
+
%load_ext autoreload
|
| 14 |
+
%autoreload 2
|
| 15 |
+
|
| 16 |
+
#export
|
| 17 |
+
import pandas as pd
|
| 18 |
+
import numpy as np
|
| 19 |
+
from fastcore.all import *
|
| 20 |
+
from tsai.callback.MVP import *
|
| 21 |
+
from tsai.imports import *
|
| 22 |
+
from tsai.models.InceptionTimePlus import InceptionTimePlus
|
| 23 |
+
from tsai.models.explainability import get_acts_and_grads
|
| 24 |
+
from tsai.models.layers import *
|
| 25 |
+
from tsai.data.validation import combine_split_data
|
| 26 |
+
|
| 27 |
+
#hide
|
| 28 |
+
from tsai.all import *
|
| 29 |
+
|
| 30 |
+
#export
|
| 31 |
+
class DCAE_torch(Module):
|
| 32 |
+
def __init__(self, c_in, seq_len, delta, nfs=[64, 32, 12], kss=[10, 5, 5],
|
| 33 |
+
pool_szs=[2,2,3], output_fsz=10):
|
| 34 |
+
"""
|
| 35 |
+
Create a Deep Convolutional Autoencoder for multivariate time series of `d` dimensions,
|
| 36 |
+
sliced with a window size of `w`. The parameter `delta` sets the number of latent features that will be
|
| 37 |
+
contained in the Dense layer of the network. The the number of features
|
| 38 |
+
maps (filters), the filter size and the pool size can also be adjusted."
|
| 39 |
+
"""
|
| 40 |
+
assert all_equal([len(x) for x in [nfs, kss, pool_szs]], np.repeat(len(nfs), 3)), \
|
| 41 |
+
'nfs, kss, and pool_szs must have the same length'
|
| 42 |
+
assert np.prod(pool_szs) == nfs[-1], \
|
| 43 |
+
'The number of filters in the last conv layer must be equal to the product of pool sizes'
|
| 44 |
+
assert seq_len % np.prod(pool_szs) == 0, \
|
| 45 |
+
'The product of pool sizes must be a divisor of the window size'
|
| 46 |
+
layers = []
|
| 47 |
+
for i in range_of(kss):
|
| 48 |
+
layers += [Conv1d(ni=nfs[i-1] if i>0 else c_in, nf=nfs[i], ks=kss[i]),
|
| 49 |
+
nn.MaxPool1d(kernel_size=pool_szs[i])]
|
| 50 |
+
self.downsample = nn.Sequential(*layers)
|
| 51 |
+
self.bottleneck = nn.Sequential(OrderedDict([
|
| 52 |
+
('flatten', nn.Flatten()),
|
| 53 |
+
('latent_in', nn.Linear(seq_len, delta)),
|
| 54 |
+
('latent_out', nn.Linear(delta, seq_len)),
|
| 55 |
+
('reshape', Reshape(nfs[-1], seq_len // np.prod(pool_szs)))
|
| 56 |
+
]))
|
| 57 |
+
layers = []
|
| 58 |
+
for i in reversed(range_of(kss)):
|
| 59 |
+
layers += [Conv1d(ni=nfs[i+1] if i != (len(nfs)-1) else nfs[-1],
|
| 60 |
+
nf=nfs[i], ks=kss[i]),
|
| 61 |
+
nn.Upsample(scale_factor=pool_szs[i])]
|
| 62 |
+
layers += [Conv1d(ni=nfs[0], nf=c_in, kernel_size=output_fsz)]
|
| 63 |
+
self.upsample = nn.Sequential(*layers)
|
| 64 |
+
|
| 65 |
+
def forward(self, x):
|
| 66 |
+
x = self.downsample(x)
|
| 67 |
+
x = self.bottleneck(x)
|
| 68 |
+
x = self.upsample(x)
|
| 69 |
+
return x
|
| 70 |
+
|
| 71 |
+
#hide
|
| 72 |
+
foo = torch.rand(3, 1, 48)
|
| 73 |
+
m = DCAE_torch(c_in=foo.shape[1], seq_len=foo.shape[2], delta=12)
|
| 74 |
+
m(foo).shape
|
| 75 |
+
|
| 76 |
+
#export
|
| 77 |
+
ENCODER_EMBS_MODULE_NAME = {
|
| 78 |
+
InceptionTimePlus: 'backbone', # for mvp based models
|
| 79 |
+
DCAE_torch: 'bottleneck.latent_in'
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
#export
|
| 83 |
+
def get_enc_embs(X, enc_learn, module=None, cpu=False, average_seq_dim=True, to_numpy=True):
|
| 84 |
+
"""
|
| 85 |
+
Get the embeddings of X from an encoder, passed in `enc_learn as a fastai
|
| 86 |
+
learner. By default, the embeddings are obtained from the last layer
|
| 87 |
+
before the model head, although any layer can be passed to `model`.
|
| 88 |
+
Input
|
| 89 |
+
- `cpu`: Whether to do the model inference in cpu of gpu (GPU recommended)
|
| 90 |
+
- `average_seq_dim`: Whether to aggregate the embeddings in the sequence dimensions
|
| 91 |
+
- `to_numpy`: Whether to return the result as a numpy array (if false returns a tensor)
|
| 92 |
+
"""
|
| 93 |
+
if cpu:
|
| 94 |
+
print("--> Get enc embs CPU")
|
| 95 |
+
enc_learn.dls.cpu()
|
| 96 |
+
enc_learn.cpu()
|
| 97 |
+
else:
|
| 98 |
+
print("--> Use CUDA |Get enc embs GPU")
|
| 99 |
+
enc_learn.dls.cuda()
|
| 100 |
+
enc_learn.cuda()
|
| 101 |
+
print("devices: ", enc_learn.dls.device, enc_learn.model.device)
|
| 102 |
+
print("Use CUDA -->")
|
| 103 |
+
if enc_learn.dls.bs == 0: enc_learn.dls.bs = 64
|
| 104 |
+
print("--> Get enc embs bs: ", enc_learn.dls.bs)
|
| 105 |
+
aux_dl = enc_learn.dls.valid.new_dl(X=X)
|
| 106 |
+
aux_dl.bs = enc_learn.dls.bs if enc_learn.dls.bs>0 else 64
|
| 107 |
+
module = nested_attr(enc_learn.model,
|
| 108 |
+
ENCODER_EMBS_MODULE_NAME[type(enc_learn.model)]) \
|
| 109 |
+
if module is None else module
|
| 110 |
+
embs = [get_acts_and_grads(model=enc_learn.model,
|
| 111 |
+
modules=module,
|
| 112 |
+
x=xb[0], cpu=cpu)[0] for xb in aux_dl]
|
| 113 |
+
embs = to_concat(embs)
|
| 114 |
+
if embs.ndim == 3 and average_seq_dim: embs = embs.mean(axis=2)
|
| 115 |
+
if to_numpy: embs = embs.numpy() if cpu else embs.cpu().numpy()
|
| 116 |
+
return embs
|
| 117 |
+
|
| 118 |
+
#hide
|
| 119 |
+
import wandb
|
| 120 |
+
from dvats.utils import *
|
| 121 |
+
wandb_api = wandb.Api()
|
| 122 |
+
enc_artifact = wandb_api.artifact('deepvats/mvp:latest')
|
| 123 |
+
enc_learner = enc_artifact.to_obj()
|
| 124 |
+
X = torch.rand(9, 1, 48)
|
| 125 |
+
|
| 126 |
+
#hide
|
| 127 |
+
#slow
|
| 128 |
+
#%%time
|
| 129 |
+
embs = get_enc_embs(X, enc_learner, cpu=True)
|
| 130 |
+
test_eq(embs.shape[0], X.shape[0])
|
| 131 |
+
embs.shape, embs.__class__
|
| 132 |
+
|
| 133 |
+
#hide
|
| 134 |
+
%%time
|
| 135 |
+
embs = get_enc_embs(X, enc_learner, cpu=False, to_numpy=False)
|
| 136 |
+
test_eq(embs.shape[0], X.shape[0])
|
| 137 |
+
embs.shape, embs.__class__, embs.device
|
| 138 |
+
|
| 139 |
+
#hide
|
| 140 |
+
%%time
|
| 141 |
+
embs = get_enc_embs(X, enc_learner, cpu=False, to_numpy=True)
|
| 142 |
+
test_eq(embs.shape[0], X.shape[0])
|
| 143 |
+
embs.shape, embs.__class__
|
| 144 |
+
|
| 145 |
+
#hide
|
| 146 |
+
|
| 147 |
+
#from nbdev.export import notebook2script
|
| 148 |
+
|
| 149 |
+
#notebook2script()
|
| 150 |
+
|
| 151 |
+
#from tsai import nb2py
|
| 152 |
+
#nb2py
|
| 153 |
+
#beep(1)
|
dvats/.ipynb_checkpoints/imports-checkpoint.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from IPython.display import Audio, display, HTML, Javascript, clear_output # from tsai
|
| 2 |
+
import importlib
|
| 3 |
+
import numpy as np
|
| 4 |
+
import time
|
| 5 |
+
import sys
|
| 6 |
+
|
| 7 |
+
##
|
| 8 |
+
# Constants
|
| 9 |
+
##
|
| 10 |
+
WANDB_ARTIFACTS_DIR = 'data/wandb_artifacts'
|
| 11 |
+
|
| 12 |
+
# General purpose functions
|
| 13 |
+
def beep(inp=1, duration=.1, n=1):
|
| 14 |
+
rate = 10000
|
| 15 |
+
mult = 1.6 * inp if inp else .08
|
| 16 |
+
wave = np.sin(mult*np.arange(rate*duration))
|
| 17 |
+
for i in range(n):
|
| 18 |
+
display(Audio(wave, rate=10000, autoplay=True))
|
| 19 |
+
time.sleep(duration / .1)
|
| 20 |
+
|
| 21 |
+
def m_reload(package_name):
|
| 22 |
+
for k,v in sys.modules.items():
|
| 23 |
+
if k.startswith(package_name):
|
| 24 |
+
importlib.reload(v)
|
dvats/.ipynb_checkpoints/load-checkpoint.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/load.ipynb.
|
| 2 |
+
|
| 3 |
+
# %% auto 0
|
| 4 |
+
__all__ = ['TSArtifact', 'infer_or_inject_freq']
|
| 5 |
+
|
| 6 |
+
# %% ../nbs/load.ipynb 2
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import numpy as np
|
| 9 |
+
from fastcore.all import *
|
| 10 |
+
import wandb
|
| 11 |
+
from datetime import datetime, timedelta
|
| 12 |
+
from .imports import *
|
| 13 |
+
from .utils import *
|
| 14 |
+
import pickle
|
| 15 |
+
import pyarrow.feather as ft
|
| 16 |
+
|
| 17 |
+
# %% ../nbs/load.ipynb 7
|
| 18 |
+
class TSArtifact(wandb.Artifact):
|
| 19 |
+
|
| 20 |
+
default_storage_path = Path(Path.home()/'data/wandb_artifacts/')
|
| 21 |
+
date_format = '%Y-%m-%d %H:%M:%S' # TODO add milliseconds
|
| 22 |
+
handle_missing_values_techniques = {
|
| 23 |
+
'linear_interpolation': lambda df : df.interpolate(method='linear', limit_direction='both'),
|
| 24 |
+
'overall_mean': lambda df : df.fillna(df.mean()),
|
| 25 |
+
'overall_median': lambda df : df.fillna(df.median()),
|
| 26 |
+
'backward_fill' : lambda df : df.fillna(method='bfill'),
|
| 27 |
+
'forward_fill' : lambda df : df.fillna(method='ffill')
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
"Class that represents a wandb artifact containing time series data. sd stands for start_date \
|
| 31 |
+
and ed for end_date. Both should be pd.Timestamps"
|
| 32 |
+
|
| 33 |
+
@delegates(wandb.Artifact.__init__)
|
| 34 |
+
def __init__(self, name, sd:pd.Timestamp, ed:pd.Timestamp, **kwargs):
|
| 35 |
+
super().__init__(type='dataset', name=name, **kwargs)
|
| 36 |
+
self.sd = sd
|
| 37 |
+
self.ed = ed
|
| 38 |
+
if self.metadata is None:
|
| 39 |
+
self.metadata = dict()
|
| 40 |
+
self.metadata['TS'] = dict(sd = self.sd.strftime(self.date_format),
|
| 41 |
+
ed = self.ed.strftime(self.date_format))
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
@classmethod
|
| 45 |
+
def from_daily_csv_files(cls, root_path, fread=pd.read_csv, start_date=None, end_date=None, metadata=None, **kwargs):
|
| 46 |
+
|
| 47 |
+
"Create a wandb artifact of type `dataset`, containing the CSV files from `start_date` \
|
| 48 |
+
to `end_date`. Dates must be pased as `datetime.datetime` objects. If a `wandb_run` is \
|
| 49 |
+
defined, the created artifact will be logged to that run, using the longwall name as \
|
| 50 |
+
artifact name, and the date range as version."
|
| 51 |
+
|
| 52 |
+
return None
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
@classmethod
|
| 56 |
+
@delegates(__init__)
|
| 57 |
+
def from_df(cls, df:pd.DataFrame, name:str, path:str=None, sd:pd.Timestamp=None, ed:pd.Timestamp=None,
|
| 58 |
+
normalize:bool=False, missing_values_technique:str=None, resampling_freq:str=None, **kwargs):
|
| 59 |
+
|
| 60 |
+
"""
|
| 61 |
+
Create a TSArtifact of type `dataset`, using the DataFrame `df` samples from \
|
| 62 |
+
`sd` (start date) to `ed` (end date). Dates must be passed as `datetime.datetime` \
|
| 63 |
+
objects. The transformed DataFrame is stored as a pickle file in the path `path` \
|
| 64 |
+
and its reference is added to the artifact entries. Additionally, the dataset can \
|
| 65 |
+
be normalized (see `normalize` argument) or transformed using missing values \
|
| 66 |
+
handling techniques (see `missing_values_technique` argument) or resampling (see \
|
| 67 |
+
`resampling_freq` argument).
|
| 68 |
+
|
| 69 |
+
Arguments:
|
| 70 |
+
df: (DataFrame) The dataframe you want to convert into an artifact.
|
| 71 |
+
name: (str) The artifact name.
|
| 72 |
+
path: (str, optional) The path where the file, containing the new transformed \
|
| 73 |
+
dataframe, is saved. Default None.
|
| 74 |
+
sd: (sd, optional) Start date. By default, the first index of `df` is taken.
|
| 75 |
+
ed: (ed, optional) End date. By default, the last index of `df` is taken.
|
| 76 |
+
normalize: (bool, optional) If the dataset values should be normalized. Default\
|
| 77 |
+
False.
|
| 78 |
+
missing_values_technique: (str, optional) The technique used to handle missing \
|
| 79 |
+
values. Options: "linear_iterpolation", "overall_mean", "overall_median" or \
|
| 80 |
+
None. Default None.
|
| 81 |
+
resampling_freq: (str, optional) The offset string or object representing \
|
| 82 |
+
frequency conversion for time series resampling. Default None.
|
| 83 |
+
|
| 84 |
+
Returns:
|
| 85 |
+
TSArtifact object.
|
| 86 |
+
"""
|
| 87 |
+
sd = df.index[0] if sd is None else sd
|
| 88 |
+
ed = df.index[-1] if ed is None else ed
|
| 89 |
+
obj = cls(name, sd=sd, ed=ed, **kwargs)
|
| 90 |
+
df = df.query('@obj.sd <= index <= @obj.ed')
|
| 91 |
+
obj.metadata['TS']['created'] = 'from-df'
|
| 92 |
+
obj.metadata['TS']['n_vars'] = df.columns.__len__()
|
| 93 |
+
|
| 94 |
+
# Handle Missing Values
|
| 95 |
+
df = obj.handle_missing_values_techniques[missing_values_technique](df) if missing_values_technique is not None else df
|
| 96 |
+
obj.metadata['TS']['handle_missing_values_technique'] = missing_values_technique.__str__()
|
| 97 |
+
obj.metadata['TS']['has_missing_values'] = np.any(df.isna().values).__str__()
|
| 98 |
+
|
| 99 |
+
# Indexing and Resampling
|
| 100 |
+
if resampling_freq: df = df.resample(resampling_freq).mean()
|
| 101 |
+
obj.metadata['TS']['n_samples'] = len(df)
|
| 102 |
+
obj.metadata['TS']['freq'] = str(df.index.freq)
|
| 103 |
+
|
| 104 |
+
# Time Series Variables
|
| 105 |
+
obj.metadata['TS']['vars'] = list(df.columns)
|
| 106 |
+
|
| 107 |
+
# Normalization - Save the previous means and stds
|
| 108 |
+
if normalize:
|
| 109 |
+
obj.metadata['TS']['normalization'] = dict(means = df.describe().loc['mean'].to_dict(),
|
| 110 |
+
stds = df.describe().loc['std'].to_dict())
|
| 111 |
+
df = normalize_columns(df)
|
| 112 |
+
|
| 113 |
+
# Hash and save
|
| 114 |
+
hash_code = str(pd.util.hash_pandas_object(df).sum()) # str(hash(df.values.tobytes()))
|
| 115 |
+
path = obj.default_storage_path/f'{hash_code}' if path is None else Path(path)/f'{hash_code}.feather'
|
| 116 |
+
ft.write_feather(df, path)
|
| 117 |
+
obj.metadata['TS']['hash'] = hash_code
|
| 118 |
+
obj.add_file(str(path))
|
| 119 |
+
|
| 120 |
+
return obj
|
| 121 |
+
|
| 122 |
+
# %% ../nbs/load.ipynb 11
|
| 123 |
+
@patch
|
| 124 |
+
def to_df(self:wandb.apis.public.Artifact):
|
| 125 |
+
"Download the files of a saved wandb artifact and process them as a single dataframe. The artifact must \
|
| 126 |
+
come from a call to `run.use_artifact` with a proper wandb run."
|
| 127 |
+
# The way we have to ensure that the argument comes from a TS arfitact is the metadata
|
| 128 |
+
if self.metadata.get('TS') is None:
|
| 129 |
+
print(f'ERROR:{self} does not come from a logged TSArtifact')
|
| 130 |
+
return None
|
| 131 |
+
dir = Path(self.download())
|
| 132 |
+
if self.metadata['TS']['created'] == 'from-df':
|
| 133 |
+
# Call read_pickle with the single file from dir
|
| 134 |
+
#return pd.read_pickle(dir.ls()[0])
|
| 135 |
+
return ft.read_feather(dir.ls()[0])
|
| 136 |
+
else:
|
| 137 |
+
print("ERROR: Only from_df method is allowed yet")
|
| 138 |
+
|
| 139 |
+
# %% ../nbs/load.ipynb 13
|
| 140 |
+
@patch
|
| 141 |
+
def to_tsartifact(self:wandb.apis.public.Artifact):
|
| 142 |
+
"Cast an artifact as a TS artifact. The artifact must have been created from one of the \
|
| 143 |
+
class creation methods of the class `TSArtifact`. This is useful to go back to a TSArtifact \
|
| 144 |
+
after downloading an artifact through the wand API"
|
| 145 |
+
return TSArtifact(name=self.digest, #TODO change this
|
| 146 |
+
sd=pd.to_datetime(self.metadata['TS']['sd'], format=TSArtifact.date_format),
|
| 147 |
+
ed=pd.to_datetime(self.metadata['TS']['sd'], format=TSArtifact.date_format),
|
| 148 |
+
description=self.description,
|
| 149 |
+
metadata=self.metadata)
|
| 150 |
+
|
| 151 |
+
# %% ../nbs/load.ipynb 15
|
| 152 |
+
@delegates(pd.to_datetime)
|
| 153 |
+
def infer_or_inject_freq(df, injected_freq='1s', start_date=None, **kwargs):
|
| 154 |
+
"""
|
| 155 |
+
Infer index frequency. If there's not a proper time index, create fake timestamps,
|
| 156 |
+
keeping the desired `injected_freq`. If that is None, set a default one of 1 second.
|
| 157 |
+
start_date: the first date of the index (int or string).
|
| 158 |
+
"""
|
| 159 |
+
inferred_freq = pd.infer_freq(df.index)
|
| 160 |
+
if inferred_freq == 'N':
|
| 161 |
+
timedelta = pd.to_timedelta(injected_freq)
|
| 162 |
+
df.index = pd.to_datetime(ifnone(start_date, 0), **kwargs) + timedelta*df.index
|
| 163 |
+
df.index.freq = pd.infer_freq(df.index)
|
| 164 |
+
else:
|
| 165 |
+
df.index.freq = inferred_freq
|
| 166 |
+
return df
|
dvats/.ipynb_checkpoints/utils-checkpoint.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/utils.ipynb (unless otherwise specified).
|
| 2 |
+
|
| 3 |
+
__all__ = ['generate_TS_df', 'normalize_columns', 'remove_constant_columns', 'ReferenceArtifact', 'PrintLayer',
|
| 4 |
+
'get_wandb_artifacts', 'get_pickle_artifact']
|
| 5 |
+
|
| 6 |
+
# Cell
|
| 7 |
+
from .imports import *
|
| 8 |
+
from fastcore.all import *
|
| 9 |
+
import wandb
|
| 10 |
+
import pickle
|
| 11 |
+
import pandas as pd
|
| 12 |
+
import numpy as np
|
| 13 |
+
#import tensorflow as tf
|
| 14 |
+
import torch.nn as nn
|
| 15 |
+
from fastai.basics import *
|
| 16 |
+
|
| 17 |
+
# Cell
|
| 18 |
+
def generate_TS_df(rows, cols):
    """Generate a random multivariate time series as a DataFrame.

    Each of the `cols` columns represents a variable and each of the `rows`
    rows a time point (sample). The timestamp is in the index of the
    dataframe, created with an even spacing of 1 second between samples,
    starting at the current time.

    Note: the original built the index with `np.arange(now1, now2 + (rows-1)s, 1s)`,
    whose exclusive upper bound only yields `rows` samples because the second
    `Timestamp.now()` call is microseconds later than the first; if both calls
    returned the same instant it would produce `rows - 1` samples.
    `pd.date_range` with `periods=rows` yields exactly `rows` deterministically.
    """
    index = pd.date_range(start=pd.Timestamp.now(),
                          periods=rows,
                          freq=pd.Timedelta(seconds=1))
    data = np.random.randn(rows, cols)
    return pd.DataFrame(data, index=index)
|
| 27 |
+
|
| 28 |
+
# Cell
|
| 29 |
+
def normalize_columns(df:pd.DataFrame):
    """Standardize every column of `df` to zero mean and unit standard deviation.

    A small epsilon (1e-7) is added to each column's standard deviation so
    that constant columns do not cause a division by zero.
    """
    centered = df - df.mean()
    return centered / (df.std() + 1e-7)
|
| 34 |
+
|
| 35 |
+
# Cell
|
| 36 |
+
def remove_constant_columns(df:pd.DataFrame):
    """Drop every column whose values are all identical.

    A column is considered constant when no entry differs from its first
    value; only the remaining (varying) columns are kept.
    """
    varies = (df != df.iloc[0]).any()
    return df.loc[:, varies]
|
| 38 |
+
|
| 39 |
+
# Cell
|
| 40 |
+
class ReferenceArtifact(wandb.Artifact):
    """W&B artifact holding a single `file://` reference to a pickled object.

    The object passed to the constructor is pickled, the pickle's hash is
    used as the file name, and the file is stored under `folder` (default:
    `~/data/wandb_artifacts/`). The artifact metadata records the hash and
    the object's class name so a later `to_obj` call can restore it.
    """
    default_storage_path = Path('data/wandb_artifacts/') # * this path is relative to Path.home()

    @delegates(wandb.Artifact.__init__)
    def __init__(self, obj, name, type='object', folder=None, **kwargs):
        super().__init__(type=type, name=name, **kwargs)
        # Pickle the object once; the pickle's hash doubles as a
        # content-addressed file name.
        # NOTE(review): hash() of bytes is randomized per interpreter run
        # (PYTHONHASHSEED), so the same object can map to different file
        # names across runs — confirm this is acceptable for deduplication.
        hash_code = str(hash(pickle.dumps(obj)))
        folder = Path(ifnone(folder, Path.home()/self.default_storage_path))
        # Fix: create the storage folder if it does not exist yet, instead
        # of letting open() fail with FileNotFoundError on a fresh machine.
        folder.mkdir(parents=True, exist_ok=True)
        with open(f'{folder}/{hash_code}', 'wb') as f:
            pickle.dump(obj, f)
        self.add_reference(f'file://{folder}/{hash_code}')
        if self.metadata is None:
            self.metadata = dict()
        self.metadata['ref'] = dict()
        self.metadata['ref']['hash'] = hash_code
        self.metadata['ref']['type'] = str(obj.__class__)
|
| 59 |
+
|
| 60 |
+
# Cell
|
| 61 |
+
@patch
def to_obj(self:wandb.apis.public.Artifact):
    """Download the files of a saved ReferenceArtifact and get the referenced object. The artifact must \
    come from a call to `run.use_artifact` with a proper wandb run."""
    # Artifacts created by ReferenceArtifact store the pickle's hash under
    # metadata['ref']; anything else cannot be resolved back into an object.
    if self.metadata.get('ref') is None:
        print(f'ERROR:{self} does not come from a saved ReferenceArtifact')
        return None
    # NOTE(review): default_storage_path is a relative path here
    # ('data/wandb_artifacts/'), while ReferenceArtifact.__init__ resolves it
    # against Path.home() — confirm the working directory matches, otherwise
    # the local-copy shortcut below never hits and every call downloads.
    original_path = ReferenceArtifact.default_storage_path/self.metadata['ref']['hash']
    # Prefer the local content-addressed copy; otherwise download the artifact
    # files and take the first entry of the download directory.
    path = original_path if original_path.exists() else Path(self.download()).ls()[0]
    with open(path, 'rb') as f:
        obj = pickle.load(f)
    return obj
|
| 73 |
+
|
| 74 |
+
# Cell
|
| 75 |
+
import torch.nn as nn
|
| 76 |
+
class PrintLayer(nn.Module):
    """Identity layer that prints the shape of whatever passes through it.

    Drop it between the layers of an `nn.Sequential` to debug tensor shapes;
    the input tensor is returned unchanged.
    """
    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Side effect only: log the shape, then pass the tensor along untouched.
        print(x.shape)
        return x
|
| 84 |
+
|
| 85 |
+
# Cell
|
| 86 |
+
@patch
def export_and_get(self:Learner, keep_exported_file=False):
    """
    Export the learner into an auxiliary file, load it and return it back.

    Round-trips the Learner through fastai's export/load_learner so the
    returned object is a fresh, inference-ready copy of `self`.
    """
    aux_path = Path('aux.pkl')
    self.export(fname='aux.pkl')
    aux_learn = load_learner('aux.pkl')
    # Clean up the temporary file unless the caller wants to keep it.
    # NOTE(review): Learner.export writes relative to self.path, while
    # aux_path/load_learner use the current working directory — confirm both
    # resolve to the same file before relying on unlink() here.
    if not keep_exported_file: aux_path.unlink()
    return aux_learn
|
| 96 |
+
|
| 97 |
+
# Cell
|
| 98 |
+
def get_wandb_artifacts(project_path, type=None, name=None, last_version=True):
    """
    Get the artifacts logged in a wandb project.
    Input:
        - `project_path` (str): entity/project_name
        - `type` (str): whether to return only one type of artifacts
        - `name` (str): Leave none to have all artifact names
        - `last_version`: whether to return only the last version of each artifact or not

    Output: List of artifacts
    """
    public_api = wandb.Api()
    # Either restrict to the single requested type or walk every type in the project.
    if type is not None:
        types = [public_api.artifact_type(type, project_path)]
    else:
        types = public_api.artifact_types(project_path)

    res = L()
    for kind in types:
        for collection in kind.collections():
            if name is None or name == collection.name:
                # per_page=1 keeps each API page small; the paginator still
                # yields every version lazily when iterated.
                versions = public_api.artifact_versions(
                    kind.type,
                    "/".join([kind.entity, kind.project, collection.name]),
                    per_page=1,
                )
                # NOTE(review): taking next(versions) as "the last version"
                # assumes the API yields newest first — confirm against the
                # wandb artifact_versions documentation.
                if last_version: res += next(versions)
                else: res += L(versions)
    return list(res)
|
| 127 |
+
|
| 128 |
+
# Cell
|
| 129 |
+
def get_pickle_artifact(filename):
    """Load and return the object pickled in `filename`."""
    with open(filename, "rb") as handle:
        return pickle.load(handle)
|
dvats/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
__version__ = "0.0.1"
|
dvats/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (147 Bytes). View file
|
|
|
dvats/__pycache__/all.cpython-310.pyc
ADDED
|
Binary file (273 Bytes). View file
|
|
|
dvats/__pycache__/dr.cpython-310.pyc
ADDED
|
Binary file (4.12 kB). View file
|
|
|
dvats/__pycache__/encoder.cpython-310.pyc
ADDED
|
Binary file (10.5 kB). View file
|
|
|
dvats/__pycache__/imports.cpython-310.pyc
ADDED
|
Binary file (940 Bytes). View file
|
|
|
dvats/__pycache__/load.cpython-310.pyc
ADDED
|
Binary file (7.04 kB). View file
|
|
|
dvats/__pycache__/utils.cpython-310.pyc
ADDED
|
Binary file (7.84 kB). View file
|
|
|
dvats/__pycache__/visualization.cpython-310.pyc
ADDED
|
Binary file (2.22 kB). View file
|
|
|
dvats/__pycache__/xai.cpython-310.pyc
ADDED
|
Binary file (26.1 kB). View file
|
|
|
dvats/_modidx.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Autogenerated by nbdev
|
| 2 |
+
|
| 3 |
+
d = { 'settings': { 'branch': 'master',
|
| 4 |
+
'doc_baseurl': '/dvats/',
|
| 5 |
+
'doc_host': 'https://vrodriguezf.github.io',
|
| 6 |
+
'git_url': 'https://github.com/vrodriguezf/deepvats',
|
| 7 |
+
'lib_path': 'dvats'},
|
| 8 |
+
'syms': { 'dvats.all': {},
|
| 9 |
+
'dvats.dr': { 'dvats.dr.check_compatibility': ('dr.html#check_compatibility', 'dvats/dr.py'),
|
| 10 |
+
'dvats.dr.cluster_score': ('dr.html#cluster_score', 'dvats/dr.py'),
|
| 11 |
+
'dvats.dr.color_for_percentage': ('dr.html#color_for_percentage', 'dvats/dr.py'),
|
| 12 |
+
'dvats.dr.create_bar': ('dr.html#create_bar', 'dvats/dr.py'),
|
| 13 |
+
'dvats.dr.get_PCA_prjs': ('dr.html#get_pca_prjs', 'dvats/dr.py'),
|
| 14 |
+
'dvats.dr.get_TSNE_prjs': ('dr.html#get_tsne_prjs', 'dvats/dr.py'),
|
| 15 |
+
'dvats.dr.get_UMAP_prjs': ('dr.html#get_umap_prjs', 'dvats/dr.py'),
|
| 16 |
+
'dvats.dr.get_gpu_memory': ('dr.html#get_gpu_memory', 'dvats/dr.py'),
|
| 17 |
+
'dvats.dr.gpu_memory_status': ('dr.html#gpu_memory_status', 'dvats/dr.py')},
|
| 18 |
+
'dvats.encoder': { 'dvats.encoder.DCAE_torch': ('encoder.html#dcae_torch', 'dvats/encoder.py'),
|
| 19 |
+
'dvats.encoder.DCAE_torch.__init__': ('encoder.html#__init__', 'dvats/encoder.py'),
|
| 20 |
+
'dvats.encoder.DCAE_torch.forward': ('encoder.html#forward', 'dvats/encoder.py'),
|
| 21 |
+
'dvats.encoder.color_for_percentage': ('encoder.html#color_for_percentage', 'dvats/encoder.py'),
|
| 22 |
+
'dvats.encoder.create_bar': ('encoder.html#create_bar', 'dvats/encoder.py'),
|
| 23 |
+
'dvats.encoder.get_enc_embs': ('encoder.html#get_enc_embs', 'dvats/encoder.py'),
|
| 24 |
+
'dvats.encoder.get_enc_embs_set_stride_set_batch_size': ( 'encoder.html#get_enc_embs_set_stride_set_batch_size',
|
| 25 |
+
'dvats/encoder.py'),
|
| 26 |
+
'dvats.encoder.get_gpu_memory_': ('encoder.html#get_gpu_memory_', 'dvats/encoder.py'),
|
| 27 |
+
'dvats.encoder.gpu_memory_status_': ('encoder.html#gpu_memory_status_', 'dvats/encoder.py')},
|
| 28 |
+
'dvats.imports': {},
|
| 29 |
+
'dvats.load': { 'dvats.load.TSArtifact': ('load.html#tsartifact', 'dvats/load.py'),
|
| 30 |
+
'dvats.load.TSArtifact.__init__': ('load.html#__init__', 'dvats/load.py'),
|
| 31 |
+
'dvats.load.TSArtifact.from_daily_csv_files': ('load.html#from_daily_csv_files', 'dvats/load.py'),
|
| 32 |
+
'dvats.load.TSArtifact.from_df': ('load.html#from_df', 'dvats/load.py'),
|
| 33 |
+
'dvats.load.infer_or_inject_freq': ('load.html#infer_or_inject_freq', 'dvats/load.py'),
|
| 34 |
+
'dvats.load.wandb.apis.public.Artifact.to_df': ('load.html#wandb.apis.public.artifact.to_df', 'dvats/load.py'),
|
| 35 |
+
'dvats.load.wandb.apis.public.Artifact.to_tsartifact': ( 'load.html#wandb.apis.public.artifact.to_tsartifact',
|
| 36 |
+
'dvats/load.py')},
|
| 37 |
+
'dvats.utils': { 'dvats.utils.Learner.export_and_get': ('utils.html#learner.export_and_get', 'dvats/utils.py'),
|
| 38 |
+
'dvats.utils.PrintLayer': ('utils.html#printlayer', 'dvats/utils.py'),
|
| 39 |
+
'dvats.utils.PrintLayer.__init__': ('utils.html#__init__', 'dvats/utils.py'),
|
| 40 |
+
'dvats.utils.PrintLayer.forward': ('utils.html#forward', 'dvats/utils.py'),
|
| 41 |
+
'dvats.utils.ReferenceArtifact': ('utils.html#referenceartifact', 'dvats/utils.py'),
|
| 42 |
+
'dvats.utils.ReferenceArtifact.__init__': ('utils.html#__init__', 'dvats/utils.py'),
|
| 43 |
+
'dvats.utils.exec_with_and_feather_k_output': ('utils.html#exec_with_and_feather_k_output', 'dvats/utils.py'),
|
| 44 |
+
'dvats.utils.exec_with_feather': ('utils.html#exec_with_feather', 'dvats/utils.py'),
|
| 45 |
+
'dvats.utils.exec_with_feather_k_output': ('utils.html#exec_with_feather_k_output', 'dvats/utils.py'),
|
| 46 |
+
'dvats.utils.generate_TS_df': ('utils.html#generate_ts_df', 'dvats/utils.py'),
|
| 47 |
+
'dvats.utils.get_pickle_artifact': ('utils.html#get_pickle_artifact', 'dvats/utils.py'),
|
| 48 |
+
'dvats.utils.get_wandb_artifacts': ('utils.html#get_wandb_artifacts', 'dvats/utils.py'),
|
| 49 |
+
'dvats.utils.learner_module_leaves': ('utils.html#learner_module_leaves', 'dvats/utils.py'),
|
| 50 |
+
'dvats.utils.learner_module_leaves_subtables': ( 'utils.html#learner_module_leaves_subtables',
|
| 51 |
+
'dvats/utils.py'),
|
| 52 |
+
'dvats.utils.normalize_columns': ('utils.html#normalize_columns', 'dvats/utils.py'),
|
| 53 |
+
'dvats.utils.py_function': ('utils.html#py_function', 'dvats/utils.py'),
|
| 54 |
+
'dvats.utils.remove_constant_columns': ('utils.html#remove_constant_columns', 'dvats/utils.py'),
|
| 55 |
+
'dvats.utils.wandb.apis.public.Artifact.to_obj': ( 'utils.html#wandb.apis.public.artifact.to_obj',
|
| 56 |
+
'dvats/utils.py')},
|
| 57 |
+
'dvats.visualization': { 'dvats.visualization.plot_TS': ('visualization.html#plot_ts', 'dvats/visualization.py'),
|
| 58 |
+
'dvats.visualization.plot_mask': ('visualization.html#plot_mask', 'dvats/visualization.py'),
|
| 59 |
+
'dvats.visualization.plot_validation_ts_ae': ( 'visualization.html#plot_validation_ts_ae',
|
| 60 |
+
'dvats/visualization.py')},
|
| 61 |
+
'dvats.xai': { 'dvats.xai.InteractiveAnomalyPlot': ('xai.html#interactiveanomalyplot', 'dvats/xai.py'),
|
| 62 |
+
'dvats.xai.InteractiveAnomalyPlot.__init__': ('xai.html#__init__', 'dvats/xai.py'),
|
| 63 |
+
'dvats.xai.InteractiveAnomalyPlot.plot_projections_clusters_interactive': ( 'xai.html#plot_projections_clusters_interactive',
|
| 64 |
+
'dvats/xai.py'),
|
| 65 |
+
'dvats.xai.InteractiveTSPlot': ('xai.html#interactivetsplot', 'dvats/xai.py'),
|
| 66 |
+
'dvats.xai.InteractiveTSPlot.__init__': ('xai.html#__init__', 'dvats/xai.py'),
|
| 67 |
+
'dvats.xai.add_movement_buttons': ('xai.html#add_movement_buttons', 'dvats/xai.py'),
|
| 68 |
+
'dvats.xai.add_selected_features': ('xai.html#add_selected_features', 'dvats/xai.py'),
|
| 69 |
+
'dvats.xai.add_windows': ('xai.html#add_windows', 'dvats/xai.py'),
|
| 70 |
+
'dvats.xai.anomaly_score': ('xai.html#anomaly_score', 'dvats/xai.py'),
|
| 71 |
+
'dvats.xai.calculate_cluster_stats': ('xai.html#calculate_cluster_stats', 'dvats/xai.py'),
|
| 72 |
+
'dvats.xai.delta_x_bigger': ('xai.html#delta_x_bigger', 'dvats/xai.py'),
|
| 73 |
+
'dvats.xai.delta_x_lower': ('xai.html#delta_x_lower', 'dvats/xai.py'),
|
| 74 |
+
'dvats.xai.delta_y_bigger': ('xai.html#delta_y_bigger', 'dvats/xai.py'),
|
| 75 |
+
'dvats.xai.delta_y_lower': ('xai.html#delta_y_lower', 'dvats/xai.py'),
|
| 76 |
+
'dvats.xai.detector': ('xai.html#detector', 'dvats/xai.py'),
|
| 77 |
+
'dvats.xai.get_anomalies': ('xai.html#get_anomalies', 'dvats/xai.py'),
|
| 78 |
+
'dvats.xai.get_anomaly_styles': ('xai.html#get_anomaly_styles', 'dvats/xai.py'),
|
| 79 |
+
'dvats.xai.get_dataset': ('xai.html#get_dataset', 'dvats/xai.py'),
|
| 80 |
+
'dvats.xai.get_dateformat': ('xai.html#get_dateformat', 'dvats/xai.py'),
|
| 81 |
+
'dvats.xai.get_df_selected': ('xai.html#get_df_selected', 'dvats/xai.py'),
|
| 82 |
+
'dvats.xai.get_embeddings': ('xai.html#get_embeddings', 'dvats/xai.py'),
|
| 83 |
+
'dvats.xai.get_prjs': ('xai.html#get_prjs', 'dvats/xai.py'),
|
| 84 |
+
'dvats.xai.initial_plot': ('xai.html#initial_plot', 'dvats/xai.py'),
|
| 85 |
+
'dvats.xai.merge_overlapping_windows': ('xai.html#merge_overlapping_windows', 'dvats/xai.py'),
|
| 86 |
+
'dvats.xai.move_down': ('xai.html#move_down', 'dvats/xai.py'),
|
| 87 |
+
'dvats.xai.move_left': ('xai.html#move_left', 'dvats/xai.py'),
|
| 88 |
+
'dvats.xai.move_right': ('xai.html#move_right', 'dvats/xai.py'),
|
| 89 |
+
'dvats.xai.move_up': ('xai.html#move_up', 'dvats/xai.py'),
|
| 90 |
+
'dvats.xai.plot_anomaly_scores_distribution': ('xai.html#plot_anomaly_scores_distribution', 'dvats/xai.py'),
|
| 91 |
+
'dvats.xai.plot_clusters_with_anomalies': ('xai.html#plot_clusters_with_anomalies', 'dvats/xai.py'),
|
| 92 |
+
'dvats.xai.plot_clusters_with_anomalies_interactive_plot': ( 'xai.html#plot_clusters_with_anomalies_interactive_plot',
|
| 93 |
+
'dvats/xai.py'),
|
| 94 |
+
'dvats.xai.plot_initial_config': ('xai.html#plot_initial_config', 'dvats/xai.py'),
|
| 95 |
+
'dvats.xai.plot_projections': ('xai.html#plot_projections', 'dvats/xai.py'),
|
| 96 |
+
'dvats.xai.plot_projections_clusters': ('xai.html#plot_projections_clusters', 'dvats/xai.py'),
|
| 97 |
+
'dvats.xai.plot_save': ('xai.html#plot_save', 'dvats/xai.py'),
|
| 98 |
+
'dvats.xai.set_features_buttons': ('xai.html#set_features_buttons', 'dvats/xai.py'),
|
| 99 |
+
'dvats.xai.setup_boxes': ('xai.html#setup_boxes', 'dvats/xai.py'),
|
| 100 |
+
'dvats.xai.setup_style': ('xai.html#setup_style', 'dvats/xai.py'),
|
| 101 |
+
'dvats.xai.shift_datetime': ('xai.html#shift_datetime', 'dvats/xai.py'),
|
| 102 |
+
'dvats.xai.show': ('xai.html#show', 'dvats/xai.py'),
|
| 103 |
+
'dvats.xai.toggle_trace': ('xai.html#toggle_trace', 'dvats/xai.py'),
|
| 104 |
+
'dvats.xai.umap_parameters': ('xai.html#umap_parameters', 'dvats/xai.py'),
|
| 105 |
+
'dvats.xai.update_plot': ('xai.html#update_plot', 'dvats/xai.py')}}}
|
dvats/_nbdev.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AUTOGENERATED BY NBDEV! DO NOT EDIT!
|
| 2 |
+
|
| 3 |
+
__all__ = ["index", "modules", "custom_doc_links", "git_url"]
|
| 4 |
+
|
| 5 |
+
index = {"check_compatibility": "dr.ipynb",
|
| 6 |
+
"get_UMAP_prjs": "dr.ipynb",
|
| 7 |
+
"get_PCA_prjs": "dr.ipynb",
|
| 8 |
+
"get_TSNE_prjs": "dr.ipynb",
|
| 9 |
+
"DCAE_torch": "encoder.ipynb",
|
| 10 |
+
"ENCODER_EMBS_MODULE_NAME": "encoder.ipynb",
|
| 11 |
+
"get_enc_embs": "encoder.ipynb",
|
| 12 |
+
"TSArtifact": "load.ipynb",
|
| 13 |
+
"wandb.apis.public.Artifact.to_df": "load.ipynb",
|
| 14 |
+
"wandb.apis.public.Artifact.to_tsartifact": "load.ipynb",
|
| 15 |
+
"infer_or_inject_freq": "load.ipynb",
|
| 16 |
+
"generate_TS_df": "utils.ipynb",
|
| 17 |
+
"normalize_columns": "utils.ipynb",
|
| 18 |
+
"remove_constant_columns": "utils.ipynb",
|
| 19 |
+
"ReferenceArtifact": "utils.ipynb",
|
| 20 |
+
"wandb.apis.public.Artifact.to_obj": "utils.ipynb",
|
| 21 |
+
"PrintLayer": "utils.ipynb",
|
| 22 |
+
"Learner.export_and_get": "utils.ipynb",
|
| 23 |
+
"get_wandb_artifacts": "utils.ipynb",
|
| 24 |
+
"get_pickle_artifact": "utils.ipynb",
|
| 25 |
+
"plot_TS": "visualization.ipynb",
|
| 26 |
+
"plot_validation_ts_ae": "visualization.ipynb",
|
| 27 |
+
"plot_mask": "visualization.ipynb"}
|
| 28 |
+
|
| 29 |
+
modules = ["dr.py",
|
| 30 |
+
"encoder.py",
|
| 31 |
+
"load.py",
|
| 32 |
+
"utils.py",
|
| 33 |
+
"visualization.py"]
|
| 34 |
+
|
| 35 |
+
doc_url = "https://vrodriguezf.github.io/tchub/"
|
| 36 |
+
|
| 37 |
+
git_url = "https://gitlab.geist.re/pml/x_timecluster_extension/tree/master/"
|
| 38 |
+
|
| 39 |
+
def custom_doc_links(name):
    """Return None: this project defines no custom documentation links."""
    return None
|
dvats/all.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import dvats
|
| 2 |
+
from .imports import *
|
| 3 |
+
from .load import *
|
| 4 |
+
from .utils import *
|
| 5 |
+
from .dr import *
|
| 6 |
+
from .encoder import *
|
| 7 |
+
from .visualization import *
|
| 8 |
+
from .xai import *
|
dvats/dr.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/dr.ipynb.
|
| 2 |
+
|
| 3 |
+
# %% auto 0
|
| 4 |
+
__all__ = ['get_gpu_memory', 'color_for_percentage', 'create_bar', 'gpu_memory_status', 'check_compatibility', 'get_UMAP_prjs',
|
| 5 |
+
'get_PCA_prjs', 'get_TSNE_prjs', 'cluster_score']
|
| 6 |
+
|
| 7 |
+
# %% ../nbs/dr.ipynb 2
|
| 8 |
+
import subprocess
|
| 9 |
+
def get_gpu_memory(device = 0):
    """Return (used_MiB, total_MiB, used_percentage) for GPU `device`.

    Queries `nvidia-smi` once for both counters instead of twice, which
    halves the subprocess overhead and reads used/total at the same instant
    rather than at two slightly different times.

    Requires the `nvidia-smi` binary on PATH; raises
    `subprocess.CalledProcessError` if the query fails.
    """
    output = subprocess.check_output([
        "nvidia-smi",
        "--query-gpu=memory.used,memory.total",
        "--format=csv,noheader,nounits",
        "--id=" + str(device),
    ])
    used_memory, total_memory = (int(v) for v in output.decode().split('\n')[0].split(','))
    percentage = round((used_memory / total_memory) * 100)
    return used_memory, total_memory, percentage
|
| 17 |
+
|
| 18 |
+
def color_for_percentage(percentage):
    """Map a usage percentage to an ANSI color escape code.

    Bands: <20 gray, <40 blue, <60 green, <80 orange, otherwise red.
    """
    bands = (
        (20, "\033[90m"),  # Gray
        (40, "\033[94m"),  # Blue
        (60, "\033[92m"),  # Green
        (80, "\033[93m"),  # Orange
    )
    for limit, code in bands:
        if percentage < limit:
            return code
    return "\033[91m"  # Red
|
| 29 |
+
|
| 30 |
+
def create_bar(percentage, color_code, length=20):
    """Render a fixed-width progress bar wrapped in `color_code`.

    The bar has `length` cells; `percentage` of them are filled with a block
    character and the rest with dashes. The ANSI reset sequence is appended
    so later terminal output is unaffected.
    """
    filled = int(length * percentage // 100)
    cells = "█" * filled + "-" * (length - filled)
    return f"{color_code}{cells}\033[0m"
|
| 34 |
+
|
| 35 |
+
def gpu_memory_status(device=0):
    """Print a colored usage bar plus used/total MiB for GPU `device`.

    Fix: the second line previously printed the total memory under the
    label "Used mem"; it is now labeled "Total mem".
    """
    used, total, percentage = get_gpu_memory(device)
    color_code = color_for_percentage(percentage)
    bar = create_bar(percentage, color_code)
    print(f"GPU | Used mem: {used}")
    print(f"GPU | Total mem: {total}")
    print(f"GPU | Memory Usage: [{bar}] {color_code}{percentage}%\033[0m")
|
| 42 |
+
|
| 43 |
+
# %% ../nbs/dr.ipynb 4
|
| 44 |
+
import umap
|
| 45 |
+
import cudf
|
| 46 |
+
import cuml
|
| 47 |
+
import pandas as pd
|
| 48 |
+
import numpy as np
|
| 49 |
+
from fastcore.all import *
|
| 50 |
+
from .imports import *
|
| 51 |
+
from .load import TSArtifact
|
| 52 |
+
|
| 53 |
+
# %% ../nbs/dr.ipynb 5
|
| 54 |
+
def check_compatibility(dr_ar:TSArtifact, enc_ar:TSArtifact):
    "Function to check that the artifact used by the encoder model and the artifact that is \
    going to be passed through the DR are compatible"
    try:
        # Check that both artifacts have the same variables
        chk_vars = dr_ar.metadata['TS']['vars'] == enc_ar.metadata['TS']['vars']
        # Check that both artifacts have the same freq
        chk_freq = dr_ar.metadata['TS']['freq'] == enc_ar.metadata['TS']['freq']
        # Check that the dr artifact is not normalized (not normalized data has not the key normalization)
        chk_norm = dr_ar.metadata['TS'].get('normalization') is None
        # Check that the dr artifact has not missing values.
        # NOTE: 'has_missing_values' is stored as a string, hence the "False" comparison.
        chk_miss = dr_ar.metadata['TS']['has_missing_values'] == "False"
        # Check all logical vars. A missing metadata key raises KeyError, which the
        # broad except below also reports as an incompatibility before re-raising.
        if chk_vars and chk_freq and chk_norm and chk_miss:
            print("Artifacts are compatible.")
        else:
            raise Exception
    except Exception as e:
        print("Artifacts are not compatible.")
        raise e
    # Reached only when every check passed.
    return None
|
| 75 |
+
|
| 76 |
+
# %% ../nbs/dr.ipynb 7
|
| 77 |
+
#Comment this part after 4_seconds debugged
|
| 78 |
+
import hashlib
|
| 79 |
+
|
| 80 |
+
# %% ../nbs/dr.ipynb 8
|
| 81 |
+
import warnings
|
| 82 |
+
import sys
|
| 83 |
+
from numba.core.errors import NumbaPerformanceWarning
|
| 84 |
+
@delegates(cuml.UMAP)
def get_UMAP_prjs(
    input_data,
    cpu=True,
    print_flag = False,
    check_memory_usage = True,
    **kwargs
):
    "Compute the projections of `input_data` using UMAP, with a configuration contained in `**kwargs`."
    if print_flag:
        print("--> get_UMAP_prjs")
        print("kwargs: ", kwargs)
        sys.stdout.flush()
    ####
    # Debug leftover (see the notebook note "Comment this part after 4_seconds
    # debugged"): the input checksum is printed unconditionally, even when
    # print_flag is False.
    checksum = hashlib.md5(input_data.tobytes()).hexdigest()
    print(checksum)
    ####

    # NOTE(review): gpu_memory_status queries nvidia-smi even on the cpu path —
    # confirm this is intended on machines without a GPU.
    if check_memory_usage: gpu_memory_status()

    warnings.filterwarnings("ignore", category=NumbaPerformanceWarning) # silence NumbaPerformanceWarning

    #reducer = umap.UMAP(**kwargs) if cpu else cuml.UMAP(**kwargs)
    if cpu:
        print("-- umap.UMAP --", cpu)
        sys.stdout.flush()
        reducer = umap.UMAP(**kwargs)
    else:
        print("-- cuml.UMAP --", cpu)
        sys.stdout.flush()
        # cuml is given an unsigned 64-bit seed; presumably its UMAP rejects
        # the plain Python int that umap-learn accepts — TODO confirm.
        if 'random_state' in kwargs:
            kwargs['random_state'] = np.uint64(kwargs['random_state'])
        reducer = cuml.UMAP(**kwargs)

    if print_flag:
        print("------- reducer --------")
        print(reducer)
        print(reducer.get_params())
        print("------- reducer --------")
        sys.stdout.flush()

    projections = reducer.fit_transform(input_data)

    if check_memory_usage: gpu_memory_status()
    if print_flag:
        # Output checksum (unlike the input one above, this is gated on print_flag).
        checksum = hashlib.md5(projections.tobytes()).hexdigest()
        print("prjs checksum ", checksum)
        print("get_UMAP_prjs -->")
        sys.stdout.flush()
    return projections
|
| 134 |
+
|
| 135 |
+
# %% ../nbs/dr.ipynb 13
|
| 136 |
+
@delegates(cuml.PCA)
def get_PCA_prjs(X, cpu=False, **kwargs):
    r"""
    Computes PCA projections of X

    Only the GPU (cuml) backend is implemented; `cpu=True` raises
    NotImplementedError.
    """
    if cpu:
        raise NotImplementedError
    reducer = cuml.PCA(**kwargs)
    return reducer.fit_transform(X)
|
| 147 |
+
|
| 148 |
+
# %% ../nbs/dr.ipynb 15
|
| 149 |
+
@delegates(cuml.TSNE)
def get_TSNE_prjs(X, cpu=False, **kwargs):
    r"""
    Computes TSNE projections of X

    Only the GPU (cuml) backend is implemented; `cpu=True` raises
    NotImplementedError.
    """
    if cpu:
        raise NotImplementedError
    reducer = cuml.TSNE(**kwargs)
    return reducer.fit_transform(X)
|
| 160 |
+
|
| 161 |
+
# %% ../nbs/dr.ipynb 18
|
| 162 |
+
from sklearn.metrics import silhouette_score
|
| 163 |
+
def cluster_score(prjs, clusters_labels, print_flag=False):
    """Compute the silhouette score of a clustering.

    Parameters:
        prjs: 2D array of projected points, shape (n_samples, n_features).
        clusters_labels: cluster label assigned to each sample.
        print_flag: if True, also print the score. Now defaults to False so
            callers no longer have to pass it explicitly (backward-compatible).

    Returns the silhouette score (float in [-1, 1]).
    """
    score = silhouette_score(prjs, clusters_labels)
    if print_flag: print("Silhouette_score:", score)
    return score
|
dvats/encoder.py
ADDED
|
@@ -0,0 +1,301 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/encoder.ipynb.
|
| 2 |
+
|
| 3 |
+
# %% auto 0
|
| 4 |
+
__all__ = ['ENCODER_EMBS_MODULE_NAME', 'get_gpu_memory_', 'color_for_percentage', 'create_bar', 'gpu_memory_status_',
|
| 5 |
+
'DCAE_torch', 'get_enc_embs', 'get_enc_embs_set_stride_set_batch_size']
|
| 6 |
+
|
| 7 |
+
# %% ../nbs/encoder.ipynb 2
|
| 8 |
+
import subprocess
|
| 9 |
+
def get_gpu_memory_(device = 0):
    """Return (used_MiB, total_MiB, used_percentage) for GPU `device`.

    Queries `nvidia-smi` once for both counters instead of twice, which
    halves the subprocess overhead and reads used/total at the same instant
    rather than at two slightly different times.

    Requires the `nvidia-smi` binary on PATH; raises
    `subprocess.CalledProcessError` if the query fails.
    """
    output = subprocess.check_output([
        "nvidia-smi",
        "--query-gpu=memory.used,memory.total",
        "--format=csv,noheader,nounits",
        "--id=" + str(device),
    ])
    used_memory, total_memory = (int(v) for v in output.decode().split('\n')[0].split(','))
    percentage = round((used_memory / total_memory) * 100)
    return used_memory, total_memory, percentage
|
| 17 |
+
|
| 18 |
+
def color_for_percentage(percentage):
    """Translate a usage percentage into an ANSI color escape sequence.

    Bands: <20 gray, <40 blue, <60 green, <80 orange, otherwise red.
    """
    if percentage >= 80:
        return "\033[91m"  # Red
    if percentage >= 60:
        return "\033[93m"  # Orange
    if percentage >= 40:
        return "\033[92m"  # Green
    if percentage >= 20:
        return "\033[94m"  # Blue
    return "\033[90m"  # Gray
|
| 29 |
+
|
| 30 |
+
def create_bar(percentage, color_code, length=20):
    """Build a colorized text progress bar of `length` cells.

    `percentage` of the cells are filled with a block character, the rest
    with dashes; the ANSI reset code is appended at the end.
    """
    filled = int(length * percentage // 100)
    return "".join([color_code, "█" * filled, "-" * (length - filled), "\033[0m"])
|
| 34 |
+
|
| 35 |
+
def gpu_memory_status_(device=0):
    """Print a colored usage bar plus used/total MiB for GPU `device`.

    Fix: the second line previously printed the total memory under the
    label "Used mem"; it is now labeled "Total mem".
    """
    used, total, percentage = get_gpu_memory_(device)
    color_code = color_for_percentage(percentage)
    bar = create_bar(percentage, color_code)
    print(f"GPU | Used mem: {used}")
    print(f"GPU | Total mem: {total}")
    print(f"GPU | Memory Usage: [{bar}] {color_code}{percentage}%\033[0m")
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# %% ../nbs/encoder.ipynb 4
|
| 45 |
+
import pandas as pd
|
| 46 |
+
import numpy as np
|
| 47 |
+
from fastcore.all import *
|
| 48 |
+
from tsai.callback.MVP import *
|
| 49 |
+
from tsai.imports import *
|
| 50 |
+
from tsai.models.InceptionTimePlus import InceptionTimePlus
|
| 51 |
+
from tsai.models.explainability import get_acts_and_grads
|
| 52 |
+
from tsai.models.layers import *
|
| 53 |
+
from tsai.data.validation import combine_split_data
|
| 54 |
+
import time
|
| 55 |
+
|
| 56 |
+
# %% ../nbs/encoder.ipynb 7
|
| 57 |
+
class DCAE_torch(Module):
    # Deep Convolutional AutoEncoder: conv/pool downsampling, a dense
    # bottleneck of `delta` latent features, then conv/upsample decoding.
    def __init__(self, c_in, seq_len, delta, nfs=[64, 32, 12], kss=[10, 5, 5],
                 pool_szs=[2,2,3], output_fsz=10):
        """
        Create a Deep Convolutional Autoencoder for multivariate time series of
        `c_in` dimensions, sliced with a window size of `seq_len`. The parameter
        `delta` sets the number of latent features contained in the Dense layer
        of the network. The number of feature maps (filters) `nfs`, the filter
        sizes `kss` and the pool sizes `pool_szs` can also be adjusted;
        `output_fsz` is the kernel size of the final reconstruction convolution.
        """
        # The three per-layer lists must describe the same number of conv blocks.
        assert all_equal([len(x) for x in [nfs, kss, pool_szs]], np.repeat(len(nfs), 3)), \
            'nfs, kss, and pool_szs must have the same length'
        # Together with the divisibility check below, this guarantees the
        # flattened encoder output has exactly seq_len features:
        # nfs[-1] * (seq_len // prod(pool_szs)) == seq_len.
        assert np.prod(pool_szs) == nfs[-1], \
            'The number of filters in the last conv layer must be equal to the product of pool sizes'
        assert seq_len % np.prod(pool_szs) == 0, \
            'The product of pool sizes must be a divisor of the window size'
        # Encoder: alternating Conv1d + MaxPool1d blocks.
        layers = []
        for i in range_of(kss):
            layers += [Conv1d(ni=nfs[i-1] if i>0 else c_in, nf=nfs[i], ks=kss[i]),
                       nn.MaxPool1d(kernel_size=pool_szs[i])]
        self.downsample = nn.Sequential(*layers)
        # Bottleneck: flatten -> delta latent features -> back to seq_len ->
        # reshape to the encoder's (channels, length) output shape.
        self.bottleneck = nn.Sequential(OrderedDict([
            ('flatten', nn.Flatten()),
            ('latent_in', nn.Linear(seq_len, delta)),
            ('latent_out', nn.Linear(delta, seq_len)),
            ('reshape', Reshape(nfs[-1], seq_len // np.prod(pool_szs)))
        ]))
        # Decoder: mirror of the encoder with Upsample in place of MaxPool,
        # closed by a conv that restores the original c_in channels.
        layers = []
        for i in reversed(range_of(kss)):
            layers += [Conv1d(ni=nfs[i+1] if i != (len(nfs)-1) else nfs[-1],
                              nf=nfs[i], ks=kss[i]),
                       nn.Upsample(scale_factor=pool_szs[i])]
        layers += [Conv1d(ni=nfs[0], nf=c_in, kernel_size=output_fsz)]
        self.upsample = nn.Sequential(*layers)


    def forward(self, x):
        # Encode, squeeze through the latent bottleneck, then decode.
        x = self.downsample(x)
        x = self.bottleneck(x)
        x = self.upsample(x)
        return x
|
| 97 |
+
|
| 98 |
+
# %% ../nbs/encoder.ipynb 10
|
| 99 |
+
# Maps each supported encoder model class to the dotted attribute path of the
# submodule whose activations are extracted as embeddings (see get_enc_embs).
ENCODER_EMBS_MODULE_NAME = {
    InceptionTimePlus: 'backbone', # for mvp based models
    DCAE_torch: 'bottleneck.latent_in'
}
|
| 103 |
+
|
| 104 |
+
# %% ../nbs/encoder.ipynb 12
|
| 105 |
+
def get_enc_embs(X, enc_learn, module=None, cpu=False, average_seq_dim=True, to_numpy=True):
    """
    Get the embeddings of X from an encoder, passed in `enc_learn` as a fastai
    learner. By default, the embeddings are obtained from the last layer
    before the model head, although any layer can be passed to `module`.
    Input
    - `cpu`: Whether to do the model inference in cpu or gpu (GPU recommended)
    - `average_seq_dim`: Whether to aggregate the embeddings in the sequence dimension
    - `to_numpy`: Whether to return the result as a numpy array (if false returns a tensor)
    """
    print("--> Check CUDA")
    if cpu:
        # Move both the dataloaders and the model to CPU before inference.
        print("--> Get enc embs CPU")
        enc_learn.dls.cpu()
        enc_learn.cpu()
    else:
        # Free cached GPU memory, then move dataloaders and model to the GPU.
        print("--> Ensure empty cache")
        torch.cuda.empty_cache()
        print("--> Use CUDA |Get enc embs GPU ")
        enc_learn.dls.cuda()
        enc_learn.cuda()
        if torch.cuda.is_available():
            print("CUDA está disponible")
            print("Dispositivo CUDA actual: ", torch.cuda.current_device())
            print("Nombre del dispositivo CUDA actual: ", torch.cuda.get_device_name(torch.cuda.current_device()))

        else:
            # NOTE(review): `cpu` is NOT forced to True here even though CUDA is
            # unavailable — compare with get_enc_embs_set_stride_set_batch_size,
            # which does force it. Confirm whether that is intentional.
            print("CUDA no está disponible ")
        print("Use CUDA -->")
    # Guard against an unset batch size; 64 is the fallback.
    if enc_learn.dls.bs == 0: enc_learn.dls.bs = 64

    print("--> Set dataset from X (enc_learn does not contain dls)")
    # Build an auxiliary dataloader over X based on the learner's validation dl.
    aux_dl = enc_learn.dls.valid.new_dl(X=X)
    aux_dl.bs = enc_learn.dls.bs if enc_learn.dls.bs>0 else 64
    print("--> Get module")
    # Default embedding layer is looked up per model class in ENCODER_EMBS_MODULE_NAME.
    module = nested_attr(enc_learn.model,ENCODER_EMBS_MODULE_NAME[type(enc_learn.model)]) if module is None else module

    print("--> Get enc embs bs: ", aux_dl.bs)
    # One activations tensor per batch, captured at `module` via hooks.
    embs = [
        get_acts_and_grads(
            model=enc_learn.model,
            modules=module,
            x=xb[0],
            cpu=cpu
        )[0]
        for xb in aux_dl
    ]
    print("--> Concat")
    if not cpu:
        # Decide whether the concatenated embeddings fit in GPU memory;
        # otherwise fall back to concatenating on the CPU.
        total_emb_size = sum([emb.element_size() * emb.nelement() for emb in embs])
        # NOTE(review): queries device 0 while the rest of the code uses
        # torch.cuda.current_device() — confirm single-GPU assumption.
        free_memory = torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated()
        if (total_emb_size < free_memory):
            print("Fit in GPU")
            embs=[emb.cuda() for emb in embs]
        else:
            print("Dont fit in GPU --> Go to CPU")
            embs=[emb.cpu() for emb in embs]
    embs = to_concat(embs)
    print("--> reduce")
    # Aggregate over the sequence axis when the activations are (batch, feat, seq).
    if embs.ndim == 3 and average_seq_dim: embs = embs.mean(axis=2)
    print("--> 2 numpy")
    if to_numpy: embs = embs.numpy() if cpu else embs.cpu().numpy()
    return embs
|
| 168 |
+
|
| 169 |
+
# %% ../nbs/encoder.ipynb 13
|
| 170 |
+
def get_enc_embs_set_stride_set_batch_size(
    X, enc_learn, stride, batch_size, module=None, cpu=False, average_seq_dim=True, to_numpy=True,
    print_flag = False, time_flag=False, chunk_size = 0, check_memory_usage = False
):
    """
    Get the embeddings of X from an encoder, passed in `enc_learn` as a fastai
    learner, after subsampling X with `stride` and setting the inference batch
    size to `batch_size`. By default, the embeddings are obtained from the last
    layer before the model head, although any layer can be passed to `module`.
    Input
    - `cpu`: Whether to do the model inference in cpu or gpu (GPU recommended)
    - `average_seq_dim`: Whether to aggregate the embeddings in the sequence dimension
    - `to_numpy`: Whether to return the result as a numpy array (if false returns a tensor)
    - `print_flag` / `time_flag`: verbose logging / elapsed-time reporting
    - `chunk_size`: when > 0 (and on GPU), process the dataloader in chunks,
      moving each chunk's embeddings to CPU to bound GPU memory usage
    - `check_memory_usage`: report GPU memory status before and after
    """
    if time_flag:
        t_start = time.time()
    if print_flag:
        print("--> get_enc_embs_set_stride_set_batch_size")
    if check_memory_usage: gpu_memory_status_()

    # Subsample the windows and fix the batch size requested by the caller.
    X = X[::stride]
    enc_learn.dls.bs = batch_size

    if (print_flag): print("get_enc_embs_set_stride_set_batch_size | Check CUDA | X ~ ", X.shape[0])
    if cpu:
        if (print_flag): print("get_enc_embs_set_stride_set_batch_size | Get enc embs CPU")
        enc_learn.dls.cpu()
        enc_learn.cpu()
    else:
        if torch.cuda.is_available():
            if (print_flag):
                print("get_enc_embs_set_stride_set_batch_size | CUDA device id:", torch.cuda.current_device())
                print("get_enc_embs_set_stride_set_batch_size | CUDA device name: ", torch.cuda.get_device_name(torch.cuda.current_device()))
                print("get_enc_embs_set_stride_set_batch_size | Ensure empty cache & move 2 GPU")
            torch.cuda.empty_cache()
            enc_learn.dls.cuda()
            enc_learn.cuda()
        else:
            # No GPU present: silently degrade to CPU inference.
            if (print_flag): print("get_enc_embs_set_stride_set_batch_size | No cuda available. Set CPU = true")
            cpu = True

    # Guard against an unset batch size; 64 is the fallback.
    if enc_learn.dls.bs is None or enc_learn.dls.bs == 0: enc_learn.dls.bs = 64

    if (print_flag): print("get_enc_embs_set_stride_set_batch_size | Set dataset from X (enc_learn does not contain dls)")
    # Build an auxiliary dataloader over X based on the learner's validation dl.
    aux_dl = enc_learn.dls.valid.new_dl(X=X)
    aux_dl.bs = enc_learn.dls.bs if enc_learn.dls.bs>0 else 64
    if (print_flag): print("get_enc_embs_set_stride_set_batch_size | Get module")
    # Default embedding layer is looked up per model class in ENCODER_EMBS_MODULE_NAME.
    module = nested_attr(enc_learn.model,ENCODER_EMBS_MODULE_NAME[type(enc_learn.model)]) if module is None else module

    if (print_flag):
        print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | aux_dl len", len(aux_dl))
        print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | aux_dl.batch_len ", len(next(iter(aux_dl))))
        print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | aux_dl.bs ", aux_dl.bs)
        if (not cpu):
            # BUG FIX: the original called get_device_properties(device) but no
            # `device` variable exists in this scope (NameError); use the
            # current CUDA device instead, consistent with the lines below.
            total = torch.cuda.get_device_properties(torch.cuda.current_device()).total_memory
            used = torch.cuda.memory_allocated(torch.cuda.current_device())
            reserved = torch.cuda.memory_reserved(torch.cuda.current_device())
            print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | total_mem ", total)
            print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | used_mem ", used)
            print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | reserved_mem ", reserved)
            print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | available_mem ", total-reserved)
        sys.stdout.flush()

    if (cpu or ( chunk_size == 0 )):
        # Single pass: one activations tensor per batch, captured at `module`.
        embs = [
            get_acts_and_grads(
                model=enc_learn.model,
                modules=module,
                x=xb[0],
                cpu=cpu
            )[0]
            for xb in aux_dl
        ]
        if not cpu: embs=[emb.cpu() for emb in embs]
    else:
        # Chunked pass: process `chunk_size` batches at a time, moving each
        # chunk's embeddings to CPU and emptying the CUDA cache in between.
        # NOTE(review): total_chunks is derived from len(X) (samples) while the
        # chunk filter below selects by batch index — confirm the intended unit
        # of `chunk_size` (batches vs samples).
        embs = []
        total_chunks=max(1,round(len(X)/chunk_size))
        if print_flag: print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | aux_dl len | " + str(len(X)) + " chunk size: " + str(chunk_size) + " => " + str(total_chunks) + " chunks")
        for i in range(0, total_chunks):
            if print_flag:
                print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | Chunk [ " + str(i) + "/"+str(total_chunks)+"] => " + str(round(i*100/total_chunks)) + "%")
                sys.stdout.flush()
            chunk = [batch for (n, batch) in enumerate(aux_dl) if (chunk_size*i <= n and chunk_size*(i+1) > n) ]
            chunk_embs = [
                get_acts_and_grads(
                    model=enc_learn.model,
                    modules=module,
                    x=xb[0],
                    cpu=cpu
                )[0]
                for xb in chunk
            ]
            # Move this chunk's embeddings to the CPU before the next chunk.
            chunk_embs = [emb.cpu() for emb in chunk_embs]
            embs.extend(chunk_embs)
            torch.cuda.empty_cache()
        if print_flag:
            print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | 100%")
            sys.stdout.flush()

    if print_flag: print("get_enc_embs_set_stride_set_batch_size | concat embeddings")

    embs = to_concat(embs)

    if print_flag: print("get_enc_embs_set_stride_set_batch_size | Reduce")

    # Aggregate over the sequence axis when the activations are (batch, feat, seq).
    if embs.ndim == 3 and average_seq_dim: embs = embs.mean(axis=2)

    if print_flag: print("get_enc_embs_set_stride_set_batch_size | Convert to numpy")

    if to_numpy:
        if cpu or chunk_size > 0:
            # Embeddings are already on the CPU in these paths.
            embs = embs.numpy()
        else:
            embs = embs.cpu().numpy()
            torch.cuda.empty_cache()
    if time_flag:
        t = time.time()-t_start
        if print_flag:
            print("get_enc_embs_set_stride_set_batch_size " + str(t) + " seconds -->")
        else:
            print("get_enc_embs_set_stride_set_batch_size " + str(t) + " seconds")
    if check_memory_usage: gpu_memory_status_()
    if print_flag:
        print("get_enc_embs_set_stride_set_batch_size -->")
    return embs
|
dvats/imports.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from IPython.display import Audio, display, HTML, Javascript, clear_output # from tsai
|
| 2 |
+
import importlib
|
| 3 |
+
import numpy as np
|
| 4 |
+
import time
|
| 5 |
+
import sys
|
| 6 |
+
|
| 7 |
+
##
|
| 8 |
+
# Constants
|
| 9 |
+
##
|
| 10 |
+
WANDB_ARTIFACTS_DIR = 'data/wandb_artifacts'
|
| 11 |
+
|
| 12 |
+
# General purpose functions
|
| 13 |
+
def beep(inp=1, duration=.1, n=1):
    """Play `n` short sine-wave beeps in a notebook (requires IPython's display/Audio).

    - `inp`: scales the tone frequency; a falsy value gives a low hum.
    - `duration`: length of each beep in seconds.
    - `n`: number of beeps to play.
    """
    rate = 10000  # samples per second
    mult = 1.6 * inp if inp else .08
    wave = np.sin(mult*np.arange(rate*duration))
    for i in range(n):
        # Consistency fix: use the `rate` local instead of re-hardcoding 10000.
        display(Audio(wave, rate=rate, autoplay=True))
        time.sleep(duration / .1)
|
| 20 |
+
|
| 21 |
+
def m_reload(package_name):
    """Reload every already-imported module whose name starts with `package_name`.

    Iterates over a snapshot of sys.modules: importlib.reload can add entries to
    sys.modules, and iterating the live dict while reloading raised
    "RuntimeError: dictionary changed size during iteration" in the original.
    """
    for mod_name, mod in list(sys.modules.items()):
        if mod_name.startswith(package_name):
            importlib.reload(mod)
|
dvats/load.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/load.ipynb.
|
| 2 |
+
|
| 3 |
+
# %% auto 0
|
| 4 |
+
__all__ = ['TSArtifact', 'infer_or_inject_freq']
|
| 5 |
+
|
| 6 |
+
# %% ../nbs/load.ipynb 2
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import numpy as np
|
| 9 |
+
from fastcore.all import *
|
| 10 |
+
import wandb
|
| 11 |
+
from datetime import datetime, timedelta
|
| 12 |
+
from .imports import *
|
| 13 |
+
from .utils import *
|
| 14 |
+
import pickle
|
| 15 |
+
import pyarrow.feather as ft
|
| 16 |
+
|
| 17 |
+
# %% ../nbs/load.ipynb 7
|
| 18 |
+
class TSArtifact(wandb.Artifact):
    """Class that represents a wandb artifact containing time series data. sd stands for start_date \
    and ed for end_date. Both should be pd.Timestamps"""

    # Default folder (under the user's home) where transformed datasets are written.
    default_storage_path = Path(Path.home()/'data/wandb_artifacts/')
    date_format = '%Y-%m-%d %H:%M:%S' # TODO add milliseconds
    # Named strategies for imputing missing values in a DataFrame.
    handle_missing_values_techniques = {
        'linear_interpolation': lambda df : df.interpolate(method='linear', limit_direction='both'),
        'overall_mean': lambda df : df.fillna(df.mean()),
        'overall_median': lambda df : df.fillna(df.median()),
        # FIX: fillna(method='bfill'/'ffill') is deprecated (removed in pandas 3.0);
        # bfill()/ffill() are the equivalent supported spellings.
        'backward_fill' : lambda df : df.bfill(),
        'forward_fill' : lambda df : df.ffill()
    }

    @delegates(wandb.Artifact.__init__)
    def __init__(self, name, sd:pd.Timestamp, ed:pd.Timestamp, **kwargs):
        super().__init__(type='dataset', name=name, **kwargs)
        self.sd = sd
        self.ed = ed
        if self.metadata is None:
            self.metadata = dict()
        # Store the date range as formatted strings under the 'TS' metadata key,
        # which also marks the artifact as a TSArtifact (see to_df).
        self.metadata['TS'] = dict(sd = self.sd.strftime(self.date_format),
                                   ed = self.ed.strftime(self.date_format))


    @classmethod
    def from_daily_csv_files(cls, root_path, fread=pd.read_csv, start_date=None, end_date=None, metadata=None, **kwargs):

        "Create a wandb artifact of type `dataset`, containing the CSV files from `start_date` \
        to `end_date`. Dates must be pased as `datetime.datetime` objects. If a `wandb_run` is \
        defined, the created artifact will be logged to that run, using the longwall name as \
        artifact name, and the date range as version."

        # TODO: not implemented yet — intentionally returns None.
        return None


    @classmethod
    @delegates(__init__)
    def from_df(cls, df:pd.DataFrame, name:str, path:str=None, sd:pd.Timestamp=None, ed:pd.Timestamp=None,
                normalize:bool=False, missing_values_technique:str=None, resampling_freq:str=None, **kwargs):

        """
        Create a TSArtifact of type `dataset`, using the DataFrame `df` samples from \
        `sd` (start date) to `ed` (end date). Dates must be passed as `datetime.datetime` \
        objects. The transformed DataFrame is stored as a feather file in the path `path` \
        and its reference is added to the artifact entries. Additionally, the dataset can \
        be normalized (see `normalize` argument) or transformed using missing values \
        handling techniques (see `missing_values_technique` argument) or resampling (see \
        `resampling_freq` argument).

        Arguments:
            df: (DataFrame) The dataframe you want to convert into an artifact.
            name: (str) The artifact name.
            path: (str, optional) The path where the file, containing the new transformed \
                dataframe, is saved. Default None.
            sd: (sd, optional) Start date. By default, the first index of `df` is taken.
            ed: (ed, optional) End date. By default, the last index of `df` is taken.
            normalize: (bool, optional) If the dataset values should be normalized. Default\
                False.
            missing_values_technique: (str, optional) The technique used to handle missing \
                values. Options: "linear_interpolation", "overall_mean", "overall_median", \
                "backward_fill", "forward_fill" or None. Default None.
            resampling_freq: (str, optional) The offset string or object representing \
                frequency conversion for time series resampling. Default None.

        Returns:
            TSArtifact object.
        """
        sd = df.index[0] if sd is None else sd
        ed = df.index[-1] if ed is None else ed
        obj = cls(name, sd=sd, ed=ed, **kwargs)
        # Restrict the dataframe to the requested date range.
        df = df.query('@obj.sd <= index <= @obj.ed')
        obj.metadata['TS']['created'] = 'from-df'
        obj.metadata['TS']['n_vars'] = df.columns.__len__()

        # Handle Missing Values
        df = obj.handle_missing_values_techniques[missing_values_technique](df) if missing_values_technique is not None else df
        obj.metadata['TS']['handle_missing_values_technique'] = missing_values_technique.__str__()
        obj.metadata['TS']['has_missing_values'] = np.any(df.isna().values).__str__()

        # Indexing and Resampling
        if resampling_freq: df = df.resample(resampling_freq).mean()
        obj.metadata['TS']['n_samples'] = len(df)
        obj.metadata['TS']['freq'] = str(df.index.freq)

        # Time Series Variables
        obj.metadata['TS']['vars'] = list(df.columns)

        # Normalization - Save the previous means and stds
        if normalize:
            obj.metadata['TS']['normalization'] = dict(means = df.describe().loc['mean'].to_dict(),
                                                       stds = df.describe().loc['std'].to_dict())
            df = normalize_columns(df)

        # Hash and save: the content hash names the stored feather file.
        hash_code = str(pd.util.hash_pandas_object(df).sum()) # str(hash(df.values.tobytes()))
        path = obj.default_storage_path/f'{hash_code}' if path is None else Path(path)/f'{hash_code}'
        print("About to write df to ", path)
        ft.write_feather(df, path, compression = 'lz4')
        #feather.write_dataframe
        obj.metadata['TS']['hash'] = hash_code
        obj.add_file(str(path))

        return obj
|
| 123 |
+
|
| 124 |
+
# %% ../nbs/load.ipynb 14
|
| 125 |
+
@patch
def to_df(self:wandb.apis.public.Artifact):
    """Download the files of a saved wandb TSArtifact and load them as a single DataFrame.

    The artifact must come from a call to `run.use_artifact` with a proper wandb
    run; artifacts lacking 'TS' metadata are rejected with an error message.
    """
    # The 'TS' metadata key is the marker that the artifact was built by TSArtifact.
    if self.metadata.get('TS') is None:
        print(f'ERROR:{self} does not come from a logged TSArtifact')
        return None
    download_dir = Path(self.download())
    if self.metadata['TS']['created'] != 'from-df':
        print("ERROR: Only from_df method is allowed yet")
        return None
    # from-df artifacts hold a single feather file; read it back as a DataFrame.
    return ft.read_feather(download_dir.ls()[0])
|
| 140 |
+
|
| 141 |
+
# %% ../nbs/load.ipynb 16
|
| 142 |
+
@patch
def to_tsartifact(self:wandb.apis.public.Artifact):
    """Cast an artifact as a TS artifact. The artifact must have been created from one of the \
    class creation methods of the class `TSArtifact`. This is useful to go back to a TSArtifact \
    after downloading an artifact through the wandb API."""
    return TSArtifact(name=self.digest, #TODO change this
                      sd=pd.to_datetime(self.metadata['TS']['sd'], format=TSArtifact.date_format),
                      # BUG FIX: the end date was read from metadata key 'sd'
                      # (start date), so ed always equalled sd; read 'ed' instead.
                      ed=pd.to_datetime(self.metadata['TS']['ed'], format=TSArtifact.date_format),
                      description=self.description,
                      metadata=self.metadata)
|
| 152 |
+
|
| 153 |
+
# %% ../nbs/load.ipynb 18
|
| 154 |
+
@delegates(pd.to_datetime)
def infer_or_inject_freq(df, injected_freq='1s', start_date=None, **kwargs):
    """
    Infer the frequency of `df`'s index. If the index is not a proper time index,
    synthesize fake timestamps spaced by `injected_freq` (default: 1 second).
    start_date: the first date of the synthesized index (int or string).
    Extra keyword arguments are forwarded to `pd.to_datetime`.
    """
    detected = pd.infer_freq(df.index)
    if detected != 'N':
        # Proper datetime index: just record the inferred frequency on it.
        df.index.freq = detected
        return df
    # No real time index: build timestamps as origin + k * step.
    step = pd.to_timedelta(injected_freq)
    origin = pd.to_datetime(ifnone(start_date, 0), **kwargs)
    df.index = origin + step*df.index
    df.index.freq = pd.infer_freq(df.index)
    return df
|
dvats/utils.py
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/utils.ipynb.
|
| 2 |
+
|
| 3 |
+
# %% auto 0
|
| 4 |
+
__all__ = ['generate_TS_df', 'normalize_columns', 'remove_constant_columns', 'ReferenceArtifact', 'PrintLayer',
|
| 5 |
+
'get_wandb_artifacts', 'get_pickle_artifact', 'exec_with_feather', 'py_function',
|
| 6 |
+
'exec_with_feather_k_output', 'exec_with_and_feather_k_output', 'learner_module_leaves',
|
| 7 |
+
'learner_module_leaves_subtables']
|
| 8 |
+
|
| 9 |
+
# %% ../nbs/utils.ipynb 3
|
| 10 |
+
from .imports import *
|
| 11 |
+
from fastcore.all import *
|
| 12 |
+
import wandb
|
| 13 |
+
import pickle
|
| 14 |
+
import pandas as pd
|
| 15 |
+
import numpy as np
|
| 16 |
+
#import tensorflow as tf
|
| 17 |
+
import torch.nn as nn
|
| 18 |
+
from fastai.basics import *
|
| 19 |
+
|
| 20 |
+
# %% ../nbs/utils.ipynb 5
|
| 21 |
+
def generate_TS_df(rows, cols):
    """Generate a DataFrame containing a synthetic multivariate time series.

    Each column represents a variable and each row a time point (sample). The
    timestamp is in the index of the dataframe, created with an even spacing of
    1 second between samples.

    BUG FIX: the original built the index with
    np.arange(now, now + (rows-1)s, 1s), which yields rows-1 points, so the
    returned frame had one row fewer than requested; pd.date_range with
    periods=rows produces exactly `rows` rows.
    """
    index = pd.date_range(start=pd.Timestamp.now(), periods=rows, freq='s')
    data = np.random.randn(rows, cols)
    return pd.DataFrame(data, index=index)
|
| 30 |
+
|
| 31 |
+
# %% ../nbs/utils.ipynb 10
|
| 32 |
+
def normalize_columns(df:pd.DataFrame):
    "Shift every column of `df` to zero mean and scale it to (approximately) unit std."
    centered = df - df.mean()
    # The 1e-7 epsilon guards against division by zero on constant columns.
    return centered / (df.std() + 1e-7)
|
| 37 |
+
|
| 38 |
+
# %% ../nbs/utils.ipynb 16
|
| 39 |
+
def remove_constant_columns(df:pd.DataFrame):
    "Drop every column of `df` whose values are all identical to its first row."
    varying = (df != df.iloc[0]).any()
    return df.loc[:, varying]
|
| 41 |
+
|
| 42 |
+
# %% ../nbs/utils.ipynb 21
|
| 43 |
+
class ReferenceArtifact(wandb.Artifact):
    default_storage_path = Path('data/wandb_artifacts/') # * this path is relative to Path.home()
    "This class is meant to create an artifact with a single reference to an object \
    passed as argument in the contructor. The object will be pickled, hashed and stored \
    in a specified folder."
    @delegates(wandb.Artifact.__init__)
    def __init__(self, obj, name, type='object', folder=None, **kwargs):
        # Pickle `obj`, name the file after the pickle's hash, write it under
        # `folder` (default: ~/data/wandb_artifacts/) and register it in this
        # artifact as a file:// reference. Side effect: writes a file to disk.
        super().__init__(type=type, name=name, **kwargs)
        # pickle dumps the object and then hash it
        hash_code = str(hash(pickle.dumps(obj)))
        folder = Path(ifnone(folder, Path.home()/self.default_storage_path))
        with open(f'{folder}/{hash_code}', 'wb') as f:
            pickle.dump(obj, f)
        self.add_reference(f'file://{folder}/{hash_code}')
        if self.metadata is None:
            self.metadata = dict()
        # Record the hash and the original type so `to_obj` can locate and
        # sanity-check the referenced object later.
        self.metadata['ref'] = dict()
        self.metadata['ref']['hash'] = hash_code
        self.metadata['ref']['type'] = str(obj.__class__)
|
| 62 |
+
|
| 63 |
+
# %% ../nbs/utils.ipynb 24
|
| 64 |
+
@patch
def to_obj(self:wandb.apis.public.Artifact):
    """Download the files of a saved ReferenceArtifact and get the referenced object. The artifact must \
    come from a call to `run.use_artifact` with a proper wandb run."""
    if self.metadata.get('ref') is None:
        print(f'ERROR:{self} does not come from a saved ReferenceArtifact')
        return None
    # Prefer the local copy when it still exists; otherwise download the artifact.
    local_copy = ReferenceArtifact.default_storage_path/self.metadata['ref']['hash']
    source = local_copy if local_copy.exists() else Path(self.download()).ls()[0]
    with open(source, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded
|
| 76 |
+
|
| 77 |
+
# %% ../nbs/utils.ipynb 33
|
| 78 |
+
import torch.nn as nn
|
| 79 |
+
class PrintLayer(nn.Module):
    """Identity layer that prints the shape of whatever tensor passes through it.

    Useful for debugging tensor shapes when inserted into an nn.Sequential.
    """
    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Debug hook: report the incoming shape, then return the tensor untouched.
        print(x.shape)
        return x
|
| 87 |
+
|
| 88 |
+
# %% ../nbs/utils.ipynb 34
|
| 89 |
+
@patch
def export_and_get(self:Learner, keep_exported_file=False):
    """
    Round-trip this learner through `Learner.export` / `load_learner` and return
    the reloaded copy. The intermediate 'aux.pkl' file is deleted unless
    `keep_exported_file` is True.
    """
    tmp_path = Path('aux.pkl')
    self.export(fname='aux.pkl')
    reloaded = load_learner('aux.pkl')
    if not keep_exported_file:
        tmp_path.unlink()
    return reloaded
|
| 99 |
+
|
| 100 |
+
# %% ../nbs/utils.ipynb 35
|
| 101 |
+
def get_wandb_artifacts(project_path, type=None, name=None, last_version=True):
    """
    Get the artifacts logged in a wandb project.
    Input:
    - `project_path` (str): entity/project_name
    - `type` (str): whether to return only one type of artifacts
    - `name` (str): Leave none to have all artifact names
    - `last_version`: whether to return only the last version of each artifact or not

    Output: List of artifacts
    """
    api = wandb.Api()
    # Either a single requested type or every type in the project.
    kinds = [api.artifact_type(type, project_path)] if type is not None else api.artifact_types(project_path)

    found = L()
    for kind in kinds:
        for collection in kind.collections():
            if name is not None and name != collection.name:
                continue
            versions = api.artifact_versions(
                kind.type,
                "/".join([kind.entity, kind.project, collection.name]),
                per_page=1,
            )
            # Keep only the newest version, or every version of the collection.
            found += next(versions) if last_version else L(versions)
    return list(found)
|
| 130 |
+
|
| 131 |
+
# %% ../nbs/utils.ipynb 39
|
| 132 |
+
def get_pickle_artifact(filename):
    "Load and return the object pickled in `filename`."
    with open(filename, "rb") as handle:
        loaded = pickle.load(handle)
    return loaded
|
| 138 |
+
|
| 139 |
+
# %% ../nbs/utils.ipynb 41
|
| 140 |
+
import pyarrow.feather as ft
|
| 141 |
+
import pickle
|
| 142 |
+
|
| 143 |
+
# %% ../nbs/utils.ipynb 42
|
| 144 |
+
def exec_with_feather(function, path = None, print_flag = False, *args, **kwargs):
    """
    Read a feather file from `path`, apply `function` to the resulting dataframe
    (forwarding *args/**kwargs) and return the result. Returns None when `path`
    is None.
    """
    result = None
    # BUG FIX: the original tested `path is none` (lowercase), which raised
    # NameError on every call; the correct singleton is `None`.
    if path is not None:
        if print_flag: print("--> Exec with feather | reading input from ", path)
        input = ft.read_feather(path)
        if print_flag: print("--> Exec with feather | Apply function ", path)
        result = function(input, *args, **kwargs)
    if print_flag: print("Exec with feather --> ", path)
    return result
|
| 153 |
+
|
| 154 |
+
# %% ../nbs/utils.ipynb 43
|
| 155 |
+
def py_function(module_name, function_name, print_flag = False):
    """
    Resolve `function_name` to a callable: look it up in __main__ first and fall
    back to importing it from `module_name`.
    """
    try:
        function = getattr(__import__('__main__'), function_name)
    # Narrowed from a bare `except:` so unrelated errors (e.g. KeyboardInterrupt)
    # are not silently swallowed; only a failed lookup triggers the fallback.
    except (AttributeError, ImportError):
        module = __import__(module_name, fromlist=[''])
        function = getattr(module, function_name)
    print("py function: ", function_name, ": ", function)
    return function
|
| 163 |
+
|
| 164 |
+
# %% ../nbs/utils.ipynb 46
|
| 165 |
+
import time
|
| 166 |
+
def exec_with_feather_k_output(function_name, module_name = "main", path = None, k_output = 0, print_flag = False, time_flag = False, *args, **kwargs):
|
| 167 |
+
result = None
|
| 168 |
+
function = py_function(module_name, function_name, print_flag)
|
| 169 |
+
if time_flag: t_start = time.time()
|
| 170 |
+
if not (path is None):
|
| 171 |
+
if print_flag: print("--> Exec with feather | reading input from ", path)
|
| 172 |
+
input = ft.read_feather(path)
|
| 173 |
+
if print_flag: print("--> Exec with feather | Apply function ", path)
|
| 174 |
+
result = function(input, *args, **kwargs)[k_output]
|
| 175 |
+
if time_flag:
|
| 176 |
+
t_end = time.time()
|
| 177 |
+
print("Exec with feather | time: ", t_end-t_start)
|
| 178 |
+
if print_flag: print("Exec with feather --> ", path)
|
| 179 |
+
return result
|
| 180 |
+
|
| 181 |
+
# %% ../nbs/utils.ipynb 48
|
| 182 |
+
def exec_with_and_feather_k_output(function_name, module_name = "main", path_input = None, path_output = None, k_output = 0, print_flag = False, time_flag = False, *args, **kwargs):
    """
    Resolve `function_name` (via py_function), read a feather file from
    `path_input`, apply the function, keep element `k_output` of the result,
    write it as a feather file to `path_output` and return `path_output`.
    Skips all work when `path_input` is None.
    """
    result = None
    function = py_function(module_name, function_name, print_flag)
    if time_flag: t_start = time.time()
    if path_input is not None:
        if print_flag: print("--> Exec with feather | reading input from ", path_input)
        input = ft.read_feather(path_input)
        if print_flag:
            print("--> Exec with feather | Apply function ", function_name, "input type: ", type(input))

        result = function(input, *args, **kwargs)[k_output]
        # BUG FIX: the original wrote `ft.write_feather(df, path, ...)` but
        # neither `df` nor `path` exists in this scope (NameError); persist the
        # computed `result` to the requested `path_output` instead.
        ft.write_feather(result, path_output, compression = 'lz4')
    if time_flag:
        t_end = time.time()
        print("Exec with feather | time: ", t_end-t_start)
    if print_flag: print("Exec with feather --> ", path_output)
    return path_output
|
| 199 |
+
|
| 200 |
+
# %% ../nbs/utils.ipynb 52
def learner_module_leaves(learner):
    """Enumerate the leaf modules of a learner's model.

    Walks the module tree depth-first and records one row per leaf (a module
    with no children). Returns a DataFrame with columns:
    'Path' (arrow-joined chain of ancestor class names), 'Module_type'
    (leaf class name), 'Module_name' (attribute name in its parent) and
    'Module' (the leaf's repr, stripped).
    """
    root = list(learner.modules())[0]  # the root module of the network
    records = []

    def _walk(module, ancestry=None):
        # Depth-first traversal; `ancestry` carries the class names seen so far.
        ancestry = [] if ancestry is None else ancestry
        for child_name, child in module.named_children():
            trail = ancestry + [type(child).__name__]
            if not list(child.children()):  # leaf: no sub-modules
                records.append([
                    ' -> '.join(trail),
                    type(child).__name__,
                    child_name,
                    str(child).strip(),
                ])
            _walk(child, trail)

    _walk(root)
    return pd.DataFrame(records, columns=['Path', 'Module_type', 'Module_name', 'Module'])
|
| 225 |
+
|
| 226 |
+
# %% ../nbs/utils.ipynb 56
def learner_module_leaves_subtables(learner, print_flag = False):
    """Summarise a learner's leaf modules as two de-duplicated tables.

    Returns (md_types, md_modules): the distinct leaf module types and the
    distinct leaf parameterisations, both as single-column DataFrames.
    With `print_flag` the tables are also rendered via `display`.
    """
    df = pd.DataFrame(columns=['Path', 'Module_type', 'Module_name', 'Module'])  # kept for parity with original
    by_type = (
        learner_module_leaves(learner)
        .drop('Path', axis = 1)
        .sort_values(by = 'Module_type')
    )
    if print_flag: print("The layers are of this types:")

    md_types = pd.DataFrame(by_type['Module_type'].drop_duplicates())
    if print_flag:
        display(md_types)
        print("And they are called with this parameters:")

    md_modules = pd.DataFrame(by_type['Module'].drop_duplicates())

    if print_flag: display(md_modules)

    return md_types, md_modules
|
dvats/visualization.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/visualization.ipynb.
|
| 2 |
+
|
| 3 |
+
# %% auto 0
|
| 4 |
+
__all__ = ['plot_TS', 'plot_validation_ts_ae', 'plot_mask']
|
| 5 |
+
|
| 6 |
+
# %% ../nbs/visualization.ipynb 3
|
| 7 |
+
from fastcore.all import *
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import numpy as np
|
| 10 |
+
import matplotlib.pyplot as plt
|
| 11 |
+
import torch
|
| 12 |
+
|
| 13 |
+
# %% ../nbs/visualization.ipynb 6
@delegates(pd.DataFrame.plot)
def plot_TS(df:pd.core.frame.DataFrame, **kwargs):
    """Plot each column of `df` in its own subplot and show the figure.

    Keyword arguments are forwarded to `pandas.DataFrame.plot`; the
    `@delegates` decorator merges that method's signature into this one.
    """
    df.plot(subplots=True, **kwargs)
    plt.show()
|
| 18 |
+
|
| 19 |
+
# %% ../nbs/visualization.ipynb 8
def plot_validation_ts_ae(prediction:np.array, original:np.array, title_str = "Validation plot", fig_size = (15,15), anchor = (-0.01, 0.89), window_num = 0, return_fig=True, title_pos = 0.9):
    """Overlay a model's reconstruction on the original time series, one subplot per variable.

    Both `prediction` and `original` are indexed as [window, time, variable]
    (3rd axis drives the number of subplots). `window_num` selects which
    window to draw. Returns the figure when `return_fig` is True, else None.
    `anchor` is accepted for backward compatibility but unused here.
    """
    # Create the figure and one stacked subplot per variable.
    fig = plt.figure(figsize=(fig_size[0], fig_size[1]))
    axes = fig.subplots(nrows=original.shape[2], ncols=1)
    # Plot original vs. prediction for each variable of the chosen window.
    for i, ax in zip(range(original.shape[2]), fig.axes):
        ax.plot(original[window_num, :, i], label='Original Data')
        ax.plot(prediction[window_num, :, i], label='Prediction')
    # Single shared legend taken from the last axes.
    lines, labels = fig.axes[-1].get_legend_handles_labels()
    fig.legend(lines, labels, loc='upper left', ncol=2)
    # Title positioned close to the top of the graph.
    fig.suptitle(title_str, y = title_pos)
    fig.tight_layout()
    # FIX: removed a bare `fig` expression statement that followed the
    # `return fig` guard in the original — it was a no-op.
    if return_fig:
        return fig
    return None
|
| 41 |
+
|
| 42 |
+
# %% ../nbs/visualization.ipynb 12
def plot_mask(mask, i=0, fig_size=(10,10), title_str="Mask", return_fig=False):
    """
    Render one window of a 3D boolean mask tensor as a colour mesh.

    The mask is indexed as [window/item, variable, time step]; `i` selects
    the window to draw. The title reports the mean of `mask[0]`
    (NOTE(review): window 0, not window `i` — kept as-is, confirm intent).

    Input:
        mask: 3D boolean tensor
        i: index of the window to plot
        fig_size: size of the figure
        title_str: title of the plot
        return_fig: if True, returns the figure
    Output:
        the figure when `return_fig` is True, otherwise None (figure is shown)
    """
    plt.figure(figsize=fig_size)
    plt.pcolormesh(mask[i], cmap='cool')
    plt.title(f'{title_str} {i}, mean: {mask[0].float().mean().item():.3f}')
    if not return_fig:
        plt.show()
        return None
    return plt.gcf()
|
dvats/xai.py
ADDED
|
@@ -0,0 +1,964 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/xai.ipynb.
|
| 2 |
+
|
| 3 |
+
# %% auto 0
|
| 4 |
+
__all__ = ['get_embeddings', 'get_dataset', 'umap_parameters', 'get_prjs', 'plot_projections', 'plot_projections_clusters',
|
| 5 |
+
'calculate_cluster_stats', 'anomaly_score', 'detector', 'plot_anomaly_scores_distribution',
|
| 6 |
+
'plot_clusters_with_anomalies', 'update_plot', 'plot_clusters_with_anomalies_interactive_plot',
|
| 7 |
+
'get_df_selected', 'shift_datetime', 'get_dateformat', 'get_anomalies', 'get_anomaly_styles',
|
| 8 |
+
'InteractiveAnomalyPlot', 'plot_save', 'plot_initial_config', 'merge_overlapping_windows',
|
| 9 |
+
'InteractiveTSPlot', 'add_selected_features', 'add_windows', 'setup_style', 'toggle_trace',
|
| 10 |
+
'set_features_buttons', 'move_left', 'move_right', 'move_down', 'move_up', 'delta_x_bigger',
|
| 11 |
+
'delta_y_bigger', 'delta_x_lower', 'delta_y_lower', 'add_movement_buttons', 'setup_boxes', 'initial_plot',
|
| 12 |
+
'show']
|
| 13 |
+
|
| 14 |
+
# %% ../nbs/xai.ipynb 1
|
| 15 |
+
#Weight & Biases
|
| 16 |
+
import wandb
|
| 17 |
+
|
| 18 |
+
#Yaml
|
| 19 |
+
from yaml import load, FullLoader
|
| 20 |
+
|
| 21 |
+
#Embeddings
|
| 22 |
+
from .all import *
|
| 23 |
+
from tsai.data.preparation import prepare_forecasting_data
|
| 24 |
+
from tsai.data.validation import get_forecasting_splits
|
| 25 |
+
from fastcore.all import *
|
| 26 |
+
|
| 27 |
+
#Dimensionality reduction
|
| 28 |
+
from tsai.imports import *
|
| 29 |
+
|
| 30 |
+
#Clustering
|
| 31 |
+
import hdbscan
|
| 32 |
+
import time
|
| 33 |
+
from .dr import get_PCA_prjs, get_UMAP_prjs, get_TSNE_prjs
|
| 34 |
+
|
| 35 |
+
import seaborn as sns
|
| 36 |
+
import matplotlib.pyplot as plt
|
| 37 |
+
import pandas as pd
|
| 38 |
+
import ipywidgets as widgets
|
| 39 |
+
from IPython.display import display
|
| 40 |
+
from functools import partial
|
| 41 |
+
|
| 42 |
+
from IPython.display import display, clear_output, HTML as IPHTML
|
| 43 |
+
from ipywidgets import Button, Output, VBox, HBox, HTML, Layout, FloatSlider
|
| 44 |
+
|
| 45 |
+
import plotly.graph_objs as go
|
| 46 |
+
import plotly.offline as py
|
| 47 |
+
import plotly.io as pio
|
| 48 |
+
#! pip install kaleido
|
| 49 |
+
import kaleido
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# %% ../nbs/xai.ipynb 4
def get_embeddings(config_lrp, run_lrp, api, print_flag = False):
    """Fetch the embeddings artifact referenced by `config_lrp.emb_artifact`.

    Uses `run_lrp.use_artifact` when W&B is enabled, otherwise the plain API
    lookup. Returns (embeddings object, artifact, config of the logging run).
    """
    fetch = run_lrp.use_artifact if config_lrp.use_wandb else api.artifact
    artifact = fetch(config_lrp.emb_artifact, type='embeddings')
    if print_flag:
        print(artifact.name)
    logger_config = artifact.logged_by().config
    return artifact.to_obj(), artifact, logger_config
|
| 59 |
+
|
| 60 |
+
# %% ../nbs/xai.ipynb 5
def get_dataset(
    config_lrp,
    config_emb,
    config_dr,
    run_lrp,
    api,
    print_flag = False
):
    """Resolve the encoder learner and the DR dataset from W&B artifacts.

    Loads the encoder ('learner') artifact named in `config_emb`, then the
    training dataset logged by the run that produced it; if `config_dr`
    names an explicit DR artifact that one is fetched instead of the
    training set. Returns (df, dr_artifact, enc_artifact, enc_learner).
    """
    # Botch to use artifacts offline
    artifacts_gettr = run_lrp.use_artifact if config_lrp.use_wandb else api.artifact
    enc_artifact = artifacts_gettr(config_emb['enc_artifact'], type='learner')
    if print_flag: print(enc_artifact.name)
    ## TODO: This only works when you run it two timeS! WTF?
    try:
        enc_learner = enc_artifact.to_obj()
    except Exception:  # FIX: was a bare `except:`; retry once (known artifact quirk)
        enc_learner = enc_artifact.to_obj()

    ## Restore artifact
    enc_logger = enc_artifact.logged_by()
    enc_artifact_train = artifacts_gettr(enc_logger.config['train_artifact'], type='dataset')
    if enc_logger.config['valid_artifact'] is not None:
        enc_artifact_valid = artifacts_gettr(enc_logger.config['valid_artifact'], type='dataset')
        if print_flag: print("enc_artifact_valid:", enc_artifact_valid.name)
    if print_flag: print("enc_artifact_train: ", enc_artifact_train.name)

    if config_dr['dr_artifact'] is not None:
        # FIX: this debug print was unconditional in the original; now guarded.
        if print_flag: print("Is not none")
        # NOTE(review): reads config_dr['enc_artifact'] even though the guard
        # checks 'dr_artifact' — looks like a copy-paste slip; behavior kept
        # as-is pending confirmation against callers.
        dr_artifact = artifacts_gettr(config_dr['enc_artifact'])
    else:
        dr_artifact = enc_artifact_train
        if print_flag: print("DR artifact train: ", dr_artifact.name)
    if print_flag: print("--> DR artifact name", dr_artifact.name)
    # FIX: removed a bare no-op `dr_artifact` expression statement.
    df = dr_artifact.to_df()
    if print_flag: print("--> DR After to df", df.shape)
    if print_flag: display(df.head())
    return df, dr_artifact, enc_artifact, enc_learner
|
| 100 |
+
|
| 101 |
+
# %% ../nbs/xai.ipynb 6
def umap_parameters(config_dr, config):
    """Build the UMAP keyword-argument dict for either the CPU or GPU backend.

    Reads `n_neighbors`, `min_dist`, `metric` and `cpu_flag` from `config_dr`
    and returns the parameter set matching the selected backend. `config` is
    accepted for call-site compatibility but not consulted here.
    """
    cpu_params = {
        'n_neighbors' : config_dr.n_neighbors,
        'min_dist'    : config_dr.min_dist,
        'random_state': np.uint64(822569775),
        'metric'      : config_dr.metric,
        #'a': 1.5769434601962196,
        #'b': 0.8950608779914887,
        #'metric_kwds': {'p': 2},
        #'output_metric': 'euclidean',
        'verbose'     : 4,
        #'n_epochs': 200
    }
    gpu_params = {
        'n_neighbors'       : config_dr.n_neighbors,
        'min_dist'          : config_dr.min_dist,
        'random_state'      : np.uint64(1234),
        'metric'            : config_dr.metric,
        'a'                 : 1.5769434601962196,
        'b'                 : 0.8950608779914887,
        'target_metric'     : 'euclidean',
        'target_n_neighbors': config_dr.n_neighbors,
        'verbose'           : 4,  # 6 == CUML_LEVEL_TRACE
        'n_epochs'          : 200*3*2,
        'init'              : 'random',
        'hash_input'        : True,
    }
    return cpu_params if config_dr.cpu_flag else gpu_params
|
| 134 |
+
|
| 135 |
+
# %% ../nbs/xai.ipynb 7
def get_prjs(embs_no_nan, config_dr, config, print_flag = False):
    """Project embeddings to 2D: PCA pre-reduction followed by UMAP.

    `embs_no_nan` is the embedding matrix (NaNs already removed). UMAP runs on
    CPU or GPU according to `config_dr.cpu_flag`; PCA always uses the GPU path
    (`cpu=False`), as in the original pipeline. Returns the UMAP projections.
    """
    umap_params = umap_parameters(config_dr, config)
    prjs_pca = get_PCA_prjs(
        X = embs_no_nan,
        cpu = False,
        print_flag = print_flag,
        **umap_params
    )
    if print_flag:
        print(prjs_pca.shape)
    prjs_umap = get_UMAP_prjs(
        input_data = prjs_pca,
        cpu = config_dr.cpu_flag,
        print_flag = print_flag,
        **umap_params
    )
    # FIX: original had `if print_flag: prjs_umap.shape` — a bare expression
    # with no effect; the intent was clearly to print the shape.
    if print_flag: print(prjs_umap.shape)
    return prjs_umap
|
| 154 |
+
|
| 155 |
+
# %% ../nbs/xai.ipynb 9
def plot_projections(prjs, umap_params, fig_size = (25,25)):
    """Render 2D projections as a connected scatter plot and return the axes."""
    points = pd.DataFrame(prjs, columns = ['x1', 'x2'])
    fig, ax = plt.subplots(figsize=fig_size)
    # Hollow blue markers for the points, plus a faint picker-enabled line
    # connecting consecutive projections.
    ax.scatter(points['x1'], points['x2'], marker='o', facecolors='none', edgecolors='b', alpha=0.1)
    ax.plot(points['x1'], points['x2'], alpha=0.5, picker=1)
    plt.title('DR params - n_neighbors:{:d} min_dist:{:f}'.format(
        umap_params['n_neighbors'],umap_params['min_dist']))
    return ax
|
| 166 |
+
|
| 167 |
+
# %% ../nbs/xai.ipynb 10
def plot_projections_clusters(prjs, clusters_labels, umap_params, fig_size = (25,25)):
    """Scatter-plot 2D projections, one colour per cluster label; returns the axes."""
    points = pd.DataFrame(prjs, columns = ['x1', 'x2'])
    points['cluster'] = clusters_labels

    fig, ax = plt.subplots(figsize=fig_size)

    # One scatter call per cluster so each gets its own colour and legend entry.
    unique_labels = points['cluster'].unique()
    print(unique_labels)  # kept: original emits the label set unconditionally
    for label in unique_labels:
        members = points[points['cluster'] == label]
        ax.scatter(members['x1'], members['x2'], label=f'Cluster {label}')

    plt.title('DR params - n_neighbors:{:d} min_dist:{:f}'.format(
        umap_params['n_neighbors'],umap_params['min_dist']))
    return ax
|
| 188 |
+
|
| 189 |
+
# %% ../nbs/xai.ipynb 11
def calculate_cluster_stats(data, labels):
    """Compute per-label (mean, std) statistics and return them as a dict.

    Returns {label: (mean, std)} with one entry per unique value in `labels`.
    NOTE(review): `members` is assigned the WHOLE dataset, so every label gets
    identical global statistics; the commented-out line suggests per-cluster
    filtering (`data[labels == label]`) was intended — confirm before relying
    on per-cluster semantics.
    """
    cluster_stats = {}
    for label in np.unique(labels):
        #members = data[labels == label]
        members = data
        mean = np.mean(members, axis = 0)
        std = np.std(members, axis = 0)
        cluster_stats[label] = (mean, std)
    return cluster_stats
|
| 200 |
+
|
| 201 |
+
# %% ../nbs/xai.ipynb 12
def anomaly_score(point, cluster_stats, label):
    """Return the norm of the z-score of `point` relative to its cluster's (mean, std)."""
    centre, spread = cluster_stats[label]
    normalised = (point - centre) / spread
    return np.linalg.norm(normalised)
|
| 206 |
+
|
| 207 |
+
# %% ../nbs/xai.ipynb 13
def detector(data, labels):
    """Score every point against its cluster's statistics; returns an array of anomaly scores."""
    stats = calculate_cluster_stats(data, labels)
    return np.array([anomaly_score(p, stats, lb) for p, lb in zip(data, labels)])
|
| 216 |
+
|
| 217 |
+
# %% ../nbs/xai.ipynb 15
def plot_anomaly_scores_distribution(anomaly_scores):
    "Plot the distribution of anomaly scores to check for normality"
    # Histogram with a KDE overlay, 30 bins. The title/axis labels are
    # user-facing Spanish strings, kept verbatim.
    plt.figure(figsize=(10, 6))
    sns.histplot(anomaly_scores, kde=True, bins=30)
    plt.title("Distribución de Anomaly Scores")
    plt.xlabel("Anomaly Score")
    plt.ylabel("Frecuencia")
    plt.show()
|
| 226 |
+
|
| 227 |
+
# %% ../nbs/xai.ipynb 16
def plot_clusters_with_anomalies(prjs, clusters_labels, anomaly_scores, threshold, fig_size=(25, 25)):
    """Scatter 2D cluster projections and overlay the points whose score exceeds `threshold`."""
    points = pd.DataFrame(prjs, columns=['x1', 'x2'])
    points['cluster'] = clusters_labels
    points['anomaly'] = anomaly_scores > threshold

    fig, ax = plt.subplots(figsize=fig_size)

    # One colour per cluster.
    for label in points['cluster'].unique():
        members = points[points['cluster'] == label]
        ax.scatter(members['x1'], members['x2'], label=f'Cluster {label}', alpha=0.7)

    # Anomalies drawn on top of the clusters.
    flagged = points[points['anomaly']]
    ax.scatter(flagged['x1'], flagged['x2'], color='red', label='Anomalies', edgecolor='k', s=50)

    plt.title('Clusters and anomalies')
    plt.legend()
    plt.show()
|
| 250 |
+
|
| 251 |
+
def update_plot(threshold, prjs_umap, clusters_labels, anomaly_scores, fig_size):
    """Widget callback: redraw the cluster/anomaly plot at the given threshold."""
    plot_clusters_with_anomalies(prjs_umap, clusters_labels, anomaly_scores, threshold, fig_size)
|
| 253 |
+
|
| 254 |
+
def plot_clusters_with_anomalies_interactive_plot(threshold, prjs_umap, clusters_labels, anomaly_scores, fig_size):
    """Wire a threshold FloatSlider to `update_plot` and display the interactive widget.

    NOTE(review): the `fig_size` argument is shadowed by a fixed (25,25) in the
    widget wiring, as in the original — confirm whether callers expect it used.
    """
    slider = widgets.FloatSlider(value=threshold, min=0.001, max=3, step=0.001, description='Threshold')
    widget = widgets.interactive(
        update_plot,
        threshold = slider,
        prjs_umap = widgets.fixed(prjs_umap),
        clusters_labels = widgets.fixed(clusters_labels),
        anomaly_scores = widgets.fixed(anomaly_scores),
        fig_size = widgets.fixed((25,25)),
    )
    display(widget)
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
# %% ../nbs/xai.ipynb 18
|
| 265 |
+
import plotly.express as px
|
| 266 |
+
from datetime import timedelta
|
| 267 |
+
|
| 268 |
+
# %% ../nbs/xai.ipynb 19
def get_df_selected(df, selected_indices, w, stride = 1):
    """Map selected window indices back to row ranges of the original dataframe.

    Each selected index i covers rows [i*stride, i*stride + w] (end row
    included via iloc[start:end+1]). Returns a tuple
    (window_ranges, n_windows, df_selected) where `df_selected` is the
    concatenation of all covered row slices, original index preserved.
    """
    n_windows = len(selected_indices)
    window_ranges = [(idx * stride, idx * stride + w) for idx in selected_indices]
    slices = [df.iloc[start:end + 1] for start, end in window_ranges]
    df_selected = pd.concat(slices, ignore_index=False)
    return window_ranges, n_windows, df_selected
|
| 279 |
+
|
| 280 |
+
# %% ../nbs/xai.ipynb 20
def shift_datetime(dt, seconds, sign, dateformat="%Y-%m-%d %H:%M:%S.%f", print_flag = False):
    """
    Shift the datetime string `dt` by `seconds` seconds: into the future when
    `sign` is '+', into the past otherwise. The result is formatted with
    "%Y-%m-%d %H:%M:%S.%f"; on a ValueError during shifting the exception
    text is returned instead.

    NOTE(review): if `dt` matches neither `dateformat` nor the date-only
    fallback, `new_dt` is never bound and the later code raises
    UnboundLocalError instead of returning an error string. The recursive
    call in the final except branch discards its result. Confirm intended
    input shapes before relying on the error paths.
    """

    if print_flag: print(dateformat)
    dateformat2= "%Y-%m-%d %H:%M:%S.%f"
    dateformat3 = "%Y-%m-%d"
    ok = False

    # First attempt: parse with the caller-supplied format.
    try:
        if print_flag: print("dt ", dt, "seconds", seconds, "sign", sign)
        new_dt = datetime.strptime(dt, dateformat)
        if print_flag: print("ndt", new_dt)
        ok = True
    except ValueError as e:
        if print_flag:
            print("Error: ", e)

    # Fallback: date-only format. (The debug message mentions dateformat2 but
    # the parse actually uses dateformat3 — presumably a leftover.)
    if (not ok):
        try:
            if print_flag: print("Parsing alternative dataformat", dt, "seconds", seconds, "sign", sign, dateformat2)
            new_dt = datetime.strptime(dt, dateformat3)
            if print_flag: print("2ndt", new_dt)
        except ValueError as e:
            print("Error: ", e)
    if print_flag: print(new_dt)
    try:

        # Midnight timestamps are re-normalised (effectively a no-op replace).
        if new_dt.hour == 0 and new_dt.minute == 0 and new_dt.second == 0:
            if print_flag: "Aqui"
            new_dt = new_dt.replace(hour=0, minute=0, second=0, microsecond=0)
            if print_flag: print(new_dt)

        if print_flag: print("ndt", new_dt)

        # Apply the shift in the requested direction.
        if (sign == '+'):
            if print_flag: print("Aqui")
            new_dt = new_dt + timedelta(seconds = seconds)
            if print_flag: print(new_dt)
        else:
            if print_flag: print(sign, type(dt))
            new_dt = new_dt - timedelta(seconds = seconds)
            if print_flag: print(new_dt)
        # Re-normalise again if the shift landed exactly on midnight.
        if new_dt.hour == 0 and new_dt.minute == 0 and new_dt.second == 0:
            if print_flag: print("replacing")
            new_dt = new_dt.replace(hour=0, minute=0, second=0, microsecond=0)

        new_dt_str = new_dt.strftime(dateformat2)
        if print_flag: print("new dt ", new_dt)
    except ValueError as e:
        if print_flag: print("Aqui3")
        # NOTE(review): recursion result is discarded; only the error text is returned.
        shift_datetime(dt, 0, sign, dateformat = "%Y-%m-%d", print_flag = False)
        return str(e)
    return new_dt_str
|
| 338 |
+
|
| 339 |
+
|
| 340 |
+
|
| 341 |
+
# %% ../nbs/xai.ipynb 21
def get_dateformat(text_date):
    """Infer the strptime format of a textual date.

    Returns "%Y-%m-%d %H:%M:%S.%f" (time with fractional seconds),
    "%Y-%m-%d %H:%M:%S" (time without fraction), "%Y-%m-%d" (date only),
    or one of the "unknown format ..." marker strings when the shape
    doesn't match any of those.
    """
    fmt_datetime = "%Y-%m-%d %H:%M:%S"
    fmt_datetime_us = "%Y-%m-%d %H:%M:%S.%f"
    fmt_date = "%Y-%m-%d"

    pieces = text_date.split()
    if len(pieces) == 1:           # bare date, no time-of-day part
        return fmt_date
    if len(pieces) != 2:           # empty string or too many tokens
        return "unknown format 2"

    clock = pieces[1].split(':')
    if len(clock) != 3:            # time part is not H:M:S shaped
        return "unknown format 1"
    # Fractional seconds present iff the seconds field contains a '.'.
    return fmt_datetime_us if len(clock[2].split('.')) == 2 else fmt_datetime
|
| 365 |
+
|
| 366 |
+
# %% ../nbs/xai.ipynb 23
def get_anomalies(df, threshold, flag):
    """Write df['anomaly'] in place: True where 'anomaly_score' exceeds `threshold`, forced False when `flag` is falsy."""
    df['anomaly'] = [flag and score > threshold for score in df['anomaly_score']]
|
| 369 |
+
|
| 370 |
+
def get_anomaly_styles(df, threshold, anomaly_scores, flag = False, print_flag = False):
    """Compute plotly marker styles for anomaly display.

    Returns (symbols, line_colors), one entry per row of `df`: 'x' with a
    black outline for anomalies when `flag` is True, otherwise 'circle' with
    a transparent outline. Also mutates df['anomaly'] as a side effect.

    NOTE(review): df['anomaly'] is computed three times — inline from
    df['anomaly_score'], again via get_anomalies(), and (when `flag`) a third
    time from the `anomaly_scores` argument. The third pass wins when `flag`
    is True, so `anomaly_scores` may disagree with df['anomaly_score'] —
    confirm which source is authoritative before simplifying.
    """
    if print_flag: print("Threshold: ", threshold)
    if print_flag: print("Flag", flag)
    if print_flag: print("df ~", df.shape)
    df['anomaly'] = [ (score > threshold) and flag for score in df['anomaly_score'] ]
    if print_flag: print(df)
    get_anomalies(df, threshold, flag)
    # `anomalies` snapshot is taken before the third recomputation below.
    anomalies = df[df['anomaly']]
    if flag:
        df['anomaly'] = [
            (score > threshold) and flag
            for score in anomaly_scores
        ]
        symbols = [
            'x' if is_anomaly else 'circle'
            for is_anomaly in df['anomaly']
        ]
        line_colors = [
            'black'
            if (is_anomaly and flag) else 'rgba(0,0,0,0)'
            for is_anomaly in df['anomaly']
        ]
    else:
        # Anomaly display disabled: uniform plain circles, no outline.
        symbols = ['circle' for _ in df['x1']]
        line_colors = ['rgba(0,0,0,0)' for _ in df['x1']]
    if print_flag: print(anomalies)
    return symbols, line_colors
|
| 397 |
+
### Example of use
|
| 398 |
+
#prjs_df = pd.DataFrame(prjs_umap, columns = ['x1', 'x2'])
|
| 399 |
+
#prjs_df['anomaly_score'] = anomaly_scores
|
| 400 |
+
#s, l = get_anomaly_styles(prjs_df, 1, True)
|
| 401 |
+
|
| 402 |
+
# %% ../nbs/xai.ipynb 24
|
| 403 |
+
class InteractiveAnomalyPlot():
|
| 404 |
+
    def __init__(
        self, selected_indices = [],
        threshold = 0.15,
        anomaly_flag = False,
        path = "../imgs", w = 0
    ):
        """Hold UI state for the interactive anomaly projection plot.

        Args:
            selected_indices: initial lasso selection (window indices).
                NOTE(review): mutable default — instances created without the
                argument share one list; confirm callers always pass their own.
            threshold: anomaly-score cut-off used by the slider callbacks.
            anomaly_flag: whether anomaly markers start visible.
            path: directory prefix for the saved figure.
                NOTE(review): no path separator is inserted below, so
                "../imgs" + "w=0.png" yields "../imgsw=0.png" — confirm
                whether callers pass a trailing "/".
            w: window size, used only to name the saved image.
        """
        self.selected_indices = selected_indices        # last committed selection
        self.selected_indices_tmp = selected_indices    # live (uncommitted) selection; aliases the same list
        self.threshold = threshold
        self.threshold_ = threshold                     # working copy consumed by callbacks
        self.anomaly_flag = anomaly_flag
        self.w = w
        self.name = f"w={self.w}"
        self.path = f"{path}{self.name}.png"
        self.interaction_enabled = True
|
| 419 |
+
|
| 420 |
+
|
| 421 |
+
def plot_projections_clusters_interactive(
|
| 422 |
+
self, prjs, cluster_labels, umap_params, anomaly_scores=[], fig_size=(7,7), print_flag = False
|
| 423 |
+
):
|
| 424 |
+
self.selected_indices_tmp = self.selected_indices
|
| 425 |
+
py.init_notebook_mode()
|
| 426 |
+
|
| 427 |
+
prjs_df, cluster_colors = plot_initial_config(prjs, cluster_labels, anomaly_scores)
|
| 428 |
+
legend_items = [widgets.HTML(f'<b>Cluster {cluster}:</b> <span style="color:{color};">■</span>')
|
| 429 |
+
for cluster, color in cluster_colors.items()]
|
| 430 |
+
legend = widgets.VBox(legend_items)
|
| 431 |
+
|
| 432 |
+
marker_colors = prjs_df['cluster'].map(cluster_colors)
|
| 433 |
+
|
| 434 |
+
symbols, line_colors = get_anomaly_styles(prjs_df, self.threshold_, anomaly_scores, self.anomaly_flag, print_flag)
|
| 435 |
+
|
| 436 |
+
fig = go.FigureWidget(
|
| 437 |
+
[
|
| 438 |
+
go.Scatter(
|
| 439 |
+
x=prjs_df['x1'], y=prjs_df['x2'],
|
| 440 |
+
mode="markers",
|
| 441 |
+
marker= {
|
| 442 |
+
'color': marker_colors,
|
| 443 |
+
'line': { 'color': line_colors, 'width': 1 },
|
| 444 |
+
'symbol': symbols
|
| 445 |
+
},
|
| 446 |
+
text = prjs_df.index
|
| 447 |
+
)
|
| 448 |
+
]
|
| 449 |
+
)
|
| 450 |
+
|
| 451 |
+
line_trace = go.Scatter(
|
| 452 |
+
x=prjs_df['x1'],
|
| 453 |
+
y=prjs_df['x2'],
|
| 454 |
+
mode="lines",
|
| 455 |
+
line=dict(color='rgba(128, 128, 128, 0.5)', width=1)#,
|
| 456 |
+
#showlegend=False # Puedes configurar si deseas mostrar esta línea en la leyenda
|
| 457 |
+
)
|
| 458 |
+
|
| 459 |
+
fig.add_trace(line_trace)
|
| 460 |
+
|
| 461 |
+
sca = fig.data[0]
|
| 462 |
+
|
| 463 |
+
fig.update_layout(
|
| 464 |
+
dragmode='lasso',
|
| 465 |
+
width=700,
|
| 466 |
+
height=500,
|
| 467 |
+
title={
|
| 468 |
+
'text': '<span style="font-weight:bold">DR params - n_neighbors:{:d} min_dist:{:f}</span>'.format(
|
| 469 |
+
umap_params['n_neighbors'], umap_params['min_dist']),
|
| 470 |
+
'y':0.98,
|
| 471 |
+
'x':0.5,
|
| 472 |
+
'xanchor': 'center',
|
| 473 |
+
'yanchor': 'top'
|
| 474 |
+
},
|
| 475 |
+
plot_bgcolor='white',
|
| 476 |
+
paper_bgcolor='#f0f0f0',
|
| 477 |
+
xaxis=dict(gridcolor='lightgray', zerolinecolor='black', title = 'x'),
|
| 478 |
+
yaxis=dict(gridcolor='lightgray', zerolinecolor='black', title = 'y'),
|
| 479 |
+
margin=dict(l=10, r=20, t=30, b=10)
|
| 480 |
+
|
| 481 |
+
|
| 482 |
+
)
|
| 483 |
+
|
| 484 |
+
output_tmp = Output()
|
| 485 |
+
output_button = Output()
|
| 486 |
+
output_anomaly = Output()
|
| 487 |
+
output_threshold = Output()
|
| 488 |
+
output_width = Output()
|
| 489 |
+
|
| 490 |
+
def select_action(trace, points, selector):
|
| 491 |
+
self.selected_indices_tmp = points.point_inds
|
| 492 |
+
with output_tmp:
|
| 493 |
+
output_tmp.clear_output(wait=True)
|
| 494 |
+
if print_flag: print("Selected indices tmp:", self.selected_indices_tmp)
|
| 495 |
+
|
| 496 |
+
def button_action(b):
|
| 497 |
+
self.selected_indices = self.selected_indices_tmp
|
| 498 |
+
with output_button:
|
| 499 |
+
output_button.clear_output(wait = True)
|
| 500 |
+
if print_flag: print("Selected indices:", self.selected_indices)
|
| 501 |
+
|
| 502 |
+
|
| 503 |
+
def update_anomalies():
|
| 504 |
+
if print_flag: print("About to update anomalies")
|
| 505 |
+
|
| 506 |
+
symbols, line_colors = get_anomaly_styles(prjs_df, self.threshold_, anomaly_scores, self.anomaly_flag, print_flag)
|
| 507 |
+
|
| 508 |
+
if print_flag: print("Anomaly styles got")
|
| 509 |
+
|
| 510 |
+
with fig.batch_update():
|
| 511 |
+
fig.data[0].marker.symbol = symbols
|
| 512 |
+
fig.data[0].marker.line.color = line_colors
|
| 513 |
+
if print_flag: print("Anomalies updated")
|
| 514 |
+
if print_flag: print("Threshold: ", self.threshold_)
|
| 515 |
+
if print_flag: print("Scores: ", anomaly_scores)
|
| 516 |
+
|
| 517 |
+
|
| 518 |
+
def anomaly_action(b):
    """Toggle anomaly highlighting and redraw the marker styles.

    Fixed: the original referenced the misspelled name ``print_fllag``,
    which raised NameError as soon as the button was clicked.
    """
    with output_anomaly:
        output_anomaly.clear_output(wait=True)
        if print_flag: print("Negate anomaly flag")
        self.anomaly_flag = not self.anomaly_flag
        if print_flag: print("Show anomalies:", self.anomaly_flag)
        update_anomalies()
|
| 525 |
+
|
| 526 |
+
sca.on_selection(select_action)
|
| 527 |
+
layout = widgets.Layout(width='auto', height='40px')
|
| 528 |
+
button = Button(
|
| 529 |
+
description="Update selected_indices",
|
| 530 |
+
style = {'button_color': 'lightblue'},
|
| 531 |
+
display = 'flex',
|
| 532 |
+
flex_row = 'column',
|
| 533 |
+
align_items = 'stretch',
|
| 534 |
+
layout = layout
|
| 535 |
+
)
|
| 536 |
+
anomaly_button = Button(
|
| 537 |
+
description = "Show anomalies",
|
| 538 |
+
style = {'button_color': 'lightgray'},
|
| 539 |
+
display = 'flex',
|
| 540 |
+
flex_row = 'column',
|
| 541 |
+
align_items = 'stretch',
|
| 542 |
+
layout = layout
|
| 543 |
+
)
|
| 544 |
+
|
| 545 |
+
button.on_click(button_action)
|
| 546 |
+
anomaly_button.on_click(anomaly_action)
|
| 547 |
+
|
| 548 |
+
##### Reactivity buttons
|
| 549 |
+
pause_button = Button(
|
| 550 |
+
description = "Pause interactiveness",
|
| 551 |
+
style = {'button_color': 'pink'},
|
| 552 |
+
display = 'flex',
|
| 553 |
+
flex_row = 'column',
|
| 554 |
+
align_items = 'stretch',
|
| 555 |
+
layout = layout
|
| 556 |
+
)
|
| 557 |
+
resume_button = Button(
|
| 558 |
+
description = "Resume interactiveness",
|
| 559 |
+
style = {'button_color': 'lightgreen'},
|
| 560 |
+
display = 'flex',
|
| 561 |
+
flex_row = 'column',
|
| 562 |
+
align_items = 'stretch',
|
| 563 |
+
layout = layout
|
| 564 |
+
)
|
| 565 |
+
|
| 566 |
+
|
| 567 |
+
threshold_slider = FloatSlider(
|
| 568 |
+
value=self.threshold_,
|
| 569 |
+
min=0.0,
|
| 570 |
+
max=float(np.ceil(self.threshold+5)),
|
| 571 |
+
step=0.0001,
|
| 572 |
+
description='Anomaly threshold:',
|
| 573 |
+
continuous_update=False
|
| 574 |
+
)
|
| 575 |
+
|
| 576 |
+
def pause_interaction(b):
|
| 577 |
+
self.interaction_enabled = False
|
| 578 |
+
fig.update_layout(dragmode='pan')
|
| 579 |
+
|
| 580 |
+
def resume_interaction(b):
|
| 581 |
+
self.interaction_enabled = True
|
| 582 |
+
fig.update_layout(dragmode='lasso')
|
| 583 |
+
|
| 584 |
+
|
| 585 |
+
def update_threshold(change):
|
| 586 |
+
with output_threshold:
|
| 587 |
+
output_threshold.clear_output(wait = True)
|
| 588 |
+
if print_flag: print("Update threshold")
|
| 589 |
+
self.threshold_ = change.new
|
| 590 |
+
if print_flag: print("Update anomalies threshold = ", self.threshold_)
|
| 591 |
+
update_anomalies()
|
| 592 |
+
|
| 593 |
+
#### Width
|
| 594 |
+
width_slider = FloatSlider(
|
| 595 |
+
value = 0.5,
|
| 596 |
+
min = 0.0,
|
| 597 |
+
max = 1.0,
|
| 598 |
+
step = 0.0001,
|
| 599 |
+
description = 'Line width:',
|
| 600 |
+
continuous_update = False
|
| 601 |
+
)
|
| 602 |
+
|
| 603 |
+
def update_width(change):
|
| 604 |
+
with output_width:
|
| 605 |
+
try:
|
| 606 |
+
output_width.clear_output(wait = True)
|
| 607 |
+
if print_flag:
|
| 608 |
+
print("Change line width")
|
| 609 |
+
print("Trace to update:", fig.data[1])
|
| 610 |
+
with fig.batch_update():
|
| 611 |
+
fig.data[1].line.width = change.new # Actualiza la opacidad de la línea
|
| 612 |
+
if print_flag: print("ChangeD line width")
|
| 613 |
+
except Exception as e:
|
| 614 |
+
print("Error updating line width:", e)
|
| 615 |
+
|
| 616 |
+
|
| 617 |
+
|
| 618 |
+
pause_button.on_click(pause_interaction)
|
| 619 |
+
resume_button.on_click(resume_interaction)
|
| 620 |
+
|
| 621 |
+
threshold_slider.observe(update_threshold, 'value')
|
| 622 |
+
|
| 623 |
+
####
|
| 624 |
+
width_slider.observe(update_width, names = 'value')
|
| 625 |
+
|
| 626 |
+
#####
|
| 627 |
+
space = HTML(" ")
|
| 628 |
+
|
| 629 |
+
vbox = VBox((output_tmp, output_button, output_anomaly, output_threshold, fig))
|
| 630 |
+
hbox = HBox((space, button, space, pause_button, space, resume_button, anomaly_button))
|
| 631 |
+
|
| 632 |
+
# Centrar las dos cajas horizontalmente en el VBox
|
| 633 |
+
|
| 634 |
+
box_layout = widgets.Layout(display='flex',
|
| 635 |
+
flex_flow='column',
|
| 636 |
+
align_items='center',
|
| 637 |
+
width='100%')
|
| 638 |
+
|
| 639 |
+
if self.anomaly_flag:
|
| 640 |
+
box = VBox((hbox,threshold_slider,width_slider, output_width, vbox), layout = box_layout)
|
| 641 |
+
else:
|
| 642 |
+
box = VBox((hbox, width_slider, output_width, vbox), layout = box_layout)
|
| 643 |
+
box.add_class("layout")
|
| 644 |
+
plot_save(fig, self.w)
|
| 645 |
+
|
| 646 |
+
display(box)
|
| 647 |
+
|
| 648 |
+
|
| 649 |
+
# %% ../nbs/xai.ipynb 25
|
| 650 |
+
def plot_save(fig, w):
    """Export *fig* as a PNG snapshot named after the window size *w*.

    The image is written to ``../imgs/w=<w>.png``. The directory is
    created on demand so a fresh checkout does not crash on the open().
    """
    import os
    image_bytes = pio.to_image(fig, format='png')
    os.makedirs("../imgs", exist_ok=True)
    with open(f"../imgs/w={w}.png", 'wb') as f:
        f.write(image_bytes)
|
| 654 |
+
|
| 655 |
+
|
| 656 |
+
# %% ../nbs/xai.ipynb 26
|
| 657 |
+
def plot_initial_config(prjs, cluster_labels, anomaly_scores):
    """Build the projections dataframe and a cluster→color mapping.

    Parameters
    ----------
    prjs : 2D array-like with the (x1, x2) projected coordinates.
    cluster_labels : per-point cluster id.
    anomaly_scores : per-point anomaly score.

    Returns
    -------
    (prjs_df, cluster_colors) where ``prjs_df`` has columns
    x1, x2, cluster, anomaly_score, and ``cluster_colors`` maps every
    distinct cluster id to a CSS color string.
    """
    prjs_df = pd.DataFrame(prjs, columns=['x1', 'x2'])
    prjs_df['cluster'] = cluster_labels
    prjs_df['anomaly_score'] = anomaly_scores

    unique_clusters = pd.DataFrame({'cluster': cluster_labels}).drop_duplicates()['cluster']
    palette = px.colors.qualitative.Set1
    # Cycle the palette so that more clusters than palette entries still get
    # a color. The original sliced the palette to len(unique clusters), which
    # raised a length-mismatch error for more than len(Set1) clusters.
    cluster_colors = {
        cluster: palette[i % len(palette)]
        for i, cluster in enumerate(unique_clusters)
    }
    return prjs_df, cluster_colors
|
| 666 |
+
|
| 667 |
+
# %% ../nbs/xai.ipynb 27
|
| 668 |
+
def merge_overlapping_windows(windows):
    """Collapse (start, end) windows into a minimal sorted list of
    non-overlapping windows.

    Windows sharing an endpoint (end == next start) are merged too.
    Returns [] for an empty input.
    """
    if not windows:
        return []

    ordered = sorted(windows, key=lambda w: w[0])
    merged = [ordered[0]]

    for win in ordered[1:]:
        last = merged[-1]
        if win[0] <= last[1]:
            # Overlap (or touch): extend the previous window in place.
            merged[-1] = (last[0], max(win[1], last[1]))
        else:
            merged.append(win)

    return merged
|
| 685 |
+
|
| 686 |
+
# %% ../nbs/xai.ipynb 29
|
| 687 |
+
class InteractiveTSPlot:
    """Interactive multivariate time-series plot with window overlays.

    Wraps a plotly ``FigureWidget`` plus ipywidgets controls (feature
    toggles, pan arrows, step-size buttons). The plotting methods are
    attached to the class afterwards, nbdev-style.

    Fix vs. the original: ``self.fig`` and ``self.print_flag`` were each
    assigned twice; the redundant assignments were removed (behavior is
    unchanged, the duplicates overwrote identical/throwaway values).
    """
    def __init__(
        self,
        df,                                # time-indexed DataFrame of features
        selected_indices,                  # window ids selected in the embedding plot
        meaningful_features_subset_ids,    # column positions shown initially
        w,                                 # window size
        stride=1,
        print_flag=False,
        num_points=10000,                  # NOTE(review): currently unused downstream ("Num points no va bien")
        dateformat='%Y-%m-%d %H:%M:%S',
        delta_x=10,                        # horizontal pan step
        delta_y=0.1                        # vertical pan step
    ):
        self.df = df
        self.selected_indices = selected_indices
        self.meaningful_features_subset_ids = meaningful_features_subset_ids
        self.w = w
        self.stride = stride
        self.print_flag = print_flag
        self.num_points = num_points
        self.dateformat = dateformat
        self.buttons = []

        self.delta_x = delta_x
        self.delta_y = delta_y

        self.window_ranges, self.n_windows, self.df_selected = get_df_selected(
            self.df, self.selected_indices, self.w, self.stride
        )
        # Ensure the smallest possible number of windows to plot (like in R Shiny App)
        self.window_ranges = merge_overlapping_windows(self.window_ranges)

        if self.print_flag:
            print("windows: ", self.n_windows, self.window_ranges)
            print("selected id: ", self.df_selected.index)
            print("points: ", self.num_points)

        # String index so plotly treats ticks as categories/labels.
        self.df.index = self.df.index.astype(str)
        self.fig = go.FigureWidget()
        # One random color per (pre-merge) window; may be more than needed
        # after merging, which is harmless.
        self.colors = [
            f'rgb({np.random.randint(0, 256)}, {np.random.randint(0, 256)}, {np.random.randint(0, 256)})'
            for _ in range(self.n_windows)
        ]

        ##############################
        # Outputs for debug printing #
        ##############################
        self.output_windows = Output()
        self.output_move = Output()
        self.output_delta_x = Output()
        self.output_delta_y = Output()
|
| 744 |
+
|
| 745 |
+
|
| 746 |
+
|
| 747 |
+
|
| 748 |
+
|
| 749 |
+
# %% ../nbs/xai.ipynb 30
|
| 750 |
+
def add_selected_features(self: InteractiveTSPlot):
    """Add one line trace per dataframe column.

    Only the columns whose position is in
    ``meaningful_features_subset_ids`` start visible; the rest can be
    toggled later through the feature buttons.
    """
    for column in self.df.columns:
        column_pos = self.df.columns.get_loc(column)
        self.fig.add_trace(
            go.Scatter(
                x=self.df.index,
                y=self.df[column],
                mode='lines',
                name=column,
                visible=column_pos in self.meaningful_features_subset_ids,
                text=self.df.index
            )
        )

InteractiveTSPlot.add_selected_features = add_selected_features
|
| 768 |
+
|
| 769 |
+
# %% ../nbs/xai.ipynb 31
|
| 770 |
+
def add_windows(self: InteractiveTSPlot):
    """Overlay one translucent rectangle per (merged) window range and
    log each window's date span to ``output_windows``."""
    for i, (start, end) in enumerate(self.window_ranges):
        self.fig.add_shape(
            type="rect",
            x0=self.df.index[start],
            x1=self.df.index[end],
            y0= 0,
            y1= 1,
            yref = "paper",  # rectangle spans the full plot height
            fillcolor=self.colors[i], #"LightSalmon",
            opacity=0.25,
            layer="below",  # draw under the traces
            line=dict(color=self.colors[i], width=1),
            name = f"w_{i}"
        )
        with self.output_windows:
            # NOTE(review): window_ranges may have been merged in __init__, so
            # selected_indices[i] does not necessarily correspond to this
            # (possibly merged) range — confirm before trusting this label.
            print("w[" + str( self.selected_indices[i] )+ "]="+str(self.df.index[start])+", "+str(self.df.index[end])+")")

InteractiveTSPlot.add_windows = add_windows
|
| 789 |
+
|
| 790 |
+
# %% ../nbs/xai.ipynb 32
|
| 791 |
+
def setup_style(self: InteractiveTSPlot):
    """Apply titles, margins, tick format and background to the figure."""
    style = dict(
        title='Time Series with time window plot',
        xaxis_title='Datetime',
        yaxis_title='Value',
        legend_title='Variables',
        margin=dict(l=10, r=10, t=30, b=10),
        xaxis=dict(
            tickformat='%d-' + self.dateformat,
        ),
        paper_bgcolor='#f0f0f0',
    )
    self.fig.update_layout(**style)
    # Vertical navigation is handled by the arrow buttons, so lock y zoom.
    self.fig.update_yaxes(fixedrange=True)

InteractiveTSPlot.setup_style = setup_style
|
| 811 |
+
|
| 812 |
+
# %% ../nbs/xai.ipynb 34
|
| 813 |
+
def toggle_trace(self : InteractiveTSPlot, button : Button):
    """Flip the visibility of the trace whose column name is the
    clicked button's description."""
    column = button.description
    target = self.fig.data[self.df.columns.get_loc(column)]
    target.visible = not target.visible

InteractiveTSPlot.toggle_trace = toggle_trace
|
| 819 |
+
|
| 820 |
+
# %% ../nbs/xai.ipynb 35
|
| 821 |
+
def set_features_buttons(self):
    """Create one toggle button per column; buttons for the meaningful
    feature subset are styled green ('success')."""
    self.buttons = []
    for feature_id in self.df.columns:
        highlighted = self.df.columns.get_loc(feature_id) in self.meaningful_features_subset_ids
        btn = Button(
            description=str(feature_id),
            button_style='success' if highlighted else ''
        )
        btn.on_click(self.toggle_trace)
        self.buttons.append(btn)

InteractiveTSPlot.set_features_buttons = set_features_buttons
|
| 832 |
+
|
| 833 |
+
# %% ../nbs/xai.ipynb 36
|
| 834 |
+
def move_left(self : InteractiveTSPlot, button : Button):
    """Pan the visible x-range ``delta_x`` units to the left (earlier)."""
    with self.output_move:
        self.output_move.clear_output(wait=True)
        lo, hi = self.fig.layout.xaxis.range
        shifted_lo = shift_datetime(lo, self.delta_x, '-', self.dateformat, self.print_flag)
        shifted_hi = shift_datetime(hi, self.delta_x, '-', self.dateformat, self.print_flag)
        with self.fig.batch_update():
            self.fig.layout.xaxis.range = [shifted_lo, shifted_hi]
|
| 842 |
+
|
| 843 |
+
def move_right(self : InteractiveTSPlot, button : Button):
    """Pan the visible x-range ``delta_x`` units to the right (later).

    Consistency fix: ``clear_output`` is now called inside the Output
    context, matching ``move_left`` and the other movement handlers
    (the original cleared the widget before entering the context).
    """
    with self.output_move:
        self.output_move.clear_output(wait=True)
        start_date, end_date = self.fig.layout.xaxis.range
        new_start_date = shift_datetime(start_date, self.delta_x, '+', self.dateformat, self.print_flag)
        new_end_date = shift_datetime(end_date, self.delta_x, '+', self.dateformat, self.print_flag)
        with self.fig.batch_update():
            self.fig.layout.xaxis.range = [new_start_date, new_end_date]
|
| 851 |
+
|
| 852 |
+
def move_down(self: InteractiveTSPlot, button : Button):
    """Pan the visible y-range ``delta_y`` units down.

    Bug fix: the original wrote ``self.ig.layout`` (missing the 'f'),
    which raised AttributeError every time the button was clicked.
    """
    with self.output_move:
        self.output_move.clear_output(wait=True)
        start_y, end_y = self.fig.layout.yaxis.range
        with self.fig.batch_update():
            self.fig.layout.yaxis.range = [start_y - self.delta_y, end_y - self.delta_y]
|
| 858 |
+
def move_up(self: InteractiveTSPlot, button : Button):
    """Pan the visible y-range ``delta_y`` units up."""
    with self.output_move:
        self.output_move.clear_output(wait=True)
        lo, hi = self.fig.layout.yaxis.range
        with self.fig.batch_update():
            self.fig.layout.yaxis.range = [lo + self.delta_y, hi + self.delta_y]
|
| 864 |
+
|
| 865 |
+
# Attach the pan handlers to the class (nbdev-style monkey patching).
InteractiveTSPlot.move_left = move_left
InteractiveTSPlot.move_right = move_right
InteractiveTSPlot.move_down = move_down
InteractiveTSPlot.move_up = move_up
|
| 869 |
+
|
| 870 |
+
# %% ../nbs/xai.ipynb 37
|
| 871 |
+
def delta_x_bigger(self: InteractiveTSPlot, button=None):
    """Multiply the horizontal pan step by 10.

    ``button`` (default None) was added because ipywidgets' ``on_click``
    invokes callbacks with the clicked Button; the original signature
    lacked it, so the bound handler raised TypeError on every click.
    """
    with self.output_delta_x:
        self.output_delta_x.clear_output(wait=True)
        if self.print_flag: print("Delta before", self.delta_x)
        self.delta_x *= 10
        if self.print_flag: print("delta_x:", self.delta_x)
|
| 877 |
+
|
| 878 |
+
def delta_y_bigger(self: InteractiveTSPlot, button=None):
    """Multiply the vertical pan step by 10.

    ``button`` (default None) was added because ipywidgets' ``on_click``
    invokes callbacks with the clicked Button; the original signature
    lacked it, so the bound handler raised TypeError on every click.
    """
    with self.output_delta_y:
        self.output_delta_y.clear_output(wait=True)
        if self.print_flag: print("Delta before", self.delta_y)
        self.delta_y *= 10
        if self.print_flag: print("delta_y:", self.delta_y)
|
| 884 |
+
|
| 885 |
+
def delta_x_lower(self: InteractiveTSPlot, button=None):
    """Divide the horizontal pan step by 10.

    ``button`` (default None) was added because ipywidgets' ``on_click``
    invokes callbacks with the clicked Button; the original signature
    lacked it, so the bound handler raised TypeError on every click.
    """
    with self.output_delta_x:
        self.output_delta_x.clear_output(wait=True)
        if self.print_flag: print("Delta before", self.delta_x)
        self.delta_x /= 10
        if self.print_flag: print("delta_x:", self.delta_x)
|
| 891 |
+
|
| 892 |
+
def delta_y_lower(self: InteractiveTSPlot, button=None):
    """Divide the vertical pan step by 10.

    Fixes three defects in the original:
    - it multiplied ``delta_y`` by 10 instead of dividing, so the
      "lower" button actually grew the step;
    - it lacked the ``button`` parameter that ipywidgets' ``on_click``
      passes, so the bound handler raised TypeError on every click;
    - its debug prints were unconditional; they are now guarded by
      ``self.print_flag`` like the other delta_* handlers.
    """
    with self.output_delta_y:
        self.output_delta_y.clear_output(wait=True)
        if self.print_flag: print("Delta before", self.delta_y)
        self.delta_y /= 10
        if self.print_flag: print("delta_y:", self.delta_y)
|
| 898 |
+
# Attach the step-size handlers to the class (nbdev-style monkey patching).
InteractiveTSPlot.delta_x_bigger = delta_x_bigger
InteractiveTSPlot.delta_y_bigger = delta_y_bigger
InteractiveTSPlot.delta_x_lower = delta_x_lower
InteractiveTSPlot.delta_y_lower = delta_y_lower
|
| 902 |
+
|
| 903 |
+
# %% ../nbs/xai.ipynb 38
|
| 904 |
+
def add_movement_buttons(self: InteractiveTSPlot):
    """Create the four pan arrows and the four step-size (dx/dy)
    buttons, wiring each one to its handler."""
    self.button_left = Button(description="←")
    self.button_right = Button(description="→")
    self.button_up = Button(description="↑")
    self.button_down = Button(description="↓")

    self.button_step_x_up = Button(description="dx ↑")
    self.button_step_x_down = Button(description="dx ↓")
    self.button_step_y_up = Button(description="dy↑")
    self.button_step_y_down = Button(description="dy↓")

    # TODO: make the pan step adjustable from the UI — the output is not
    # shown and the value is not updated.
    wiring = (
        (self.button_step_x_up, self.delta_x_bigger),
        (self.button_step_x_down, self.delta_x_lower),
        (self.button_step_y_up, self.delta_y_bigger),
        (self.button_step_y_down, self.delta_y_lower),
        (self.button_left, self.move_left),
        (self.button_right, self.move_right),
        (self.button_up, self.move_up),
        (self.button_down, self.move_down),
    )
    for btn, handler in wiring:
        btn.on_click(handler)

InteractiveTSPlot.add_movement_buttons = add_movement_buttons
|
| 928 |
+
|
| 929 |
+
# %% ../nbs/xai.ipynb 40
|
| 930 |
+
def setup_boxes(self: InteractiveTSPlot):
    """Lay out feature buttons, pan controls, figure and debug outputs
    in nested flex boxes; debug Outputs appear only when print_flag."""
    self.steps_x = VBox([self.button_step_x_up, self.button_step_x_down])
    self.steps_y = VBox([self.button_step_y_up, self.button_step_y_down])
    arrow_buttons = HBox([
        self.button_left, self.button_right, self.button_up,
        self.button_down, self.steps_x, self.steps_y
    ])
    feature_row = HBox(
        self.buttons,
        layout=widgets.Layout(display='flex', flex_flow='row wrap', align_items='flex-start')
    )
    column_layout = widgets.Layout(
        display='flex',
        flex_flow='column',
        align_items='center',
        width='100%'
    )
    children = [feature_row, arrow_buttons]
    if self.print_flag:
        children += [self.output_move, self.output_delta_x, self.output_delta_y]
    children += [self.fig, self.output_windows]
    self.box = VBox(children, layout=column_layout)

InteractiveTSPlot.setup_boxes = setup_boxes
|
| 948 |
+
|
| 949 |
+
|
| 950 |
+
# %% ../nbs/xai.ipynb 41
|
| 951 |
+
def initial_plot(self: InteractiveTSPlot):
    """Build the complete widget: traces, window overlays, styling,
    feature buttons, movement buttons and the final layout boxes."""
    build_steps = (
        self.add_selected_features,
        self.add_windows,
        self.setup_style,
        self.set_features_buttons,
        self.add_movement_buttons,
        self.setup_boxes,
    )
    for step in build_steps:
        step()

InteractiveTSPlot.initial_plot = initial_plot
|
| 959 |
+
|
| 960 |
+
# %% ../nbs/xai.ipynb 42
|
| 961 |
+
def show(self : InteractiveTSPlot):
    """Assemble the full widget and render it in the notebook output."""
    self.initial_plot()
    display(self.box)

InteractiveTSPlot.show = show
|
entrypoint-rstudio.sh
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash --login
# Container entrypoint for the RStudio service: wires up the conda
# environment, exports selected host environment variables to R via
# ~/.Renviron, then starts RStudio Server before running the CMD.
set -e

# Make login shells aware of conda.
# NOTE(review): these appends run on every container start; with a
# persistent $HOME they will accumulate duplicate lines in .bashrc.
echo ". ${HOME}/miniconda3/etc/profile.d/conda.sh" >> ${HOME}/.bashrc
# Make bash automatically activate the conda environment
echo "conda activate ${ENV_PREFIX}" >> ~/.bashrc
#echo "export WANDB_ENTITY=${WANDB_ENTITY:-default}" >> ${HOME}/.bashrc
# echo "WANDB_ENTITY=${WANDB_ENTITY:-default}" >> ${HOME}/.Renviron

# Define an array of environment variable names from the ENV_VARS Compose variable
IFS=',' read -ra ENV_VAR_NAMES <<< "$ENV_VARS"

echo "ENV_VAR_NAMES=${ENV_VAR_NAMES[@]}"

# Loop through the array of environment variable names and set the variables
for ENV_VAR_NAME in "${ENV_VAR_NAMES[@]}"; do
ENV_VAR_VALUE="${!ENV_VAR_NAME:-default}"  # indirect expansion; "default" if unset
echo "$ENV_VAR_NAME=$ENV_VAR_VALUE" >> ${HOME}/.Renviron
done

# Raise the per-process stack size limit.
ulimit -s 16384

# start rstudio server
# NOTE(review): /init is expected to manage the rstudio-server service;
# exec "$@" only runs once /init returns — confirm against the base image.
/init
exec "$@"
|
requirements.txt
CHANGED
|
@@ -1,3 +1 @@
|
|
| 1 |
-
|
| 2 |
-
uvicorn
|
| 3 |
-
fastapi
|
|
|
|
| 1 |
+
|
|
|
|
|
|