Nikita committed on
Commit 14d91dc · 1 Parent(s): 810611f

added tirex as model

.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
tirex/__init__.py ADDED
@@ -0,0 +1,8 @@
+ # Copyright (c) NXAI GmbH.
+ # This software may be used and distributed according to the terms of the NXAI Community License Agreement.
+
+ from .api_adapter.forecast import ForecastModel
+ from .base import load_model
+ from .models.tirex import TiRexZero
+
+ __all__ = ["load_model", "ForecastModel"]
tirex/api_adapter/__init__.py ADDED
@@ -0,0 +1,2 @@
+ # Copyright (c) NXAI GmbH.
+ # This software may be used and distributed according to the terms of the NXAI Community License Agreement.
tirex/api_adapter/forecast.py ADDED
@@ -0,0 +1,209 @@
+ # Copyright (c) NXAI GmbH.
+ # This software may be used and distributed according to the terms of the NXAI Community License Agreement.
+
+ from abc import ABC, abstractmethod
+ from typing import Literal
+
+ import torch
+
+ from .standard_adapter import ContextType, get_batches
+
+ try:
+     from .gluon import format_gluonts_output, get_gluon_batches
+
+     _GLUONTS_AVAILABLE = True
+ except ImportError:
+     _GLUONTS_AVAILABLE = False
+
+ try:
+     from .hf_data import get_hfdata_batches
+
+     _HF_DATASETS_AVAILABLE = True
+ except ImportError:
+     _HF_DATASETS_AVAILABLE = False
+
+
+ DEF_TARGET_COLUMN = "target"
+ DEF_META_COLUMNS = ("start", "item_id")
+
+
+ def _format_output(
+     quantiles: torch.Tensor,
+     means: torch.Tensor,
+     sample_meta: list[dict],
+     quantile_levels: list[float],
+     output_type: Literal["torch", "numpy", "gluonts"],
+ ):
+     if output_type == "torch":
+         return quantiles.cpu(), means.cpu()
+     elif output_type == "numpy":
+         return quantiles.cpu().numpy(), means.cpu().numpy()
+     elif output_type == "gluonts":
+         if not _GLUONTS_AVAILABLE:
+             raise ValueError('output_type "gluonts" requires GluonTS, but GluonTS is not available (not installed)!')
+         return format_gluonts_output(quantiles, means, sample_meta, quantile_levels)
+     else:
+         raise ValueError(f"Invalid output type: {output_type}")
+
+
+ def _as_generator(batches, fc_func, quantile_levels, output_type, **predict_kwargs):
+     for batch_ctx, batch_meta in batches:
+         quantiles, mean = fc_func(batch_ctx, **predict_kwargs)
+         yield _format_output(
+             quantiles=quantiles,
+             means=mean,
+             sample_meta=batch_meta,
+             quantile_levels=quantile_levels,
+             output_type=output_type,
+         )
+
+
+ def _gen_forecast(fc_func, batches, output_type, quantile_levels, yield_per_batch, **predict_kwargs):
+     if yield_per_batch:
+         return _as_generator(batches, fc_func, quantile_levels, output_type, **predict_kwargs)
+
+     prediction_q = []
+     prediction_m = []
+     sample_meta = []
+     for batch_ctx, batch_meta in batches:
+         quantiles, mean = fc_func(batch_ctx, **predict_kwargs)
+         prediction_q.append(quantiles)
+         prediction_m.append(mean)
+         sample_meta.extend(batch_meta)
+
+     prediction_q = torch.cat(prediction_q, dim=0)
+     prediction_m = torch.cat(prediction_m, dim=0)
+
+     return _format_output(
+         quantiles=prediction_q,
+         means=prediction_m,
+         sample_meta=sample_meta,
+         quantile_levels=quantile_levels,
+         output_type=output_type,
+     )
+
+
+ def _common_forecast_doc():
+     common_doc = """
+     This method takes historical context data as input and outputs probabilistic forecasts.
+
+     Args:
+         output_type (Literal["torch", "numpy", "gluonts"], optional):
+             Specifies the desired format of the returned forecasts:
+             - "torch": Returns forecasts as `torch.Tensor` objects [batch_dim, forecast_len, |quantile_levels|]
+             - "numpy": Returns forecasts as `numpy.ndarray` objects [batch_dim, forecast_len, |quantile_levels|]
+             - "gluonts": Returns forecasts as a list of GluonTS `Forecast` objects.
+             Defaults to "torch".
+
+         batch_size (int, optional): The number of time series instances to process concurrently by the model.
+             Defaults to 512. Must be >= 1.
+
+         quantile_levels (List[float], optional): Quantile levels for which predictions should be generated.
+             Defaults to (0.1, 0.2, ..., 0.9).
+
+         yield_per_batch (bool, optional): If `True`, the method acts as a generator, yielding
+             forecasts batch by batch as they are computed.
+             Defaults to `False`.
+
+         **predict_kwargs: Additional keyword arguments that are passed directly to the underlying
+             prediction mechanism of the pre-trained model. Refer to the model's
+             internal prediction method documentation for available options.
+
+     Returns:
+         The return type depends on `output_type` and `yield_per_batch`:
+         - If `yield_per_batch` is `True`: An iterator that yields forecasts. Each yielded item
+           corresponds to one batch of forecasts in the format specified by `output_type`.
+         - If `yield_per_batch` is `False`: A single object containing all forecasts.
+             - If `output_type="torch"`: `Tuple[torch.Tensor, torch.Tensor]` (quantiles, mean).
+             - If `output_type="numpy"`: `Tuple[numpy.ndarray, numpy.ndarray]` (quantiles, mean).
+             - If `output_type="gluonts"`: A `List[gluonts.model.forecast.Forecast]` of all forecasts.
+     """
+     return common_doc
+
+
+ class ForecastModel(ABC):
+     @abstractmethod
+     def _forecast_quantiles(self, batch, **predict_kwargs):
+         pass
+
+     def forecast(
+         self,
+         context: ContextType,
+         output_type: Literal["torch", "numpy", "gluonts"] = "torch",
+         batch_size: int = 512,
+         quantile_levels: list[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
+         yield_per_batch: bool = False,
+         **predict_kwargs,
+     ):
+         f"""
+         {_common_forecast_doc()}
+         Args:
+             context (ContextType): The historical "context" data of the time series:
+                 - `torch.Tensor`: 1D `[context_length]` or 2D `[batch_dim, context_length]` tensor
+                 - `np.ndarray`: 1D `[context_length]` or 2D `[batch_dim, context_length]` array
+                 - `List[torch.Tensor]`: List of 1D tensors (samples with different lengths get padded per batch)
+                 - `List[np.ndarray]`: List of 1D arrays (samples with different lengths get padded per batch)
+         """
+         assert batch_size >= 1, "Batch size must be >= 1"
+         batches = get_batches(context, batch_size)
+         return _gen_forecast(
+             self._forecast_quantiles, batches, output_type, quantile_levels, yield_per_batch, **predict_kwargs
+         )
+
+     def forecast_gluon(
+         self,
+         gluonDataset,
+         output_type: Literal["torch", "numpy", "gluonts"] = "torch",
+         batch_size: int = 512,
+         quantile_levels: list[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
+         yield_per_batch: bool = False,
+         data_kwargs: dict = {},
+         **predict_kwargs,
+     ):
+         f"""
+         {_common_forecast_doc()}
+
+         Args:
+             gluonDataset (gluonts.dataset.common.Dataset): A GluonTS dataset object containing the
+                 historical time series data.
+
+             data_kwargs (dict, optional): Additional keyword arguments passed to the
+                 GluonTS data processing function.
+         """
+         assert batch_size >= 1, "Batch size must be >= 1"
+         if not _GLUONTS_AVAILABLE:
+             raise ValueError("forecast_gluon requires GluonTS, but GluonTS is not available (not installed)!")
+         batches = get_gluon_batches(gluonDataset, batch_size, **data_kwargs)
+         return _gen_forecast(
+             self._forecast_quantiles, batches, output_type, quantile_levels, yield_per_batch, **predict_kwargs
+         )
+
+     def forecast_hfdata(
+         self,
+         hf_dataset,
+         output_type: Literal["torch", "numpy", "gluonts"] = "torch",
+         batch_size: int = 512,
+         quantile_levels: list[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
+         yield_per_batch: bool = False,
+         data_kwargs: dict = {},
+         **predict_kwargs,
+     ):
+         f"""
+         {_common_forecast_doc()}
+
+         Args:
+             hf_dataset (datasets.Dataset): A Hugging Face `Dataset` object containing the
+                 historical time series data.
+
+             data_kwargs (dict, optional): Additional keyword arguments passed to the
+                 Hugging Face `datasets` processing function.
+         """
+         assert batch_size >= 1, "Batch size must be >= 1"
+         if not _HF_DATASETS_AVAILABLE:
+             raise ValueError(
+                 "forecast_hfdata requires Hugging Face datasets, but datasets is not available (not installed)!"
+             )
+         batches = get_hfdata_batches(hf_dataset, batch_size, **data_kwargs)
+         return _gen_forecast(
+             self._forecast_quantiles, batches, output_type, quantile_levels, yield_per_batch, **predict_kwargs
+         )
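
Example usage of the forecast API added above — a minimal sketch, assuming the published NX-AI/TiRex checkpoint and the default settings (shapes in the comments follow the docstring):

import torch
from tirex import ForecastModel, load_model

model: ForecastModel = load_model("NX-AI/TiRex")
# Ragged input: shorter series are left-padded with NaN within each batch.
context = [torch.rand(128), torch.rand(256)]
quantiles, mean = model.forecast(context, output_type="torch", batch_size=2)
print(quantiles.shape)  # [2, forecast_len, 9] with the default 9 quantile levels
print(mean.shape)       # [2, forecast_len]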
tirex/api_adapter/gluon.py ADDED
@@ -0,0 +1,48 @@
+ # Copyright (c) NXAI GmbH.
+ # This software may be used and distributed according to the terms of the NXAI Community License Agreement.
+
+ import pandas as pd
+ import torch
+ from gluonts.dataset.common import Dataset
+ from gluonts.dataset.field_names import FieldName
+ from gluonts.model.forecast import QuantileForecast
+
+ from .standard_adapter import _batch_pad_iterable
+
+ DEF_TARGET_COLUMN = FieldName.TARGET  # "target"
+ DEF_META_COLUMNS = (FieldName.START, FieldName.ITEM_ID)
+
+
+ def _get_gluon_ts_map(**gluon_kwargs):
+     target_col = gluon_kwargs.get("target_column", DEF_TARGET_COLUMN)
+     meta_columns = gluon_kwargs.get("meta_columns", DEF_META_COLUMNS)
+
+     def extract_gluon(series):
+         ctx = torch.Tensor(series[target_col])
+         meta = {k: series[k] for k in meta_columns if k in series}
+         meta["length"] = len(ctx)
+         return ctx, meta
+
+     return extract_gluon
+
+
+ def get_gluon_batches(gluonDataset: Dataset, batch_size: int, **gluon_kwargs):
+     return _batch_pad_iterable(map(_get_gluon_ts_map(**gluon_kwargs), gluonDataset), batch_size)
+
+
+ def format_gluonts_output(quantile_forecasts: torch.Tensor, mean_forecasts, meta: list[dict], quantile_levels):
+     forecasts = []
+     for i in range(quantile_forecasts.shape[0]):
+         start_date = meta[i].get(FieldName.START, pd.Period("01-01-2000", freq=meta[i].get("freq", "h")))
+         start_date += meta[i].get("length", 0)
+         forecasts.append(
+             QuantileForecast(
+                 forecast_arrays=torch.cat((quantile_forecasts[i], mean_forecasts[i].unsqueeze(1)), dim=1)
+                 .T.cpu()
+                 .numpy(),
+                 start_date=start_date,
+                 item_id=meta[i].get(FieldName.ITEM_ID, None),
+                 forecast_keys=list(map(str, quantile_levels)) + ["mean"],
+             )
+         )
+     return forecasts
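
For the GluonTS path, a sketch under the assumption that GluonTS is installed and `model` is a loaded TiRex handle as in the earlier example; `ListDataset` is standard GluonTS:

import numpy as np
from gluonts.dataset.common import ListDataset

dataset = ListDataset(
    [{"start": "2020-01-01 00:00", "target": np.random.rand(100), "item_id": "series_0"}],
    freq="h",
)
# Returns a list of QuantileForecast objects; the forecast_keys are the quantile
# levels as strings plus "mean", matching format_gluonts_output above.
forecasts = model.forecast_gluon(dataset, output_type="gluonts")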
tirex/api_adapter/hf_data.py ADDED
@@ -0,0 +1,38 @@
+ # Copyright (c) NXAI GmbH.
+ # This software may be used and distributed according to the terms of the NXAI Community License Agreement.
+
+ import datasets
+ import torch
+
+ from .standard_adapter import _batch_pad_iterable
+
+ DEF_TARGET_COLUMN = "target"
+
+
+ def _get_hf_map(dataset: datasets.Dataset, **hf_kwargs):
+     target_col = hf_kwargs.get("target_column", DEF_TARGET_COLUMN)
+     meta_columns = hf_kwargs.get("meta_columns", ())
+
+     columns_to_pass = [target_col] + list(meta_columns)
+     remove_cols = [col for col in dataset.column_names if col not in columns_to_pass]
+     dataset = (
+         dataset.with_format("torch")
+         .remove_columns(remove_cols)
+         .cast_column(target_col, datasets.Sequence(datasets.Value("float32")))
+     )
+
+     def yield_batch_tuples(sample: dict) -> tuple[torch.Tensor, dict]:
+         context_data = sample[target_col]
+         if context_data.ndim > 1:
+             context_data = context_data.squeeze()
+         assert context_data.ndim == 1
+         meta = {k: sample[k] for k in meta_columns if k in sample}
+         meta["length"] = len(context_data)
+         return context_data, meta
+
+     return dataset, yield_batch_tuples
+
+
+ def get_hfdata_batches(hf_dataset: datasets.Dataset, batch_size: int, **hf_kwargs):
+     dataset, map_func = _get_hf_map(hf_dataset, **hf_kwargs)
+     return _batch_pad_iterable(map(map_func, dataset), batch_size)
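
A corresponding sketch for the Hugging Face datasets path (assumes the `datasets` package is installed; `model` as before):

import datasets

ds = datasets.Dataset.from_dict(
    {"target": [[1.0, 2.0, 3.0], [4.0, 5.0]], "item_id": ["a", "b"]}
)
# meta_columns is forwarded to _get_hf_map via data_kwargs; all other columns are dropped.
quantiles, mean = model.forecast_hfdata(ds, data_kwargs={"meta_columns": ("item_id",)})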
tirex/api_adapter/standard_adapter.py ADDED
@@ -0,0 +1,67 @@
+ # Copyright (c) NXAI GmbH.
+ # This software may be used and distributed according to the terms of the NXAI Community License Agreement.
+
+ import itertools
+ from collections.abc import Iterable, Iterator, Sequence
+ from typing import Union
+
+ import numpy as np
+ import torch
+
+ ContextType = Union[
+     torch.Tensor,
+     np.ndarray,
+     list[torch.Tensor],
+     list[np.ndarray],
+ ]
+
+
+ def _batched_slice(full_batch, full_meta: list[dict] | None, batch_size: int) -> Iterator[tuple[Sequence, list[dict]]]:
+     if len(full_batch) <= batch_size:
+         yield full_batch, full_meta if full_meta is not None else [{} for _ in range(len(full_batch))]
+     else:
+         for i in range(0, len(full_batch), batch_size):
+             batch = full_batch[i : i + batch_size]
+             yield batch, (full_meta[i : i + batch_size] if full_meta is not None else [{} for _ in range(len(batch))])
+
+
+ def _batched(iterable: Iterable, n: int):
+     it = iter(iterable)
+     while batch := tuple(itertools.islice(it, n)):
+         yield batch
+
+
+ def _batch_pad_iterable(iterable: Iterable[tuple[torch.Tensor, dict]], batch_size: int):
+     for batch in _batched(iterable, batch_size):
+         # ctx_it_len, ctx_it_data, it_meta = itertools.tee(batch, 3)
+         max_len = max(len(el[0]) for el in batch)
+         padded_batch = []
+         meta = []
+         for el in batch:
+             sample = el[0]
+             assert isinstance(sample, torch.Tensor)
+             assert sample.ndim == 1
+             assert len(sample) > 0, "Each sample needs to have a length > 0"
+             padding = torch.full(size=(max_len - len(sample),), fill_value=torch.nan, device=sample.device)
+             padded_batch.append(torch.cat((padding, sample)))
+             meta.append(el[1])
+         yield torch.stack(padded_batch), meta
+
+
+ def get_batches(context: ContextType, batch_size: int):
+     batches = None
+     if isinstance(context, torch.Tensor):
+         if context.ndim == 1:
+             context = context.unsqueeze(0)
+         assert context.ndim == 2
+         batches = _batched_slice(context, None, batch_size)
+     elif isinstance(context, np.ndarray):
+         if context.ndim == 1:
+             context = np.expand_dims(context, axis=0)
+         assert context.ndim == 2
+         batches = map(lambda x: (torch.Tensor(x[0]), x[1]), _batched_slice(context, None, batch_size))
+     elif isinstance(context, (list, Iterable)):
+         batches = _batch_pad_iterable(map(lambda x: (torch.Tensor(x), None), context), batch_size)
+     if batches is None:
+         raise ValueError(f"Context type {type(context)} not supported! Supported Types: {ContextType}")
+     return batches
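
The padding behavior of get_batches can be checked in isolation — a small sketch:

import torch
from tirex.api_adapter.standard_adapter import get_batches

batches = get_batches([torch.tensor([1.0, 2.0]), torch.tensor([3.0])], batch_size=2)
batch, meta = next(iter(batches))
print(batch)  # tensor([[1., 2.], [nan, 3.]]) -- the shorter series is left-padded with NaN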
tirex/base.py ADDED
@@ -0,0 +1,73 @@
+ # Copyright (c) NXAI GmbH.
+ # This software may be used and distributed according to the terms of the NXAI Community License Agreement.
+
+ import os
+ from abc import ABC, abstractmethod
+ from typing import TypeVar
+
+ from huggingface_hub import hf_hub_download
+
+ T = TypeVar("T", bound="PretrainedModel")
+
+
+ def parse_hf_repo_id(path):
+     parts = path.split("/")
+     return "/".join(parts[0:2])
+
+
+ class PretrainedModel(ABC):
+     REGISTRY: dict[str, type["PretrainedModel"]] = {}
+
+     def __init_subclass__(cls, **kwargs):
+         super().__init_subclass__(**kwargs)
+         cls.REGISTRY[cls.register_name()] = cls
+
+     @classmethod
+     def from_pretrained(cls: type[T], path, device: str = "cuda:0", hf_kwargs=None, ckp_kwargs=None) -> T:
+         if hf_kwargs is None:
+             hf_kwargs = {}
+         if ckp_kwargs is None:
+             ckp_kwargs = {}
+         if os.path.exists(path):
+             print("Loading weights from local directory")
+             checkpoint_path = path
+         else:
+             repo_id = parse_hf_repo_id(path)
+             checkpoint_path = hf_hub_download(repo_id=repo_id, filename="model.ckpt", **hf_kwargs)
+         model = cls.load_from_checkpoint(checkpoint_path, map_location=device, **ckp_kwargs)
+         model.after_load_from_checkpoint()
+         return model
+
+     @classmethod
+     @abstractmethod
+     def register_name(cls) -> str:
+         pass
+
+     def after_load_from_checkpoint(self):
+         pass
+
+
+ def load_model(path: str, device: str = "cuda:0", hf_kwargs=None, ckp_kwargs=None) -> PretrainedModel:
+     """Load a TiRex model from the Hugging Face Hub or a local checkpoint.
+
+     Args:
+         path (str): Hugging Face path to the model (e.g. NX-AI/TiRex) or a local checkpoint path.
+         device (str, optional): The device on which to load the model (e.g., "cuda:0", "cpu").
+             If you want to use "cpu" you need to deactivate the sLSTM CUDA kernels (check the repository FAQ!).
+         hf_kwargs (dict, optional): Keyword arguments to pass to the Hugging Face Hub download method.
+         ckp_kwargs (dict, optional): Keyword arguments to pass when loading the checkpoint.
+
+     Returns:
+         PretrainedModel: The loaded model.
+
+     Examples:
+         model: ForecastModel = load_model("NX-AI/TiRex")
+     """
+     try:
+         _, model_id = parse_hf_repo_id(path).split("/")
+     except ValueError:
+         raise ValueError(f"Invalid model path {path}")
+     model_cls = PretrainedModel.REGISTRY.get(model_id, None)
+     if model_cls is None:
+         raise ValueError(f"Invalid model id {model_id}")
+     return model_cls.from_pretrained(path, device=device, hf_kwargs=hf_kwargs, ckp_kwargs=ckp_kwargs)
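
The registry wiring works through __init_subclass__: merely defining a subclass makes it loadable by name. A hypothetical subclass to illustrate (MyModel is not part of this commit):

from tirex.base import PretrainedModel

class MyModel(PretrainedModel):
    @classmethod
    def register_name(cls) -> str:
        return "MyModel"

# Defining the class is enough: PretrainedModel.REGISTRY now maps "MyModel" -> MyModel,
# so load_model("some-org/MyModel") would dispatch to MyModel.from_pretrained(...).
assert PretrainedModel.REGISTRY["MyModel"] is MyModel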
tirex/models/__init__.py ADDED
@@ -0,0 +1,2 @@
+ # Copyright (c) NXAI GmbH.
+ # This software may be used and distributed according to the terms of the NXAI Community License Agreement.
tirex/models/components.py ADDED
@@ -0,0 +1,147 @@
+ # Copyright (c) NXAI GmbH.
+ # This software may be used and distributed according to the terms of the NXAI Community License Agreement.
+
+
+ from dataclasses import dataclass, field
+ from typing import Any
+
+ import torch
+
+ SCALER_STATE = "scaler_state"
+
+
+ class ResidualBlock(torch.nn.Module):
+     def __init__(
+         self,
+         in_dim: int,
+         h_dim: int,
+         out_dim: int,
+         dropout: float = 0,
+     ) -> None:
+         super().__init__()
+         self.dropout = torch.nn.Dropout(dropout)
+         self.hidden_layer = torch.nn.Linear(in_dim, h_dim)
+         self.output_layer = torch.nn.Linear(h_dim, out_dim)
+         self.residual_layer = torch.nn.Linear(in_dim, out_dim)
+         self.act = torch.nn.ReLU()
+
+     def forward(self, x: torch.Tensor):
+         hid = self.act(self.hidden_layer(x))
+         out = self.output_layer(hid)
+         res = self.residual_layer(x)
+         out = out + res
+         return out
+
+
+ @dataclass
+ class StandardScaler:
+     eps: float = 1e-5
+     nan_loc: float = 0.0
+
+     def scale(
+         self,
+         x: torch.Tensor,
+         loc_scale: tuple[torch.Tensor, torch.Tensor] | None = None,
+     ) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
+         if loc_scale is None:
+             loc = torch.nan_to_num(torch.nanmean(x, dim=-1, keepdim=True), nan=self.nan_loc)
+             scale = torch.nan_to_num(torch.nanmean((x - loc).square(), dim=-1, keepdim=True).sqrt(), nan=1.0)
+             scale = torch.where(scale == 0, torch.abs(loc) + self.eps, scale)
+         else:
+             loc, scale = loc_scale
+
+         return ((x - loc) / scale), (loc, scale)
+
+     def re_scale(self, x: torch.Tensor, loc_scale: tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
+         loc, scale = loc_scale
+         return x * scale + loc
+
+
+ @dataclass
+ class _Patcher:
+     patch_size: int
+     patch_stride: int
+     left_pad: bool
+
+     def __post_init__(self):
+         assert self.patch_size % self.patch_stride == 0
+
+     def __call__(self, x: torch.Tensor) -> torch.Tensor:
+         assert x.ndim == 2
+         length = x.shape[-1]
+
+         if length < self.patch_size or (length % self.patch_stride != 0):
+             if length < self.patch_size:
+                 padding_size = (
+                     *x.shape[:-1],
+                     self.patch_size - (length % self.patch_size),
+                 )
+             else:
+                 padding_size = (
+                     *x.shape[:-1],
+                     self.patch_stride - (length % self.patch_stride),
+                 )
+             padding = torch.full(size=padding_size, fill_value=torch.nan, dtype=x.dtype, device=x.device)
+             if self.left_pad:
+                 x = torch.concat((padding, x), dim=-1)
+             else:
+                 x = torch.concat((x, padding), dim=-1)
+
+         x = x.unfold(dimension=-1, size=self.patch_size, step=self.patch_stride)
+         return x
+
+
+ @dataclass
+ class PatchedUniTokenizer:
+     patch_size: int
+     scaler: Any = field(default_factory=StandardScaler)
+     patch_stride: int | None = None
+
+     def __post_init__(self):
+         if self.patch_stride is None:
+             self.patch_stride = self.patch_size
+         self.patcher = _Patcher(self.patch_size, self.patch_stride, left_pad=True)
+
+     def context_input_transform(self, data: torch.Tensor):
+         assert data.ndim == 2
+         data, scale_state = self.scaler.scale(data)
+         return self.patcher(data), {SCALER_STATE: scale_state}
+
+     def output_transform(self, data: torch.Tensor, tokenizer_state: dict):
+         data_shape = data.shape
+         data = self.scaler.re_scale(data.reshape(data_shape[0], -1), tokenizer_state[SCALER_STATE]).view(*data_shape)
+         return data
+
+
+ class StreamToLogger:
+     """Fake file-like stream object that redirects writes to a logger
+     instance."""
+
+     def __init__(self, logger, log_level):
+         self.logger = logger
+         self.log_level = log_level
+         self.linebuf = ""  # Buffer for partial lines
+
+     def write(self, message):
+         # Filter out empty messages (often from just a newline)
+         if message.strip():
+             self.linebuf += message
+             # If the message contains a newline, process the full line
+             if "\n" in self.linebuf:
+                 lines = self.linebuf.splitlines(keepends=True)
+                 for line in lines:
+                     if line.endswith("\n"):
+                         # Log full lines without the trailing newline (logger adds its own)
+                         self.logger.log(self.log_level, line.rstrip("\n"))
+                     else:
+                         # Keep partial lines in buffer
+                         self.linebuf = line
+                         return
+                 self.linebuf = ""  # All lines processed
+         # If no newline, keep buffering
+
+     def flush(self):
+         # Log any remaining buffered content when flush is called
+         if self.linebuf.strip():
+             self.logger.log(self.log_level, self.linebuf.rstrip("\n"))
+         self.linebuf = ""
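
How the tokenizer cuts a series into patches — a sketch of the scale-then-patch round trip (toy sizes, not the model's configured patch size):

import torch
from tirex.models.components import PatchedUniTokenizer

tok = PatchedUniTokenizer(patch_size=4)
x = torch.arange(10, dtype=torch.float32).unsqueeze(0)  # [1, 10]
patches, state = tok.context_input_transform(x)
print(patches.shape)  # [1, 3, 4]: left-padded with NaN to length 12, then cut into 3 patches
restored = tok.output_transform(patches, state)  # undoes the scaling; NaN padding stays NaN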
tirex/models/mixed_stack.py ADDED
@@ -0,0 +1,143 @@
+ # Copyright (c) NXAI GmbH.
+ # This software may be used and distributed according to the terms of the NXAI Community License Agreement.
+
+
+ import os
+ from dataclasses import dataclass, field
+
+ import torch
+ from torch import nn
+ from xlstm.blocks.slstm.layer import sLSTMLayer, sLSTMLayerConfig
+ from xlstm.xlstm_large import xLSTMLargeConfig
+ from xlstm.xlstm_large.components import RMSNorm
+ from xlstm.xlstm_large.model import FeedForward, mLSTMBlock, mLSTMStateType
+
+
+ def skip_cuda():
+     return os.getenv("TIREX_NO_CUDA", "False").lower() in ("true", "1", "t")
+
+
+ def init_cell(config: xLSTMLargeConfig, block_idx, num_blocks):
+     return sLSTMLayer(
+         sLSTMLayerConfig(
+             embedding_dim=config.embedding_dim,
+             num_heads=config.num_heads,
+             conv1d_kernel_size=0,  # 0 means no convolution included
+             group_norm_weight=True,
+             dropout=0,
+             # CellConfig
+             backend="vanilla" if skip_cuda() else "cuda",
+             bias_init="powerlaw_blockdependent",
+             recurrent_weight_init="zeros",
+             num_gates=4,
+             gradient_recurrent_cut=False,
+             gradient_recurrent_clipval=None,
+             forward_clipval=None,
+             batch_size=8,  # needed?
+             _block_idx=block_idx,
+             _num_blocks=num_blocks,
+         )
+     )
+
+
+ sLSTMLayerStateType = tuple[torch.Tensor, torch.Tensor]
+ sLSTMStateType = dict[int, sLSTMLayerStateType]
+
+
+ class sLSTMBlock(nn.Module):
+     def __init__(self, config: xLSTMLargeConfig, block_idx: int, num_blocks: int):
+         super().__init__()
+         self.config = config
+         self.norm_slstm = RMSNorm(
+             num_features=config.embedding_dim,
+             eps=config.norm_eps,
+             use_weight=True,
+             use_bias=config.use_bias,
+             force_float32_reductions=config.norm_reduction_force_float32,
+         )
+         self.slstm_layer = init_cell(config, block_idx, num_blocks)
+
+         self.norm_ffn = RMSNorm(
+             num_features=config.embedding_dim,
+             eps=config.norm_eps,
+             use_weight=True,
+             use_bias=config.use_bias,
+             force_float32_reductions=config.norm_reduction_force_float32,
+         )
+         self.ffn = FeedForward(config)
+
+     def forward(
+         self, x: torch.Tensor, state: sLSTMLayerStateType | None = None
+     ) -> tuple[torch.Tensor, sLSTMLayerStateType]:
+         x_slstm = self.norm_slstm(x)
+         if state is None:
+             conv_state, slstm_state = None, None
+         else:
+             conv_state, slstm_state = state
+         x_slstm, state = self.slstm_layer(x_slstm, conv_state, slstm_state, return_last_state=True)
+         x = x + x_slstm
+
+         x_ffn = self.norm_ffn(x)
+         x_ffn = self.ffn(x_ffn)
+         x = x + x_ffn
+
+         return x, (state["conv_state"], state["slstm_state"])
+
+
+ @dataclass
+ class xLSTMMixedLargeConfig(xLSTMLargeConfig):
+     slstm_at: list[int] = field(default_factory=list)
+     all_slstm: bool = True
+
+     @property
+     def block_types(self):
+         return ["s" if i in self.slstm_at or self.all_slstm else "m" for i in range(self.num_blocks)]
+
+
+ class xLSTMMixedLargeBlockStack(nn.Module):
+     config_class = xLSTMMixedLargeConfig
+
+     def __init__(self, config: xLSTMMixedLargeConfig):
+         super().__init__()
+         self.config = config
+
+         self.blocks = nn.ModuleList(
+             [
+                 sLSTMBlock(config, block_idx=i, num_blocks=config.num_blocks) if t == "s" else mLSTMBlock(config)
+                 for i, t in enumerate(config.block_types)
+             ]
+         )
+
+         if self.config.add_out_norm:
+             self.out_norm = RMSNorm(
+                 num_features=config.embedding_dim,
+                 eps=config.norm_eps,
+                 use_weight=True,
+                 use_bias=config.use_bias,
+                 force_float32_reductions=config.norm_reduction_force_float32,
+             )
+         else:
+             self.out_norm = nn.Identity()
+
+     def forward(
+         self, x: torch.Tensor, state: mLSTMStateType | sLSTMStateType | None = None
+     ) -> tuple[torch.Tensor, mLSTMStateType]:
+         if state is None:
+             state = {i: None for i in range(len(self.blocks))}
+
+         for i, block in enumerate(self.blocks):
+             block_state = state[i]
+             x, block_state_new = block(x, block_state)
+
+             if block_state is None:
+                 state[i] = block_state_new
+             else:
+                 pass
+                 ## layer state is a tuple of three tensors: c, n, m
+                 ## we update the state in place in order to avoid creating new tensors
+                 # for state_idx in range(len(block_state)):
+                 #     state[i][state_idx].copy_(block_state_new[state_idx])
+
+         x = self.out_norm(x)
+
+         return x, state
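
skip_cuda() gates the sLSTM backend on the TIREX_NO_CUDA environment variable, so it must be set before the model is built. A sketch for running without the CUDA kernels (with the caveats from the warnings in tirex.py):

import os
os.environ["TIREX_NO_CUDA"] = "1"  # selects backend="vanilla" in init_cell

from tirex import load_model
model = load_model("NX-AI/TiRex", device="cpu")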
tirex/models/predict_utils.py ADDED
@@ -0,0 +1,72 @@
+ # Copyright (c) NXAI GmbH.
+ # This software may be used and distributed according to the terms of the NXAI Community License Agreement.
+
+
+ import logging
+ from abc import abstractmethod
+
+ import torch
+
+ from ..api_adapter.forecast import ForecastModel
+
+ LOGGER = logging.getLogger()
+
+
+ class TensorQuantileUniPredictMixin(ForecastModel):
+     @abstractmethod
+     def _forecast_tensor(
+         self,
+         context: torch.Tensor,
+         prediction_length: int | None = None,
+         **predict_kwargs,
+     ) -> torch.Tensor:
+         pass
+
+     @property
+     @abstractmethod
+     def quantiles(self):
+         pass
+
+     def _forecast_quantiles(
+         self,
+         context: torch.Tensor,
+         prediction_length: int | None = None,
+         quantile_levels: list[float] = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
+         output_device: str = "cpu",
+         auto_cast: bool = False,
+         **predict_kwargs,
+     ) -> tuple[torch.Tensor, torch.Tensor]:
+         with torch.autocast(device_type=self.device.type, enabled=auto_cast):
+             predictions = self._forecast_tensor(
+                 context=context, prediction_length=prediction_length, **predict_kwargs
+             ).detach()
+         predictions = predictions.to(torch.device(output_device)).swapaxes(1, 2)
+
+         training_quantile_levels = list(self.quantiles)
+
+         if set(quantile_levels).issubset(set(training_quantile_levels)):
+             quantiles = predictions[..., [training_quantile_levels.index(q) for q in quantile_levels]]
+         else:
+             if min(quantile_levels) < min(training_quantile_levels) or max(quantile_levels) > max(
+                 training_quantile_levels
+             ):
+                 logging.warning(
+                     f"Requested quantile levels ({quantile_levels}) fall outside the range of "
+                     f"quantiles the model was trained on ({training_quantile_levels}). "
+                     "Predictions for out-of-range quantiles will be clamped to the nearest "
+                     "boundary of the trained quantiles (i.e., minimum or maximum trained level). "
+                     "This can significantly impact prediction accuracy, especially for extreme quantiles."
+                 )
+             # Interpolate quantiles
+             augmented_predictions = torch.cat(
+                 [predictions[..., [0]], predictions, predictions[..., [-1]]],
+                 dim=-1,
+             )
+             quantiles = torch.quantile(
+                 augmented_predictions,
+                 q=torch.tensor(quantile_levels, dtype=augmented_predictions.dtype),
+                 dim=-1,
+             ).permute(1, 2, 0)
+         # median as mean
+         mean = predictions[:, :, training_quantile_levels.index(0.5)]
+         return quantiles, mean
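
The interpolation branch duplicates the outermost trained quantiles before calling torch.quantile, so a requested level maps linearly onto the trained grid and out-of-range levels clamp to the boundary. A toy check with the default 9 levels:

import torch

preds = torch.linspace(1.0, 9.0, 9).view(1, 1, 9)  # predictions for levels 0.1 .. 0.9
augmented = torch.cat([preds[..., [0]], preds, preds[..., [-1]]], dim=-1)  # 11 values
q = torch.quantile(augmented, q=torch.tensor([0.15]), dim=-1).permute(1, 2, 0)
print(q)  # ~1.5: halfway between the 0.1 prediction (1.0) and the 0.2 prediction (2.0)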
tirex/models/tirex.py ADDED
@@ -0,0 +1,231 @@
+ # Copyright (c) NXAI GmbH.
+ # This software may be used and distributed according to the terms of the NXAI Community License Agreement.
+
+ import logging
+ import warnings
+ from contextlib import redirect_stdout
+ from dataclasses import dataclass
+
+ import lightning as L
+ import torch
+ from dacite import Config, from_dict
+
+ from ..base import PretrainedModel
+ from .components import PatchedUniTokenizer, ResidualBlock, StreamToLogger
+ from .mixed_stack import skip_cuda, xLSTMMixedLargeBlockStack, xLSTMMixedLargeConfig
+ from .predict_utils import TensorQuantileUniPredictMixin
+
+ LOGGER = logging.getLogger()
+
+
+ @dataclass
+ class TiRexZeroConfig:
+     input_patch_size: int
+     output_patch_size: int
+     quantiles: list[float]
+     block_kwargs: dict
+     input_ff_dim: int
+
+
+ class TiRexZero(L.LightningModule, PretrainedModel, TensorQuantileUniPredictMixin):
+     def __init__(self, model_config: dict, train_ctx_len=None):
+         super().__init__()
+         self.model_config: TiRexZeroConfig = from_dict(TiRexZeroConfig, model_config, config=Config(strict=True))
+         assert self.model_config.input_patch_size == self.model_config.output_patch_size
+         self.train_ctx_len = train_ctx_len
+
+         # Block Stack
+         self.nan_mask_value = 0
+         self.block_stack, resolved_config = self.init_block(self.model_config.block_kwargs)
+         self.model_config.block_kwargs = resolved_config
+
+         # Input Layer
+         self.input_patch_embedding = ResidualBlock(
+             in_dim=self.model_config.input_patch_size * 2,
+             h_dim=self.model_config.input_ff_dim,
+             out_dim=self.model_config.block_kwargs.embedding_dim,
+         )
+         self.tokenizer = PatchedUniTokenizer(
+             patch_size=self.model_config.input_patch_size,
+         )
+
+         # Output Layer
+         self.num_quantiles = len(self.model_config.quantiles)
+         quantiles = torch.tensor(self.model_config.quantiles)
+         self.register_buffer("quantiles", quantiles, persistent=False)
+
+         self.output_patch_embedding = ResidualBlock(
+             in_dim=self.model_config.block_kwargs.embedding_dim,
+             h_dim=self.model_config.input_ff_dim,
+             out_dim=self.num_quantiles * self.model_config.output_patch_size,
+         )
+
+         self.save_hyperparameters()
+
+     @classmethod
+     def register_name(cls):
+         return "TiRex"
+
+     def init_block(self, block_kwargs):
+         config = from_dict(xLSTMMixedLargeConfig, block_kwargs)
+         log_redirect = StreamToLogger(LOGGER, logging.INFO)
+         with redirect_stdout(log_redirect):  # avoid excessive print statements of sLSTM compile
+             model = xLSTMMixedLargeBlockStack(config)
+         return model, config
+
+     @property
+     def quantiles(self):
+         return self.model.quantiles
+
+     def _forward_model_tokenized(
+         self,
+         input_token,
+         input_mask=None,
+         rollouts=1,
+     ):
+         input_mask = (
+             input_mask.to(input_token.dtype)
+             if input_mask is not None
+             else torch.isnan(input_token).logical_not().to(input_token.dtype)
+         )
+         assert rollouts >= 1
+         bs, numb_ctx_token, token_dim = input_token.shape
+         if rollouts > 1:
+             input_token = torch.cat(
+                 (
+                     input_token,
+                     torch.full(
+                         (bs, rollouts - 1, token_dim),
+                         fill_value=torch.nan,
+                         device=input_token.device,
+                         dtype=input_token.dtype,
+                     ),
+                 ),
+                 dim=1,
+             )
+             input_mask = torch.cat(
+                 (
+                     input_mask,
+                     torch.full(
+                         (bs, rollouts - 1, token_dim),
+                         fill_value=False,
+                         device=input_mask.device,
+                         dtype=input_mask.dtype,
+                     ),
+                 ),
+                 dim=1,
+             )
+         input_token = torch.nan_to_num(input_token, nan=self.nan_mask_value)
+         input_embeds = self.input_patch_embedding(torch.cat((input_token, input_mask), dim=2))
+
+         # hidden_states = []
+         # for rollout in range(rollout):
+         x = self.block_stack(input_embeds)
+         if isinstance(x, tuple):
+             hidden_states = x[0]
+         else:
+             hidden_states = x
+
+         quantile_preds = self.output_patch_embedding(hidden_states)
+         quantile_preds = torch.unflatten(quantile_preds, -1, (self.num_quantiles, self.model_config.output_patch_size))
+         quantile_preds = torch.transpose(quantile_preds, 1, 2)  # switch quantile and num_token dimension
+         # quantile_preds: [batch_size, num_quantiles, num_token, output_patch_size]
+
+         return quantile_preds, hidden_states
+
+     @torch.inference_mode()
+     def _forecast_tensor(
+         self,
+         context: torch.Tensor,
+         prediction_length: int | None = None,
+         max_context: int | None = None,
+         max_accelerated_rollout_steps: int = 1,
+     ) -> torch.Tensor:
+         predictions = []
+         if prediction_length is None:
+             prediction_length = self.tokenizer.patch_size
+         remaining = -(prediction_length // -self.tokenizer.patch_size)  # ceil division
+         if max_context is None:
+             max_context = self.train_ctx_len
+         min_context = max(self.train_ctx_len, max_context)
+
+         context = context.to(
+             device=self.device,
+             dtype=torch.float32,
+         )
+         while remaining > 0:
+             if context.shape[-1] > max_context:
+                 context = context[..., -max_context:]
+             if context.shape[-1] < min_context:
+                 pad = torch.full(
+                     (context.shape[0], min_context - context.shape[-1]),
+                     fill_value=torch.nan,
+                     device=context.device,
+                     dtype=context.dtype,
+                 )
+                 context = torch.concat((pad, context), dim=1)
+             tokenized_tensor, tokenizer_state = self.tokenizer.context_input_transform(context)
+             fut_rollouts = min(remaining, max_accelerated_rollout_steps)
+             with torch.no_grad():
+                 prediction, _ = self._forward_model_tokenized(input_token=tokenized_tensor, rollouts=fut_rollouts)
+             prediction = prediction[:, :, -fut_rollouts:, :].to(tokenized_tensor)  # predicted token
+             # [bs, num_quantiles, num_predicted_token, output_patch_size]
+             prediction = self.tokenizer.output_transform(prediction, tokenizer_state)
+             prediction = prediction.flatten(start_dim=2)
+
+             predictions.append(prediction)
+             remaining -= fut_rollouts
+
+             if remaining <= 0:
+                 break
+
+             context = torch.cat([context, torch.full_like(prediction[:, 0, :], fill_value=torch.nan)], dim=-1)
+
+         return torch.cat(predictions, dim=-1)[..., :prediction_length].to(
+             dtype=torch.float32,
+         )
+
+     def on_load_checkpoint(self, checkpoint: dict) -> None:
+         state_dict = checkpoint["state_dict"]
+         load_vanilla_kernel = skip_cuda()
+         if load_vanilla_kernel:
+             warnings.warn(
+                 "You use TiRex without sLSTM CUDA kernels! This might slow down the model considerably and might degrade forecasting results! "
+                 "Set the environment variable TIREX_NO_CUDA to 0 to avoid this!"
+             )
+         block_kwargs = self.model_config.block_kwargs
+         head_dim = block_kwargs.embedding_dim // block_kwargs.num_heads
+         num_gates = 4
+         new_state_dict = {}
+         for k, v in state_dict.items():
+             if "slstm_layer.slstm_cell._recurrent_kernel_" in k:
+                 new_state_dict[k] = (
+                     v.reshape(
+                         block_kwargs.num_heads,
+                         head_dim,
+                         num_gates,
+                         head_dim,
+                     )
+                     .permute(0, 2, 3, 1)
+                     .reshape(
+                         block_kwargs.num_heads,
+                         num_gates * head_dim,
+                         head_dim,
+                     )
+                 )
+                 # new_state_dict[k] = v.permute(0, 2, 1)
+             elif "slstm_layer.slstm_cell._bias_" in k:
+                 new_state_dict[k] = (
+                     v.reshape(block_kwargs.num_heads, num_gates, head_dim).permute(1, 0, 2).reshape(-1)
+                 )
+             else:
+                 new_state_dict[k] = v
+         checkpoint["state_dict"] = new_state_dict
+
+     def after_load_from_checkpoint(self):
+         if not skip_cuda() and self.device.type != "cuda":
+             warnings.warn(
+                 f"You use TiRex with sLSTM CUDA kernels BUT DO NOT LOAD THE MODEL ON A CUDA DEVICE (device type is {self.device.type})! "
+                 "This is not supported and calls to the model will likely lead to an error unless you move your model to a CUDA device! "
+                 "If you want to run TiRex on CPU you need to disable the sLSTM CUDA kernels, but be aware of the downsides (see FAQ)."
+             )
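
End-to-end, _forecast_tensor rolls the model out patch by patch: remaining = -(prediction_length // -patch_size) is ceil division, and each loop step appends one predicted patch to the output while extending the context with NaN placeholders for the next step. A closing sketch of a longer-horizon forecast (assumes the NX-AI/TiRex checkpoint on a CUDA device):

import torch
from tirex import load_model

model = load_model("NX-AI/TiRex")
ctx = torch.rand(1, 512)
# prediction_length is forwarded through **predict_kwargs to _forecast_tensor;
# the rollout runs ceil(64 / output_patch_size) steps and truncates to 64.
quantiles, mean = model.forecast(ctx, prediction_length=64)
print(quantiles.shape)  # [1, 64, 9]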