# Source code for fusionlab.nn.losses

# -*- coding: utf-8 -*-
#   License: BSD-3-Clause
#   Author: LKouadio <etanoyau@gmail.com>

"""
Contains loss functions used in the `fusionlab.nn` package for neural
network models. The loss functions are designed to be compatible with
Keras and TensorFlow models.
"""
import warnings 
from numbers import Real 
from typing import List, Optional 

from .._fusionlog import fusionlog
from ..compat.sklearn import Interval

from ..core.checks import ParamsValidator, check_params
from ..core.diagnose_q import validate_quantiles_in 
from ..utils.deps_utils import ensure_pkg
from ..utils.validator import check_consistent_length 
from . import KERAS_DEPS, KERAS_BACKEND, dependency_message
from .keras_validator import validate_keras_loss 

# Module-level aliases for the Keras/TensorFlow symbols used below. They are
# all looked up on KERAS_DEPS so the rest of the module never references the
# backend package directly.
K = KERAS_DEPS.backend
Loss = KERAS_DEPS.Loss
Tensor = KERAS_DEPS.Tensor

tf_abs = KERAS_DEPS.abs
tf_reduce_mean = KERAS_DEPS.reduce_mean
tf_square = KERAS_DEPS.square
tf_reshape = KERAS_DEPS.reshape
tf_convert_to_tensor = KERAS_DEPS.convert_to_tensor
tf_expand_dims = KERAS_DEPS.expand_dims
tf_maximum = KERAS_DEPS.maximum
tf_rank = KERAS_DEPS.rank
tf_cond = KERAS_DEPS.cond
# `tf_constant` used to be assigned twice; a single binding is enough.
tf_constant = KERAS_DEPS.constant
tf_equal = KERAS_DEPS.equal
tf_cast = KERAS_DEPS.cast
tf_zeros_like = KERAS_DEPS.zeros_like
tf_float32 = KERAS_DEPS.float32
tf_reduce_sum = KERAS_DEPS.reduce_sum
tf_gather = KERAS_DEPS.gather

register_keras_serializable = KERAS_DEPS.register_keras_serializable

# Message passed to `ensure_pkg(..., extra=DEP_MSG)` by the factories below.
DEP_MSG = dependency_message('nn.losses')

logger = fusionlog.get_fusionlab_logger(__name__)

# Public API of this module. `make_weighted_pinball` and `pinball_loss` are
# documented with `from fusionlab.nn.losses import ...` examples, so they are
# exported alongside the other loss factories.
__all__ = [
    'make_weighted_pinball',
    'pinball_loss',
    'quantile_loss',
    'quantile_loss_multi',
    'anomaly_loss',
    'combined_quantile_loss',
    'combined_total_loss',
    'objective_loss',
    'prediction_based_loss',
]

@ensure_pkg(KERAS_BACKEND or "keras", extra=DEP_MSG)
def make_weighted_pinball(qs, weights):
    r"""
    Weighted quantile (pinball) loss for sequence forecasts.

    This factory returns a loss function, registered with
    ``register_keras_serializable``, that computes a *weighted* pinball
    score across the quantile axis and then averages over the remaining
    axes.

    The pinball loss for a single quantile :math:`\tau \in (0,1)` and
    error :math:`e = y - \hat y` is

    .. math::

        L_\tau(y, \hat y) = \max(\tau e, (\tau - 1) e).

    With multiple quantiles :math:`\{\tau_q\}_{q=1}^Q` and non-negative
    weights :math:`\{w_q\}`, the loss is the weighted sum of the
    per-quantile pinball scores along the quantile axis, followed by a
    mean over the remaining axes.

    Parameters
    ----------
    qs : sequence of float
        Quantile levels in the *same order* as the model output's
        quantile dimension. Length ``Q``. Typical example:
        ``[0.1, 0.5, 0.9]``.

    weights : dict or sequence of float
        Per-quantile weights. If a ``dict``, keys are quantile levels
        (e.g., ``{0.1: 3.0, 0.5: 1.0, 0.9: 3.0}``) and are matched to
        ``qs`` using a tolerant float comparison (within 1e-6). If a
        sequence, it must have length ``Q`` and correspond
        *positionally* to ``qs``. Weights are normalized to sum to 1
        before aggregation.

    Returns
    -------
    loss_fn : Callable
        A function ``loss_fn(y_true, y_pred)`` usable with
        ``model.compile(loss=...)``. It returns a scalar mean loss.

    Raises
    ------
    ValueError
        If the number of weights differs from the number of quantiles,
        if any weight is negative, or if the weights do not sum to a
        strictly positive value (normalization would otherwise divide
        by zero and produce NaN losses).

    Shape semantics
    ---------------
    Let ``B`` be batch size, ``H`` the forecast horizon, and ``Q`` the
    number of quantiles.

    * ``y_true`` : ``(B, H)`` or ``(B, H, 1)``
    * ``y_pred`` : ``(B, H, Q)`` or ``(B, H, Q, 1)``

    The function internally reshapes to

    * ``y_true``  -> ``(B, H, 1, 1)``
    * ``y_pred``  -> ``(B, H, Q, 1)``
    * ``qs``/``weights``  -> ``(1, 1, Q, 1)``

    and computes a weighted sum along the quantile axis, then a mean
    over what remains.

    Notes
    -----
    * The order of ``qs`` must match the model output order along the
      quantile axis.
    * When ``weights`` is a dict, any quantile in ``qs`` that is not
      found in the dict (within 1e-6) defaults to weight 1 before the
      final weight normalization.
    * A zero weight removes that quantile's contribution to the loss.
    * Inputs are cast to float32 so they match the float32 quantile and
      weight constants.

    Examples
    --------
    >>> from fusionlab.nn.losses import make_weighted_pinball
    >>> qs = [0.1, 0.5, 0.9]
    >>> w  = {0.1: 3.0, 0.5: 1.0, 0.9: 3.0}
    >>> subs_loss = make_weighted_pinball(qs, w)
    >>> model.compile(optimizer="adam",
    ...               loss={"subs_pred": subs_loss, "gwl_pred": "mse"})

    See Also
    --------
    pinball_loss
        Unweighted (equal-weight) pinball loss across quantiles.

    References
    ----------
    Koenker, R. and Bassett, G. (1978). Regression quantiles.
    Econometrica, 46(1):33-50.
    """
    qs_list = [float(q) for q in qs]

    if isinstance(weights, dict):
        def _lookup_weight(q):
            # Tolerant float match of a quantile level against dict keys.
            for k, v in weights.items():
                if abs(float(k) - q) <= 1e-6:
                    return float(v)
            return 1.0  # quantiles missing from the dict get unit weight

        w_list = [_lookup_weight(q) for q in qs_list]
    else:
        w_list = [float(w) for w in weights]

    # Fail early with a clear message instead of a late broadcast error
    # (length mismatch) or silent NaN losses (zero-sum weights).
    if len(w_list) != len(qs_list):
        raise ValueError(
            "`weights` must provide one weight per quantile: got "
            f"{len(w_list)} weight(s) for {len(qs_list)} quantile(s)."
        )
    if any(w < 0 for w in w_list):
        raise ValueError("`weights` must be non-negative.")
    if not sum(w_list) > 0.0:
        raise ValueError("`weights` must sum to a strictly positive value.")

    qs_tf = tf_constant(qs_list, dtype=tf_float32)  # [Q]
    w_tf = tf_constant(w_list, dtype=tf_float32)    # [Q]
    w_tf = w_tf / tf_reduce_sum(w_tf)               # normalize to sum 1

    @register_keras_serializable("fusionlab.nn.losses",
                                 name="make_weighted_pinball")
    def loss_fn(y_true, y_pred):
        # Cast to float32 so the arithmetic below matches qs_tf / w_tf.
        y_true = tf_cast(tf_convert_to_tensor(y_true), tf_float32)
        y_pred = tf_cast(tf_convert_to_tensor(y_pred), tf_float32)

        # If someone accidentally passed (B,H,Q,O) targets, drop Q.
        if y_true.shape.rank == 4:
            y_true = tf_gather(y_true, 0, axis=2)   # -> (B,H,O)

        # y_true -> (B,H,1)
        ytrue_rank = tf_rank(y_true)
        y_true_3 = tf_cond(
            tf_equal(ytrue_rank, 2),
            lambda: tf_expand_dims(y_true, axis=-1),  # (B,H)->(B,H,1)
            lambda: y_true
        )

        # y_pred -> (B,H,Q,1)
        ypred_rank = tf_rank(y_pred)
        y_pred_4 = tf_cond(
            tf_equal(ypred_rank, 3),
            lambda: tf_expand_dims(y_pred, axis=-1),  # (B,H,Q)->(B,H,Q,1)
            lambda: y_pred                             # already (B,H,Q,1)
        )

        # Insert the quantile axis so y_true broadcasts over Q.
        y_true_exp = tf_expand_dims(y_true_3, axis=2)     # (B,H,1,1)

        # Pinball score per quantile.
        tau = tf_reshape(qs_tf, [1, 1, -1, 1])            # (1,1,Q,1)
        err = y_true_exp - y_pred_4                       # (B,H,Q,1)
        pin = tf_maximum(tau * err, (tau - 1.0) * err)    # (B,H,Q,1)

        # Weighted sum over the quantile axis.
        ww = tf_reshape(w_tf, [1, 1, -1, 1])              # (1,1,Q,1)
        pin_w = tf_reduce_sum(ww * pin, axis=2)           # (B,H,1)

        # Mean over the remaining axes.
        return tf_reduce_mean(pin_w)

    return loss_fn


@ensure_pkg(KERAS_BACKEND or "keras", extra=DEP_MSG)
def pinball_loss(qs: List[float]):
    r"""
    Build an unweighted quantile (pinball) loss for sequence forecasts.

    The returned function is registered with
    ``register_keras_serializable`` and computes the pinball score for
    every quantile, then takes the mean over all elements.

    For an error :math:`e = y - \hat y` and a quantile level
    :math:`\tau \in (0,1)` the per-element score is

    .. math::

        L_\tau(y, \hat y) = \max(\tau e, (\tau - 1) e).

    Parameters
    ----------
    qs : sequence of float
        Quantile levels, ordered like the quantile axis of the model
        output (length ``Q``), e.g. ``[0.1, 0.5, 0.9]``.

    Returns
    -------
    loss : Callable
        ``loss(y_true, y_pred)`` returning a scalar mean pinball loss,
        suitable for ``model.compile(loss=...)``.

    Shape semantics
    ---------------
    With ``B`` the batch size, ``H`` the horizon and ``Q`` the number of
    quantiles:

    * ``y_true`` : ``(B, H)`` or ``(B, H, 1)``
    * ``y_pred`` : ``(B, H, Q)`` or ``(B, H, Q, 1)``

    ``y_true`` is expanded so that it broadcasts against the quantile
    axis of ``y_pred``. Both inputs are cast to float32.

    Raises
    ------
    ValueError
        Raised by the returned loss when the static rank of ``y_pred``
        is neither 3 nor 4.

    Notes
    -----
    * To emphasize particular quantiles, use
      :func:`make_weighted_pinball` with per-quantile weights.
    * The order of ``qs`` must match the model's quantile axis.

    Examples
    --------
    >>> from fusionlab.nn.losses import pinball_loss
    >>> qloss = pinball_loss([0.1, 0.5, 0.9])
    >>> model.compile(optimizer="adam",
    ...               loss={"subs_pred": qloss, "gwl_pred": "mse"})

    See Also
    --------
    make_weighted_pinball
        Pinball loss with explicit per-quantile weighting.

    References
    ----------
    Koenker, R. and Bassett, G. (1978). Regression quantiles.
    Econometrica, 46(1):33-50.
    """
    q_base = tf_constant(qs, dtype=tf_float32)  # shape (Q,)

    @register_keras_serializable("fusionlab.nn.losses", name="pinball_loss")
    def loss(y_true, y_pred):
        target = tf_cast(y_true, tf_float32)
        forecast = tf_cast(y_pred, tf_float32)

        forecast_rank = forecast.shape.rank
        if forecast_rank not in (3, 4):
            raise ValueError(
                "y_pred must be rank 3 (B,H,Q) or rank 4 (B,H,Q,1)."
            )

        # Both layouts first need (B,H) targets lifted to (B,H,1).
        if target.shape.rank == 2:
            target = tf_expand_dims(target, axis=-1)

        if forecast_rank == 4:
            # (B,H,1) -> (B,H,1,1) so it lines up with (B,H,Q,1).
            if target.shape.rank == 3:
                target = tf_expand_dims(target, axis=2)
            tau = q_base[None, None, :, None]    # (1,1,Q,1)
        else:
            tau = q_base[None, None, :]          # (1,1,Q)

        residual = target - forecast
        return tf_reduce_mean(
            tf_maximum(tau * residual, (tau - 1.0) * residual)
        )

    return loss

@ensure_pkg(KERAS_BACKEND or "keras", extra=DEP_MSG)
def objective_loss(
    multi_obj_loss: Loss,
    anomaly_scores: Optional[Tensor] = None,
):
    """
    Wrap a multi-objective loss object as a ``loss_fn(y_true, y_pred)``.

    Parameters
    ----------
    multi_obj_loss : MultiObjectiveLoss
        The loss object to wrap. It is passed through
        ``validate_keras_loss`` first; a warning is emitted when the
        validated object is not a ``MultiObjectiveLoss`` instance.
        Typically created as::

            MultiObjectiveLoss(
                quantile_loss_fn=AdaptiveQuantileLoss(...),
                anomaly_loss_fn=AnomalyLoss(...)
            )

    anomaly_scores : tf.Tensor or None, optional
        Anomaly scores forwarded as a third argument to
        ``multi_obj_loss``. The original documentation describes them as
        shape ``(B, H, D)``. When ``None`` (default), ``multi_obj_loss``
        is called with ``y_true`` and ``y_pred`` only.

    Returns
    -------
    callable
        A function ``loss_fn(y_true, y_pred)`` for
        ``model.compile(loss=...)``. It is registered with
        ``register_keras_serializable`` under the name
        ``"objective_loss"``.

    Notes
    -----
    Before delegating, the returned function runs
    ``check_consistent_length`` on ``y_true``, ``y_pred`` and, when
    given, ``anomaly_scores``.

    Examples
    --------
    >>> from fusionlab.nn.components import (
    ...     MultiObjectiveLoss, AdaptiveQuantileLoss, AnomalyLoss
    ... )
    >>> mo_loss = MultiObjectiveLoss(
    ...     quantile_loss_fn=AdaptiveQuantileLoss([0.1, 0.5, 0.9]),
    ...     anomaly_loss_fn=AnomalyLoss(weight=1.5)
    ... )
    >>> anomaly_scores = tf.random.normal((32, 10, 8))
    >>> loss_fn = objective_loss(
    ...     multi_obj_loss=mo_loss,
    ...     anomaly_scores=anomaly_scores
    ... )
    >>> model.compile(optimizer="adam", loss=loss_fn)

    See Also
    --------
    fusionlab.nn.components.MultiObjectiveLoss :
        The object combining quantile and anomaly losses.
    """
    # Imported here rather than at module level, as in the original code.
    from .components import MultiObjectiveLoss

    wrapped = validate_keras_loss(
        multi_obj_loss,
        deep_check=True,
        ops="validate",
    )
    is_expected_type = isinstance(wrapped, MultiObjectiveLoss)
    if not is_expected_type:
        warnings.warn(
            "Expected a MultiObjectiveLoss instance, got %s" % type(wrapped)
        )

    @register_keras_serializable(
        package="fusionlab.nn.losses",
        name="objective_loss"
    )
    @ParamsValidator(
        {
            "y_true": ["array-like:tf:transf"],
            "y_pred": ["array-like:tf:transf"],
        }
    )
    def _loss_fn(y_true, y_pred):
        # Without anomaly scores, delegate with two arguments only.
        if anomaly_scores is None:
            check_consistent_length(y_true, y_pred)
            return wrapped(y_true, y_pred)

        check_consistent_length(y_true, y_pred, anomaly_scores)
        return wrapped(y_true, y_pred, anomaly_scores)

    return _loss_fn
@ParamsValidator({
    'quantiles': ['array-like', None],
    'anomaly_loss_weight': [Real, None]
})
@ensure_pkg(KERAS_BACKEND or "keras", extra=DEP_MSG)
def prediction_based_loss(
    quantiles: Optional[List[float]] = None,
    anomaly_loss_weight: float = 0.1
):
    """
    Create a combined prediction + anomaly loss for the prediction-based
    strategy.

    Parameters
    ----------
    quantiles : list of float, optional
        Quantiles for the quantile loss. When given, they are validated
        with ``validate_quantiles_in`` and the prediction term is
        ``combined_quantile_loss(quantiles)``. When ``None`` (or empty),
        the prediction term is the mean squared error.
    anomaly_loss_weight : float, default=0.1
        Multiplier applied to the anomaly term.

    Returns
    -------
    callable
        A loss function ``loss_fn(y_true, y_pred)`` returning
        ``prediction_loss + anomaly_loss_weight * anomaly_loss``.

    Notes
    -----
    - The anomaly term is built from the absolute errors
      ``|y_true - y_pred|``. When quantiles are used and ``y_pred`` has
      three dimensions, these errors are first averaged over the last
      axis. The anomaly loss is the mean of the squared scores.
    - The returned function is registered with
      ``register_keras_serializable`` under a name that embeds the
      quantiles and the weight.
    - NOTE(review): the parameter validator admits ``None`` for
      ``anomaly_loss_weight``, but the returned loss multiplies by it;
      passing ``None`` presumably fails when the loss is evaluated -
      confirm the intended behaviour.
    """
    # Validate quantiles if provided
    if quantiles is not None:
        quantiles = validate_quantiles_in(quantiles)
        logger.debug(f"Using quantiles: {quantiles}")

    # Explicit flag instead of `if quantiles:` so array-like quantiles do
    # not trigger an ambiguous truth-value error.
    use_quantiles = quantiles is not None and len(quantiles) > 0

    # Build the quantile loss once here instead of on every loss call;
    # the factory validates and registers on each invocation.
    quantile_loss_fn = (
        combined_quantile_loss(quantiles) if use_quantiles else None
    )

    @register_keras_serializable(
        "fusionlab.nn.losses",
        name=f"prediction_based_loss_q{quantiles}_w{anomaly_loss_weight}"
    )
    def _pb_loss(y_true, y_pred):
        # Prediction term: pinball loss with quantiles, otherwise MSE.
        if use_quantiles:
            pred_loss = quantile_loss_fn(y_true, y_pred)
        else:
            pred_loss = tf_reduce_mean(tf_square(y_true - y_pred))

        # Anomaly scores are derived from the absolute errors.
        prediction_errors = tf_abs(y_true - y_pred)

        # With a quantile axis present, e.g. (batch, horizon, quantiles),
        # average the errors across it.
        if len(y_pred.shape) == 3 and use_quantiles:
            anomaly_scores = tf_reduce_mean(prediction_errors, axis=-1)
        else:
            anomaly_scores = prediction_errors

        # Anomaly loss: mean of the squared anomaly scores.
        anomaly_loss = tf_reduce_mean(tf_square(anomaly_scores))

        return pred_loss + anomaly_loss_weight * anomaly_loss

    return _pb_loss
@ParamsValidator({'quantiles': [Real, 'array-like']})
@ensure_pkg(KERAS_BACKEND or "keras", extra=DEP_MSG)
def combined_quantile_loss_(quantiles: List[float]):
    """
    Create a quantile (pinball) loss function for multiple quantiles.

    Parameters
    ----------
    quantiles : list of float
        Quantile levels, validated with ``validate_quantiles_in``. The
        i-th level is applied to index ``i`` of the quantile axis
        (axis 2) of ``y_pred``.

    Returns
    -------
    callable
        A loss function ``loss_fn(y_true, y_pred)`` returning the mean
        of the per-quantile pinball losses.

    Notes
    -----
    - The returned function is registered with
      ``register_keras_serializable`` under the name
      ``"combined_quantile_loss_"``.
    - ``y_true`` gets a new axis inserted at position 2 so it broadcasts
      over the quantile axis. A rank-3 ``y_pred`` ``(B, H, Q)`` gets a
      trailing axis appended; any other rank is used as is and is
      expected to be ``(B, H, Q, O)``.
    - The rank is resolved with ``tf_cond`` on ``tf_rank(y_pred)``. The
      previous ``len(y_pred) == 3`` test compared the *batch size*, not
      the rank, and hid failures behind a bare ``except``.
    """
    # Validate & store quantiles
    quantiles = validate_quantiles_in(quantiles)

    @register_keras_serializable(
        "fusionlab.nn.losses",
        name="combined_quantile_loss_"
    )
    def _cqloss(y_true, y_pred):
        # Insert the quantile axis into y_true: (B, H, O) -> (B, H, 1, O)
        y_true_expanded = tf_expand_dims(y_true, axis=2)

        # (B, H, Q) -> (B, H, Q, 1); other ranks are passed through.
        y_pred_4d = tf_cond(
            tf_equal(tf_rank(y_pred), 3),
            lambda: tf_expand_dims(y_pred, axis=-1),
            lambda: y_pred,
        )

        # Broadcasts along the quantile axis => (B, H, Q, O)
        error = y_true_expanded - y_pred_4d

        # Accumulate the mean pinball loss of each quantile.
        loss_val = 0.0
        for i, q in enumerate(quantiles):
            err_q = error[:, :, i, :]
            loss_val += tf_reduce_mean(
                tf_maximum(q * err_q, (q - 1) * err_q)
            )

        # Average over all quantiles.
        return loss_val / len(quantiles)

    return _cqloss
@register_keras_serializable("fusionlab.nn.losses",
                             name="combined_quantile_loss")
def combined_quantile_loss(quantiles):
    """
    Build a pinball loss averaged over several quantile levels.

    Parameters
    ----------
    quantiles : list of float
        Quantile levels, validated with ``validate_quantiles_in``. The
        i-th level is paired with index ``i`` along axis 2 of the
        prediction tensor.

    Returns
    -------
    callable
        ``loss_fn(y_true, y_pred)`` returning the mean of the
        per-quantile pinball losses.

    Notes
    -----
    ``y_true`` receives a new axis at position 2, e.g.
    ``(B, H, O) -> (B, H, 1, O)``. A rank-3 ``y_pred`` ``(B, H, Q)`` is
    expanded to ``(B, H, Q, 1)``; otherwise it is used unchanged and is
    expected to be ``(B, H, Q, O)``.

    NOTE(review): this factory and the closure it returns are both
    registered under the name ``"combined_quantile_loss"``; presumably
    the later registration replaces the earlier one - confirm this is
    intended.
    """
    quantiles = validate_quantiles_in(quantiles)

    @register_keras_serializable(
        "fusionlab.nn.losses",
        name="combined_quantile_loss"
    )
    def _cqloss(y_true, y_pred):
        # Targets gain a quantile axis so they broadcast over Q.
        target = tf_expand_dims(y_true, axis=2)

        # Predictions are brought to 4-D: append an output axis only when
        # the tensor is rank 3.
        pred_rank = tf_rank(y_pred)
        forecast = tf_cond(
            tf_equal(pred_rank, 3),
            true_fn=lambda: tf_expand_dims(y_pred, axis=-1),
            false_fn=lambda: y_pred,
        )

        # (B, H, 1, O) - (B, H, Q, O) broadcasts along axis 2.
        residual = target - forecast

        total = tf_constant(0.0, dtype=residual.dtype)
        for idx, level in enumerate(quantiles):
            tau = tf_cast(level, dtype=residual.dtype)
            # Slice out this quantile: (B, H, Q, O) -> (B, H, O)
            residual_q = residual[:, :, idx, :]
            pinball = tf_maximum(tau * residual_q, (tau - 1) * residual_q)
            total += tf_reduce_mean(pinball)

        # Mean over the quantile levels.
        n_levels = tf_cast(len(quantiles), dtype=total.dtype)
        return total / n_levels

    return _cqloss
@register_keras_serializable("fusionlab.nn.losses",
                             name="combined_quantile_loss__")
def combined_quantile_loss__(quantiles):
    """
    Build a pinball loss averaged over several quantile levels.

    Variant of the combined quantile loss whose returned closure is not
    itself registered; only this factory is.

    Parameters
    ----------
    quantiles : list of float
        Quantile levels, validated with ``validate_quantiles_in``. The
        i-th level is paired with index ``i`` along axis 2 of the
        prediction tensor.

    Returns
    -------
    callable
        ``loss_fn(y_true, y_pred)`` returning the mean of the
        per-quantile pinball losses.
    """
    quantiles = validate_quantiles_in(quantiles)

    # @optional_tf_function
    def _cqloss(y_true, y_pred):
        # (B, H, O) -> (B, H, 1, O) so targets broadcast over quantiles.
        target = tf_expand_dims(y_true, axis=2)

        # (B, H, Q) -> (B, H, Q, 1); any other rank is passed through.
        forecast = tf_cond(
            tf_equal(tf_rank(y_pred), 3),
            lambda: tf_expand_dims(y_pred, axis=-1),
            lambda: y_pred,
        )

        residual = target - forecast

        total = 0.0
        for idx, tau in enumerate(quantiles):
            residual_q = residual[:, :, idx, :]
            total += tf_reduce_mean(
                tf_maximum(tau * residual_q, (tau - 1) * residual_q)
            )

        return total / len(quantiles)

    return _cqloss
@ensure_pkg(KERAS_BACKEND or "keras", extra=DEP_MSG)
def combined_total_loss(
    quantiles: List[float],
    anomaly_layer: Loss,
    anomaly_scores: Tensor,
):
    """
    Build a loss that adds a quantile loss and an anomaly loss.

    Parameters
    ----------
    quantiles : list of float
        Quantiles for the quantile part; forwarded to
        ``combined_quantile_loss``.
    anomaly_layer : tf.keras.losses.Loss
        Loss object or callable implementing the anomaly loss. It is
        passed through ``validate_keras_loss``; a warning is emitted if
        the result is not an ``AnomalyLoss`` instance.
    anomaly_scores : tf.Tensor or np.ndarray
        Anomaly scores handed to ``anomaly_layer``.

    Returns
    -------
    callable
        A loss function ``loss_fn(y_true, y_pred)`` registered with
        ``register_keras_serializable`` under the name
        ``"combined_total_loss"``.

    Notes
    -----
    The anomaly term is evaluated as
    ``anomaly_layer(anomaly_scores, zeros_like(anomaly_scores))``; it
    does not depend on ``y_true`` or ``y_pred``.
    """
    # Imported here rather than at module level, as in the original code.
    from .components import AnomalyLoss

    # Same quantile logic as combined_quantile_loss.
    pinball_fn = combined_quantile_loss(quantiles)

    anomaly_fn = validate_keras_loss(
        anomaly_layer,
        ops="validate",
    )
    if not isinstance(anomaly_fn, AnomalyLoss):
        warnings.warn(
            "Expected a AnomalyLoss instance, got %s" % type(anomaly_fn)
        )

    @register_keras_serializable(
        package="fusionlab.nn.losses",
        name="combined_total_loss"
    )
    def _total_loss(y_true, y_pred):
        # Quantile term first, then the anomaly term against zeros.
        return pinball_fn(y_true, y_pred) + anomaly_fn(
            anomaly_scores, tf_zeros_like(anomaly_scores)
        )

    return _total_loss
@check_params({"q": Real})
@ensure_pkg(KERAS_BACKEND or "keras", extra=DEP_MSG)
def quantile_loss(q):
    r"""
    Quantile (Pinball) Loss Function for Quantile Regression.

    The ``quantile_loss`` function computes the quantile loss, also
    known as Pinball loss, which is used in quantile regression to
    predict a specific quantile of the target variable's distribution.
    This loss penalizes over-predictions and under-predictions
    differently based on the quantile parameter.

    .. math::
        L_q(y, \hat{y}) = \frac{1}{N} \sum_{i=1}^{N}
        \rho_q(y_i - \hat{y}_i)

    Where:

    - :math:`y_i` is the true value.
    - :math:`\hat{y}_i` is the predicted value.
    - :math:`\rho_q(u)` is the quantile loss function defined as:

    .. math::
        \rho_q(u) = u \cdot (q - \mathbb{I}(u < 0))

    Here, :math:`\mathbb{I}(u < 0)` is the indicator function that is 1
    if :math:`u < 0` and 0 otherwise.

    Parameters
    ----------
    q : float
        The quantile to calculate the loss for. Should be a value
        between 0 and 1. For example, ``q=0.1`` corresponds to the 10th
        percentile, ``q=0.5`` is the median, and ``q=0.9`` corresponds
        to the 90th percentile. Only the type (``Real``) is declared to
        the parameter checker; no range check is performed in this
        function.

    Returns
    -------
    loss : callable
        A loss function that can be used in Keras models. It takes
        ``y_true`` and ``y_pred`` and returns the pinball loss averaged
        over the last axis.

    Examples
    --------
    >>> from fusionlab.nn.losses import quantile_loss
    >>> import tensorflow as tf
    >>> from tensorflow.keras.models import Sequential
    >>> from tensorflow.keras.layers import Dense
    >>> import numpy as np
    >>>
    >>> # Create a simple Keras model
    >>> model = Sequential()
    >>> model.add(Dense(64, input_dim=10, activation='relu'))
    >>> model.add(Dense(1))
    >>>
    >>> # Compile the model with quantile loss for the 10th percentile
    >>> model.compile(optimizer='adam', loss=quantile_loss(q=0.1))
    >>>
    >>> # Generate example data
    >>> X_train = np.random.rand(100, 10)
    >>> y_train = np.random.rand(100, 1)
    >>>
    >>> # Train the model
    >>> model.fit(X_train, y_train, epochs=10, batch_size=32)

    Notes
    -----
    - **Usage in Probabilistic Forecasting**: The quantile loss is
      useful in probabilistic forecasting where multiple quantiles are
      predicted to describe a distribution of possible outcomes rather
      than a single point estimate.
    - **Handling Multiple Quantiles**: To predict multiple quantiles,
      you can create separate output layers for each quantile and
      compile the model with a list of quantile loss functions.
    - **Gradient Computation**: The loss is built from backend
      ``maximum`` and ``mean`` operations, so it can be used with
      gradient-based optimizers in Keras.
    - **Robustness to Outliers**: Compared with Mean Squared Error
      (MSE), the pinball loss grows linearly with the error and is less
      sensitive to outliers.

    See Also
    --------
    tensorflow.keras.losses :
        A module containing built-in loss functions in Keras.
    sklearn.metrics.mean_pinball_loss :
        Computes the mean pinball loss, similar to the loss used here.
    statsmodels.regression.quantile_regression :
        Provides tools for quantile regression analysis.

    References
    ----------
    .. [1] Koenker, R., & Bassett Jr, G. (1978). Regression quantiles.
           *Econometrica*, 46(1), 33-50.
    .. [2] Taylor, J. W., Oosterlee, C. W., & Haggerty, K. (2008).
           A review of quantile regression in financial time series
           forecasting. *Applied Financial Economics*, 18(12), 955-967.
    .. [3] Koenker, R. (2005). Quantile Regression.
           *Cambridge University Press*.
    """
    @register_keras_serializable("fusionlab.nn.losses", name='quantile_loss')
    def _q_loss(y_true, y_pred):
        r"""
        Compute the Quantile Loss (Pinball Loss) for a Given Batch.

        The loss is defined as:

        .. math::
            L_q(y, \hat{y}) = \frac{1}{N} \sum_{i=1}^{N}
            \rho_q(y_i - \hat{y}_i)

        Where:

        .. math::
            \rho_q(u) = u \cdot (q - \mathbb{I}(u < 0))

        Parameters
        ----------
        y_true : Tensor
            The ground truth values. Shape: ``(batch_size, ...)``.
        y_pred : Tensor
            The predicted values by the model.
            Shape: ``(batch_size, ...)``.

        Returns
        -------
        loss : Tensor
            The pinball loss averaged over the last axis.
        """
        error = y_true - y_pred
        # max(q*e, (q-1)*e) is the pinball score rho_q(e).
        loss = K.mean(
            K.maximum(q * error, (q - 1) * error),
            axis=-1
        )
        return loss

    return _q_loss
@check_params({'quantiles': List[float]})
@ensure_pkg(KERAS_BACKEND or "keras", extra=DEP_MSG)
def quantile_loss_multi(quantiles=[0.1, 0.5, 0.9]):
    r"""
    Multi-Quantile (Pinball) Loss Function for Quantile Regression.

    The ``quantile_loss_multi`` function computes the average pinball
    loss across multiple quantile levels.

    .. math::
        L_{\text{multi}}(Y, \hat{Y}) = \frac{1}{Q}
        \sum_{q \in \text{quantiles}} L_q(Y, \hat{Y})

    Where:

    - :math:`L_q(Y, \hat{Y})` is the quantile loss for a specific
      quantile :math:`q`.
    - :math:`Q` is the total number of quantiles.

    Each individual quantile loss is defined as:

    .. math::
        L_q(Y, \hat{Y}) = \frac{1}{N} \sum_{i=1}^{N}
        \rho_q(y_i - \hat{y}_i)

    And the pinball loss function :math:`\rho_q(u)` is:

    .. math::
        \rho_q(u) = u \cdot (q - \mathbb{I}(u < 0))

    Here, :math:`\mathbb{I}(u < 0)` is the indicator function that is 1
    if :math:`u < 0` and 0 otherwise.

    Parameters
    ----------
    quantiles : list of float, default=[0.1, 0.5, 0.9]
        Quantile levels to average over; each value should be between 0
        and 1. They are validated with ``validate_quantiles_in``. The
        default stays a list because the parameter checker declares
        ``List[float]`` (presumably a tuple would be rejected - TODO
        confirm); this function rebinds the name and does not mutate the
        default itself.

    Returns
    -------
    loss : callable
        A loss function taking ``y_true`` and ``y_pred`` and returning
        the pinball loss averaged over the given quantile levels.

    Examples
    --------
    >>> from fusionlab.nn.losses import quantile_loss_multi
    >>> import tensorflow as tf
    >>> from tensorflow.keras.models import Sequential
    >>> from tensorflow.keras.layers import Dense
    >>> import numpy as np
    >>>
    >>> # Create a simple Keras model
    >>> model = Sequential()
    >>> model.add(Dense(64, input_dim=10, activation='relu'))
    >>> model.add(Dense(1))
    >>>
    >>> # Compile the model with multi-quantile loss for the 10th, 50th,
    >>> # and 90th percentiles
    >>> model.compile(optimizer='adam', loss=quantile_loss_multi(
    ...     quantiles=[0.1, 0.5, 0.9]))
    >>>
    >>> # Generate example data
    >>> X_train = np.random.rand(100, 10)
    >>> y_train = np.random.rand(100, 1)
    >>>
    >>> # Train the model
    >>> model.fit(X_train, y_train, epochs=10, batch_size=32)

    Notes
    -----
    - **How quantiles are applied**: every quantile level is applied to
      the same error tensor ``y_true - y_pred``; this function does not
      index a quantile axis of ``y_pred``. For per-quantile model
      outputs, see ``combined_quantile_loss`` or ``pinball_loss``.
    - **Gradient Computation**: The loss is built from backend
      ``maximum``, ``mean`` and ``stack`` operations, so it can be used
      with gradient-based optimizers in Keras.
    - **Robustness to Outliers**: Compared with Mean Squared Error
      (MSE), the pinball loss grows linearly with the error and is less
      sensitive to outliers.

    See Also
    --------
    tensorflow.keras.losses :
        A module containing built-in loss functions in Keras.
    sklearn.metrics.mean_pinball_loss :
        Computes the mean pinball loss, similar to the loss used here.
    statsmodels.regression.quantile_regression :
        Provides tools for quantile regression analysis.

    References
    ----------
    .. [1] Koenker, R., & Bassett Jr, G. (1978). Regression quantiles.
           *Econometrica*, 46(1), 33-50.
    .. [2] Taylor, J. W., Oosterlee, C. W., & Haggerty, K. (2008).
           A review of quantile regression in financial time series
           forecasting. *Applied Financial Economics*, 18(12), 955-967.
    .. [3] Koenker, R. (2005). Quantile Regression.
           *Cambridge University Press*.
    """
    quantiles = validate_quantiles_in(quantiles)

    @register_keras_serializable("fusionlab.nn.losses",
                                 name="quantile_loss_multi")
    def _q_loss_multi(y_true, y_pred):
        """
        Compute the averaged pinball loss for a given batch.

        The pinball loss is evaluated once per quantile level on the
        same error tensor, reduced over the last axis, and the results
        are averaged across the quantile levels.

        Parameters
        ----------
        y_true : Tensor
            The ground truth values. Shape: ``(batch_size, ...)``.
        y_pred : Tensor
            The predicted values by the model.
            Shape: ``(batch_size, ...)``.

        Returns
        -------
        loss : Tensor
            The pinball loss averaged across all specified quantiles.
        """
        # The error does not depend on q, so compute it once.
        error = y_true - y_pred
        # `K.maximum` is the backend op; `K.tf_maximum` does not exist.
        losses = [
            K.mean(K.maximum(q * error, (q - 1) * error), axis=-1)
            for q in quantiles
        ]
        # Stack the per-quantile losses and average across them.
        loss_stack = K.stack(losses, axis=0)
        return K.mean(loss_stack, axis=0)

    return _q_loss_multi
@ParamsValidator(
    {
        'anomaly_scores': ['array-like:tf:transf'],
        'anomaly_loss_weight': [Interval(Real, 0, None, closed='neither')]
    }
)
@ensure_pkg(KERAS_BACKEND or "keras", extra=DEP_MSG)
def anomaly_loss(anomaly_scores, anomaly_loss_weight=1.0):
    r"""
    Build a Keras-compatible loss from fixed anomaly scores.

    The returned callable penalizes large anomaly scores. Given anomaly
    scores :math:`a = [a_1, a_2, ..., a_n]`, the anomaly loss
    :math:`L` is defined as:

    .. math::
        L = w \cdot \frac{1}{n} \sum_{i=1}^{n} a_i^{2}

    where :math:`w` is the `anomaly_loss_weight`, and :math:`n` is the
    number of scores.

    Parameters
    ----------
    anomaly_scores : tf.Tensor or array-like
        The anomaly scores reflecting the degree of abnormality in data
        points. Higher values indicate more unusual points. Conversion
        of array-like input to a tensor is presumably performed by the
        ``'array-like:tf:transf'`` parameter validator - TODO confirm.

    anomaly_loss_weight : float, optional
        Scaling factor for the anomaly loss. Default is ``1.0``. The
        validator requires a strictly positive value. It is converted
        to a tensor with the dtype of ``anomaly_scores``.

    Returns
    -------
    callable
        A loss function with signature ``loss(y_true, y_pred)``,
        registered with ``register_keras_serializable``. It ignores both
        arguments and returns the weighted mean of the squared anomaly
        scores.

    Examples
    --------
    >>> from fusionlab.nn.losses import anomaly_loss
    >>> import tensorflow as tf
    >>> anomaly_scores = tf.constant([0.1, 0.5, 2.0], dtype=tf.float32)
    >>> loss_fn = anomaly_loss(anomaly_scores, anomaly_loss_weight=0.5)
    >>> y_true_dummy = tf.zeros_like(anomaly_scores)
    >>> y_pred_dummy = tf.zeros_like(anomaly_scores)
    >>> loss_value = loss_fn(y_true_dummy, y_pred_dummy)
    >>> print(round(float(loss_value), 2))
    0.71

    Here the mean of the squared scores is
    ``(0.01 + 0.25 + 4.0) / 3 = 1.42`` and the weight halves it.

    Notes
    -----
    - The `y_true` and `y_pred` parameters are included for
      compatibility with Keras losses but are not used.
    - When the static rank of `anomaly_scores` is unknown, the scores
      are flattened to one dimension before the loss is built.

    See Also
    --------
    tf.keras.losses.Loss : Base class for all Keras losses.
    tf.reduce_mean : TensorFlow function for computing a mean.
    tf.square : Squares tensor elements.

    References
    ----------
    .. [1] Goodfellow, Ian, et al. *Deep Learning.* MIT Press, 2016.
    """
    # `TensorShape` exposes `rank`; the previous `tf_rank` attribute name
    # does not exist on a shape and raised AttributeError.
    if anomaly_scores.shape.rank is None:
        anomaly_scores = tf_reshape(anomaly_scores, [-1])

    anomaly_loss_weight = tf_convert_to_tensor(
        anomaly_loss_weight, dtype=anomaly_scores.dtype
    )

    @register_keras_serializable(
        "fusionlab.nn.losses",
        name="anomaly_loss"
    )
    def _a_loss(y_true, y_pred):
        # y_true / y_pred are intentionally unused.
        return anomaly_loss_weight * tf_reduce_mean(tf_square(anomaly_scores))

    return _a_loss
Examples -------- >>> from fusionlab.nn.losses import anomaly_loss >>> import tensorflow as tf >>> anomaly_scores = tf.constant([0.1, 0.5, 2.0], dtype=tf.float32) >>> loss_fn = anomaly_loss(anomaly_scores, anomaly_loss_weight=0.5) >>> y_true_dummy = tf.zeros_like(anomaly_scores) >>> y_pred_dummy = tf.zeros_like(anomaly_scores) >>> loss_value = loss_fn(y_true_dummy, y_pred_dummy) >>> print(loss_value.numpy()) 1.4166666 In this example, the anomaly loss encourages the model to reduce the given anomaly scores. Notes ----- - The `y_true` and `y_pred` parameters are included for compatibility with Keras losses but are not utilized in the anomaly loss computation. - If `anomaly_scores` is provided as array-like, it is converted to float32 for consistency. If it is already a tensor, it is cast to float32 if needed. See Also -------- :func:`tf.keras.losses.Loss` : Base class for all Keras losses. :func:`tf.tf_reduce_mean` : TensorFlow method for computing mean. :func:`tf.tf_square` : Squares tensor elements. References ---------- .. [1] Goodfellow, Ian, et al. *Deep Learning.* MIT Press, 2016. """ # if not isinstance(anomaly_scores, tf.Tensor): # anomaly_scores = tf.tf_convert_to_tensor(anomaly_scores, dtype=tf.float32) # else: # if anomaly_scores.dtype not in (tf.float16, tf.float32, tf.float64): # anomaly_scores = tf.cast(anomaly_scores, tf.float32) if anomaly_scores.shape.tf_rank is None: anomaly_scores =tf_reshape(anomaly_scores, [-1]) anomaly_loss_weight =tf_convert_to_tensor( anomaly_loss_weight, dtype=anomaly_scores.dtype ) @register_keras_serializable( "fusionlab.nn.losses", name="anomaly_loss" ) def _a_loss(y_true, y_pred): return anomaly_loss_weight * tf_reduce_mean(tf_square(anomaly_scores)) return _a_loss