#!/usr/bin/python
"""Functions for defining weights for iterative fitting methods."""

import numpy as np

MIN_POSITIVE_SIGNAL = 0.0001

def simple_cutoff(
    residuals, log_residuals, pred_sig, design_matrix, leverages, C, cutoff
):
    """Define outliers based on the signal (rather than the log-signal).

    Parameters
    ----------
    residuals : ndarray
        Residuals of the signal (observed signal - fitted signal).
    log_residuals : ndarray
        Residuals of the log signal (log observed signal - fitted log
        signal).
    pred_sig : ndarray
        The predicted signal, given a previous fit.
    design_matrix : ndarray (g, ...)
        Design matrix holding the covariates used to solve for the
        regression coefficients.
    leverages : ndarray
        The leverages (diagonal of the 'hat matrix') of the fit.
    C : float
        Estimate of the standard deviation of the error.
    cutoff : float
        Cut-off value for defining outliers based on fitting residuals.
        Here the condition is::

            |residuals| > cutoff x C x HAT_factor

        where ``HAT_factor = sqrt(1 - leverages)`` adjusts for leverage
        effects.

    Returns
    -------
    cond : ndarray
        Boolean array with the same shape as ``residuals``, True where
        the corresponding measurement is an outlier.
    """
    leverages[np.isclose(leverages, 1.0)] = 0.99  # avoids rare issues
    HAT_factor = np.sqrt(1 - leverages)
    cond = np.abs(residuals) > cutoff * C * HAT_factor
    return cond
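
# A minimal usage sketch with made-up numbers (a single "voxel" with four
# measurements); ``log_residuals``, ``pred_sig`` and ``design_matrix`` are
# ignored by ``simple_cutoff`` but belong to the shared condition-function
# signature.
def _demo_simple_cutoff():
    residuals = np.array([0.1, -0.2, 5.0, 0.05])  # one gross signal residual
    log_residuals = np.zeros(4)  # unused here
    pred_sig = np.ones(4)  # unused here
    design_matrix = np.ones((4, 2))  # unused here
    leverages = np.full(4, 0.5)
    C = 0.3  # assumed error-scale estimate
    # threshold = 3 * 0.3 * sqrt(0.5) ~= 0.64, so only the 5.0 residual
    # is flagged: [False, False, True, False]
    return simple_cutoff(
        residuals, log_residuals, pred_sig, design_matrix, leverages, C, cutoff=3
    )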

def two_eyes_cutoff(
    residuals, log_residuals, pred_sig, design_matrix, leverages, C, cutoff
):
    """Define outliers with the two-eyes approach.

    See :footcite:p:`Collier2015` for more details.

    Parameters
    ----------
    residuals : ndarray
        Residuals of the signal (observed signal - fitted signal).
    log_residuals : ndarray
        Residuals of the log signal (log observed signal - fitted log
        signal).
    pred_sig : ndarray
        The predicted signal, given a previous fit.
    design_matrix : ndarray (g, ...)
        Design matrix holding the covariates used to solve for the
        regression coefficients.
    leverages : ndarray
        The leverages (diagonal of the 'hat matrix') of the fit.
    C : float
        Estimate of the standard deviation of the error.
    cutoff : float
        Cut-off value for defining outliers based on fitting residuals,
        see :footcite:p:`Collier2015` for the two-eyes approach used here.

    Returns
    -------
    cond : ndarray
        Boolean array with the same shape as ``residuals``, True where
        the corresponding measurement is an outlier.

    References
    ----------
    .. footbibliography::
    """
    leverages[np.isclose(leverages, 1.0)] = 0.99  # avoids rare issues
    HAT_factor = np.sqrt(1 - leverages)
    cond = (residuals > +cutoff * C * HAT_factor) | (
        log_residuals < -cutoff * C * HAT_factor / pred_sig
    )
    return cond
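
# A minimal sketch of the two-eyes rule with made-up numbers: the first
# "eye" (signal scale) catches implausibly high signals, the second "eye"
# (log scale, rescaled by the predicted signal) catches signal dropouts.
def _demo_two_eyes_cutoff():
    pred_sig = np.ones(4)
    data = np.array([1.05, 3.0, 0.02, 0.95])  # one spike, one dropout
    residuals = data - pred_sig
    log_residuals = np.log(data) - np.log(pred_sig)
    design_matrix = np.ones((4, 2))  # unused here
    leverages = np.full(4, 0.5)
    C = 0.1  # assumed error-scale estimate
    # flags index 1 (residual +2.0 > 3*0.1*sqrt(0.5) ~= 0.21) and index 2
    # (log-residual ~= -3.9 < -0.21): [False, True, True, False]
    return two_eyes_cutoff(
        residuals, log_residuals, pred_sig, design_matrix, leverages, C, cutoff=3
    )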

def weights_method_wls_m_est(
    data,
    pred_sig,
    design_matrix,
    leverages,
    idx,
    total_idx,
    last_robust,
    *,
    m_est="gm",
    cutoff=3,
    outlier_condition_func=simple_cutoff,
):
    """Calculate M-estimator weights for the WLS model.

    Parameters
    ----------
    data : ndarray
        The measured signal.
    pred_sig : ndarray
        The predicted signal, given a previous fit.
        Has the same shape as ``data``.
    design_matrix : ndarray (g, ...)
        Design matrix holding the covariates used to solve for the
        regression coefficients.
    leverages : ndarray
        The leverages (diagonal of the 'hat matrix') of the fit.
    idx : int
        The current iteration number.
    total_idx : int
        The total number of iterations.
    last_robust : ndarray
        True for inlier indices and False for outlier indices. Must have
        the same shape as ``data``.
    m_est : str, optional
        M-estimator weighting scheme to use. Currently,
        'gm' (Geman-McClure) and 'cauchy' are provided.
    cutoff : float, optional
        Cut-off value for defining outliers based on fitting residuals.
        Will be passed to the ``outlier_condition_func``.
        Typical example: ``|residuals| > cutoff x standard_deviation``.
    outlier_condition_func : callable, optional
        A function with args and returns as follows::

            is_an_outlier = outlier_condition_func(
                residuals, log_residuals, pred_sig, design_matrix,
                leverages, C, cutoff
            )

    Returns
    -------
    w : ndarray
        Weights for the next WLS fit, with the same shape as ``data``.
    robust : ndarray or None
        Boolean inlier mask (True for inliers) on the last two iterations,
        None otherwise.

    Notes
    -----
    Robust weights are calculated specifically for the WLS problem, i.e. the
    usual form of the WLS problem is accounted for when defining these new
    weights, see :footcite:p:`Collier2015`. On the second-to-last iteration,
    OLS is performed without outliers. On the last iteration, WLS is performed
    without outliers.

    References
    ----------
    .. footbibliography::
    """
    # check if M-estimator is valid (defined in this function)
    if m_est not in ["gm", "cauchy"]:
        raise ValueError("unknown M-estimator choice")
    # min 4 iters: (1) WLS, (2) WLS with M-weights,
    # (3) clean OLS, (4) clean WLS
    if total_idx < 4:
        raise ValueError("require 4+ iterations")
    p, N = design_matrix.shape[-1], data.shape[-1]
    if N <= p:
        raise ValueError("Fewer data points than parameters.")
    factor = 1.4826 * np.sqrt(N / (N - p))
    # handle potential zeros
    pred_sig[pred_sig < MIN_POSITIVE_SIGNAL] = MIN_POSITIVE_SIGNAL
    # calculate quantities needed for C and w
    log_pred_sig = np.log(pred_sig)
    residuals = data - pred_sig
    log_data = np.log(data)
    log_residuals = log_data - log_pred_sig
    z = pred_sig * log_residuals
    # IRLLS paper eq9 corrected (weights for log_residuals^2 are pred_sig^2)
    C = (
        factor
        * np.median(np.abs(z - np.median(z, axis=-1)[..., None]), axis=-1)[..., None]
    )
    C[C == 0] = np.nanmedian(C)  # C could be 0, if all signals = min_signal
    # NOTE: if more M-estimators are added, please update the docs!
    if m_est == "gm":
        w = (C / pred_sig) ** 2 / ((C / pred_sig) ** 2 + log_residuals**2) ** 2
    elif m_est == "cauchy":
        w = C**2 / ((C / pred_sig) ** 2 + log_residuals**2)
    robust = None
    if idx == total_idx - 1:  # OLS without outliers
        cond = outlier_condition_func(
            residuals, log_residuals, pred_sig, design_matrix, leverages, C, cutoff
        )
        robust = np.logical_not(cond)
        w[~robust] = 0.0
        w[robust] = 1.0
    if idx == total_idx:  # WLS without outliers
        robust = last_robust
        w[~robust] = 0.0
        w[robust] = pred_sig[robust] ** 2
    w[np.isinf(w)] = 0
    w[np.isnan(w)] = 0
    return w, robust
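
# A hedged end-to-end sketch of the intended four-iteration schedule:
# (1) WLS, (2) WLS with M-estimator weights, (3) OLS without outliers,
# (4) WLS without outliers. The log-linear model, solver, and leverage
# computation below are generic stand-ins, not DIPY's fitting pipeline.
def _demo_wls_m_est_schedule(seed=0):
    rng = np.random.default_rng(seed)
    N = 12
    X = np.column_stack([np.ones(N), np.linspace(0.0, 1.0, N)])
    data = np.exp(X @ np.array([0.0, -1.0])) + 0.01 * rng.standard_normal(N)
    data[3] = 2.0  # inject one gross outlier
    data = np.clip(data, MIN_POSITIVE_SIGNAL, None)
    w = np.ones(N)
    last_robust, total_idx = None, 4
    for idx in range(1, total_idx + 1):
        # weighted least squares on the log signal (generic WLS solve)
        XtW = X.T * w
        beta = np.linalg.solve(XtW @ X, XtW @ np.log(data))
        pred_sig = np.exp(X @ beta)
        # leverages = diagonal of the hat matrix of this weighted fit
        leverages = np.diag(X @ np.linalg.solve(XtW @ X, XtW)).copy()
        w, last_robust = weights_method_wls_m_est(
            data, pred_sig, X, leverages, idx, total_idx, last_robust
        )
    # final fit with the clean WLS weights from the last iteration
    XtW = X.T * w
    return np.linalg.solve(XtW @ X, XtW @ np.log(data)), last_robust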

def weights_method_nlls_m_est(
    data,
    pred_sig,
    design_matrix,
    leverages,
    idx,
    total_idx,
    last_robust,
    *,
    m_est="gm",
    cutoff=3,
    outlier_condition_func=simple_cutoff,
):
    """Calculate M-estimator weights for the NLLS model.

    Parameters
    ----------
    data : ndarray
        The measured signal.
    pred_sig : ndarray
        The predicted signal, given a previous fit.
        Has the same shape as ``data``.
    design_matrix : ndarray (g, ...)
        Design matrix holding the covariates used to solve for the
        regression coefficients.
    leverages : ndarray
        The leverages (diagonal of the 'hat matrix') of the fit.
    idx : int
        The current iteration number.
    total_idx : int
        The total number of iterations.
    last_robust : ndarray
        True for inlier indices and False for outlier indices. Must have
        the same shape as ``data``.
    m_est : str, optional
        M-estimator weighting scheme to use. Currently,
        'gm' (Geman-McClure) and 'cauchy' are provided.
    cutoff : float, optional
        Cut-off value for defining outliers based on fitting residuals.
        Will be passed to the ``outlier_condition_func``.
        Typical example: ``|residuals| > cutoff x standard_deviation``.
    outlier_condition_func : callable, optional
        A function with args and returns as follows::

            is_an_outlier = outlier_condition_func(
                residuals, log_residuals, pred_sig, design_matrix,
                leverages, C, cutoff
            )

    Returns
    -------
    w : ndarray
        Weights for the next NLLS fit, with the same shape as ``data``.
    robust : ndarray or None
        Boolean inlier mask (True for inliers) on the last iteration,
        None otherwise.

    Notes
    -----
    Robust weights are calculated specifically for the NLLS problem.
    On the last iteration, NLLS is performed without outliers.
    """
    # check if M-estimator is valid (defined in this function)
    if m_est not in ["gm", "cauchy"]:
        raise ValueError("unknown M-estimator choice")
    # min 3 iters: (1) NLLS, (2) NLLS with M-weights, (3) clean NLLS
    if total_idx < 3:
        raise ValueError("require 3+ iterations")
    p, N = design_matrix.shape[-1], data.shape[-1]
    if N <= p:
        raise ValueError("Fewer data points than parameters.")
    factor = 1.4826 * np.sqrt(N / (N - p))
    # handle potential zeros
    pred_sig[pred_sig < MIN_POSITIVE_SIGNAL] = MIN_POSITIVE_SIGNAL
    # calculate quantities needed for C and w
    log_pred_sig = np.log(pred_sig)
    residuals = data - pred_sig
    log_data = np.log(data)
    log_residuals = log_data - log_pred_sig
    C = (
        factor
        * np.median(
            np.abs(residuals - np.median(residuals, axis=-1)[..., None]), axis=-1
        )[..., None]
    )
    C[C == 0] = np.nanmedian(C)  # C could be 0, if all signals = min_signal
    # NOTE: if more M-estimators are added, please update the docs!
    if m_est == "gm":
        w = C**2 / (C**2 + residuals**2) ** 2
    elif m_est == "cauchy":
        w = C**2 / (C**2 + residuals**2)
    robust = None
    if idx == total_idx:  # NLLS without outliers
        cond = outlier_condition_func(
            residuals, log_residuals, pred_sig, design_matrix, leverages, C, cutoff
        )
        robust = np.logical_not(cond)
        w[~robust] = 0.0
        w[robust] = 1.0
    w[np.isinf(w)] = 0
    w[np.isnan(w)] = 0
    return w, robust
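
# A small illustration (made-up residual grid, in units of the scale C) of
# how the two supported M-estimators downweight large NLLS residuals:
# Geman-McClure weights decay like 1/r**4 while Cauchy weights decay like
# 1/r**2, so 'gm' suppresses gross outliers more aggressively.
def _demo_m_estimator_weights(C=1.0):
    r = np.linspace(0.0, 5.0, 6)  # residual magnitudes
    w_gm = C**2 / (C**2 + r**2) ** 2  # Geman-McClure, as in m_est="gm"
    w_cauchy = C**2 / (C**2 + r**2)  # Cauchy, as in m_est="cauchy"
    return r, w_gm, w_cauchy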