# Source code for interpreTS.core.features.feature_linearity

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score


# [docs]
def calculate_linearity(data, normalize=True, use_derivative=True):
    """
    Calculate the linearity of a time series as the R-squared of a straight-line fit.

    The series is optionally z-score normalized and optionally replaced by its
    first differences; the resulting values are then regressed against their
    positional index (0, 1, 2, ...) and the R-squared of that fit is returned.

    Parameters
    ----------
    data : pd.Series, np.ndarray or sequence of numbers
        The time series data for which the linearity is to be calculated.
        Anything that is not already a ``pd.Series`` is wrapped in one.
    normalize : bool, optional
        Whether to subtract the mean and divide by the sample standard
        deviation before calculating linearity (default is True).
    use_derivative : bool, optional
        Whether to calculate linearity on the first differences of the data
        instead of on the data itself (default is True).

    Returns
    -------
    float
        The R-squared value representing the linearity of the time series.
        A value closer to 1 indicates higher linearity. Special cases:

        * ``0.0`` when the data holds fewer than two distinct non-missing
          values (empty, single-point or constant series), or when
          ``normalize`` is set and the standard deviation is zero or not
          finite.
        * ``1.0`` when ``use_derivative`` is set and the first differences are
          equal to within a small relative tolerance (or there are fewer than
          two of them), i.e. the series itself is a straight line.

    Raises
    ------
    TypeError
        If the data cannot be converted to floating-point values.

    Examples
    --------
    >>> import pandas as pd
    >>> data = pd.Series([1, 2, 3, 4, 5])
    >>> calculate_linearity(data)
    1.0
    >>> data = pd.Series([1, 2, 1, 2, 1, 2, 1, 2, 1, 2])
    >>> abs(calculate_linearity(data)) < 1e-9
    True
    >>> calculate_linearity(pd.Series([7, 7, 7]))
    0.0
    """
    # Relative spread below which consecutive differences count as equal.
    # Exact equality is too strict: normalizing and differencing a perfectly
    # linear series leaves steps that can differ in the last few bits.
    step_rtol = 1e-9

    if not isinstance(data, pd.Series):
        data = pd.Series(data)
    try:
        # astype returns a new Series, so the caller's object is never modified.
        data = data.astype(float)
    except (TypeError, ValueError) as exc:
        raise TypeError("data must contain only numeric values") from exc

    # Empty, single-point and constant series have no trend to measure.
    # nunique() ignores NaN, so missing values do not count as a distinct level.
    if data.nunique() < 2:
        return 0.0

    if normalize:
        scale = data.std()
        # Guard the division: a zero or non-finite spread would turn every
        # value into NaN/inf and make the later checks meaningless.
        if not np.isfinite(scale) or scale == 0.0:
            return 0.0
        data = (data - data.mean()) / scale

    if use_derivative:
        steps = data.diff().dropna().to_numpy()
        # The data is known to be non-constant at this point, so zero or one
        # usable step, or steps that are numerically identical, means the
        # series is a straight line.
        if steps.size < 2:
            return 1.0
        if np.ptp(steps) <= step_rtol * np.abs(steps).max():
            return 1.0
        y = steps
    else:
        y = data.to_numpy()

    # Regress the values on their position and report the fit quality.
    x = np.arange(len(y)).reshape(-1, 1)
    model = LinearRegression().fit(x, y)
    return float(model.score(x, y))