Source code for interpreTS.core.features.feature_variability_in_sub_periods

import pandas as pd
import numpy as np


[docs]
def calculate_variability_in_sub_periods(data, window_size, step_size=None, ddof=0):
    """
    Calculate the variance within sub-periods of a time series, providing a measure of variability.
    
    Parameters
    ----------
    data : pd.Series or np.ndarray
        The time series data for which the variability is to be calculated.
    window_size : int
        The size of each sub-period window (number of points in each window).
    step_size : int, optional
        The step size between sub-periods. If None, it defaults to window_size (non-overlapping windows).
    ddof : int, optional
        The degrees of freedom to use when calculating variance within each sub-period. Default is 0 (population variance).
        
    Returns
    -------
    pd.Series
        A series of variance values representing the variability in each sub-period.
        
    Raises
    ------
    TypeError
        If the data is not a valid time series type.
    ValueError
        If the data contains NaN values or if window_size is larger than the data length.
        
    Examples
    --------
    >>> import pandas as pd
    >>> data = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    >>> calculate_variability_in_sub_periods(data, window_size=5)
    0    2.5
    1    2.5
    dtype: float64
    """
    # Convert numpy array to pandas Series if necessary
    if isinstance(data, np.ndarray):
        data = pd.Series(data)
    
    if step_size is None:
        step_size = window_size  # Default to non-overlapping windows

    # Initialize a list to hold variability values
    variability_measures = []

    # Calculate variance for each sub-period
    for start in range(0, len(data) - window_size + 1, step_size):
        sub_period = data[start:start + window_size]
        
        # Append the calculated variance for the sub-period
        variability_measures.append(np.var(sub_period, ddof=ddof))
    
    # Return as a Pandas Series for easier handling of the result
    return pd.Series(variability_measures)