Source code for interpreTS.core.features.feature_variability_in_sub_periods
import pandas as pd
import numpy as np
[docs]
def calculate_variability_in_sub_periods(data, window_size, step_size=None, ddof=0):
    """
    Calculate the variance within sub-periods of a time series.

    The data is cut into windows of ``window_size`` consecutive points,
    starting at position 0 and advancing by ``step_size`` points. Only full
    windows are used, so trailing points that do not fill a complete window
    are ignored.

    Parameters
    ----------
    data : pd.Series or np.ndarray
        The time series data for which the variability is to be calculated.
        Windows are always taken by position, regardless of the Series index.
    window_size : int
        The size of each sub-period window (number of points in each window).
        Must be positive.
    step_size : int, optional
        The step size between the starts of consecutive sub-periods. If None,
        it defaults to ``window_size`` (non-overlapping windows). Must be
        positive.
    ddof : int, optional
        The delta degrees of freedom passed to ``np.var`` for each sub-period.
        Default is 0 (population variance).

    Returns
    -------
    pd.Series
        One variance value per full window, in window order, with a default
        0..k-1 index. The result is an empty float64 Series when the data is
        shorter than ``window_size``.

    Raises
    ------
    ValueError
        If ``window_size`` or ``step_size`` is not positive.

    Notes
    -----
    The input is not checked for NaN values; each window is handed to
    ``np.var`` as-is.

    Examples
    --------
    >>> import pandas as pd
    >>> data = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    >>> calculate_variability_in_sub_periods(data, window_size=5)
    0    2.0
    1    2.0
    dtype: float64
    >>> calculate_variability_in_sub_periods(data, window_size=5, ddof=1)
    0    2.5
    1    2.5
    dtype: float64
    """
    # Convert numpy array to pandas Series if necessary
    if isinstance(data, np.ndarray):
        data = pd.Series(data)

    if step_size is None:
        step_size = window_size  # Default to non-overlapping windows

    # Reject sizes that would otherwise yield silent nonsense: a negative step
    # produces no windows at all, and a non-positive window produces empty or
    # wrapped-around slices.
    if window_size <= 0:
        raise ValueError(f"window_size must be a positive integer, got {window_size!r}")
    if step_size <= 0:
        raise ValueError(f"step_size must be a positive integer, got {step_size!r}")

    # Plain ``series[a:b]`` can be interpreted as a label slice depending on
    # the index type; ``.iloc`` guarantees positional windows. Other sequence
    # types keep their native slicing.
    positional = data.iloc if isinstance(data, pd.Series) else data

    # Only start positions that leave room for a full window are visited.
    last_start = len(data) - window_size
    variability_measures = [
        np.var(positional[start:start + window_size], ddof=ddof)
        for start in range(0, last_start + 1, step_size)
    ]

    # Pin the dtype so that an empty result is float64 rather than object.
    return pd.Series(variability_measures, dtype="float64")