# Source code for interpreTS.core.features.feature_outliers_std

import numpy as np
import pandas as pd

def calculate_outliers_std(data, training_data):
    """
    Calculate the fraction of observations in a window that lie more than
    3 standard deviations away from the mean of the training dataset.

    Parameters
    ----------
    data : np.ndarray, pd.Series, list or tuple
        Window data to analyze.
    training_data : np.ndarray, pd.Series, list or tuple
        Training data used to calculate the mean and standard deviation.

    Returns
    -------
    float
        Fraction (between 0 and 1, not multiplied by 100) of observations
        in the window that deviate from the training mean by more than
        3 standard deviations. ``nan`` is returned for an empty window.

    Notes
    -----
    The standard deviation is computed with ``np.std`` using its default
    arguments. When the training data has zero standard deviation, every
    window value that differs from the training mean counts as an outlier.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> training_data = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> data = pd.Series([0, 10, 2, 3, 15])
    >>> calculate_outliers_std(data, training_data)
    0.2
    """
    # np.asarray accepts Series, ndarrays and plain sequences alike, so the
    # element-wise comparisons below work for every array-like input.
    window = np.asarray(data)
    reference = np.asarray(training_data)

    # An empty window has no defined outlier ratio; return nan explicitly
    # instead of relying on a 0 / 0 division that emits a RuntimeWarning.
    if window.size == 0:
        return float("nan")

    # Statistics come from the training data only, never from the window.
    mean_value = np.mean(reference)
    std_dev = np.std(reference)

    if std_dev == 0:
        # Constant training data: any value other than the mean is an outlier.
        outlier_mask = window != mean_value
    else:
        # Bounds sit 3 standard deviations either side of the training mean.
        lower_bound = mean_value - 3 * std_dev
        upper_bound = mean_value + 3 * std_dev
        outlier_mask = (window < lower_bound) | (window > upper_bound)

    # Convert to a built-in float so the result (and its repr) does not
    # depend on the installed NumPy version's scalar type.
    return float(np.count_nonzero(outlier_mask) / window.size)