Source code for interpreTS.core.features.feature_outliers_iqr

import numpy as np
import pandas as pd

def calculate_outliers_iqr(data, training_data, epsilon=1e-6):
    """
    Return the fraction of observations in a window that are IQR outliers.

    An observation is an outlier when it falls below ``Q1 - 1.5 * IQR`` or
    above ``Q3 + 1.5 * IQR``, where Q1, Q3 and IQR (Interquartile Range) are
    computed from ``training_data``.

    Parameters
    ----------
    data : array-like (np.ndarray, pd.Series, list or tuple)
        The data window to analyze for outliers.
    training_data : array-like (np.ndarray, pd.Series, list or tuple)
        The training data used to calculate Q1 (25th percentile),
        Q3 (75th percentile), and IQR.
    epsilon : float, optional
        Accepted for backward compatibility (default is 1e-6). It is
        currently not used by the computation: when the training data
        contains a single unique value, a fixed margin of 1.5 around that
        value is used as the bounds.
        NOTE(review): confirm whether ``epsilon`` was meant to replace the
        fixed 1.5 margin.

    Returns
    -------
    float
        The fraction (between 0.0 and 1.0, not multiplied by 100) of
        observations in the window that are considered outliers.
        Returns 0.0 when the training data is not constant but its IQR is
        zero. Returns NaN when the window is empty.

    Raises
    ------
    IndexError
        If ``training_data`` is empty.

    Examples
    --------
    >>> import numpy as np
    >>> training_data = np.array([10, 12, 14, 15, 16, 18, 19])
    >>> data = np.array([9, 15, 20, 25])
    >>> calculate_outliers_iqr(data, training_data)
    0.25
    """
    # np.asarray unwraps pd.Series and also lets plain lists/tuples take part
    # in the vectorized comparisons below.
    window = np.asarray(data)
    reference = np.asarray(training_data)

    if reference.size == 0:
        raise IndexError("training_data must contain at least one observation")

    first_value = reference[0]
    if np.all(reference == first_value):
        # Constant training data: the IQR is zero, so use a fixed margin
        # around the single value instead of IQR-scaled bounds.
        lower_bound = first_value - 1.5
        upper_bound = first_value + 1.5
    else:
        q1, q3 = np.percentile(reference, [25, 75])
        iqr = q3 - q1

        # Non-constant training data can still have a zero IQR (more than
        # half of the values identical); no outliers are reported then.
        if iqr == 0:
            return 0.0

        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr

    n_observations = len(window)
    if n_observations == 0:
        # 0 / 0 is undefined; return NaN explicitly rather than relying on a
        # NumPy division that emits a RuntimeWarning.
        return float("nan")

    n_outliers = np.count_nonzero((window < lower_bound) | (window > upper_bound))
    # Built-in float keeps the return type uniform across all branches.
    return float(n_outliers / n_observations)