Source code for interpreTS.core.features.feature_outliers_iqr
import numpy as np
import pandas as pd
[docs]
def calculate_outliers_iqr(data, training_data, epsilon=1e-6):
    """
    Compute the fraction of observations in a window that are IQR outliers.

    An observation is an outlier when it falls below ``Q1 - 1.5 * IQR`` or
    above ``Q3 + 1.5 * IQR``, where Q1, Q3 and IQR (Interquartile Range) are
    computed from ``training_data``.

    Parameters
    ----------
    data : np.ndarray, pd.Series or sequence of numbers
        The data window to analyze for outliers.
    training_data : np.ndarray, pd.Series or sequence of numbers
        The training data used to calculate Q1 (25th percentile),
        Q3 (75th percentile), and IQR.
    epsilon : float, optional
        Currently unused; retained so existing callers that pass it keep
        working (default is 1e-6). When the training data holds a single
        unique value, a fixed margin of 1.5 around that value is used.

    Returns
    -------
    float
        The fraction (between 0 and 1, not 0-100) of observations in the
        window that are considered outliers. Returns 0.0 when the training
        data is not constant but its IQR is zero. Returns NaN when the
        window is empty, because the fraction is undefined.

    Raises
    ------
    IndexError
        If ``training_data`` is empty.

    Examples
    --------
    >>> import numpy as np
    >>> training_data = np.array([10, 12, 14, 15, 16, 18, 19])
    >>> data = np.array([9, 15, 20, 25])
    >>> float(calculate_outliers_iqr(data, training_data))
    0.25
    """
    # np.asarray accepts ndarrays, pandas Series and plain sequences alike,
    # so element-wise comparisons below behave the same for all of them.
    data = np.asarray(data)
    training_data = np.asarray(training_data)

    if np.all(training_data == training_data[0]):
        # Constant training data has no spread to derive an IQR from, so a
        # fixed band of +/- 1.5 around the single value is used instead.
        center = training_data[0]
        lower_bound = center - 1.5
        upper_bound = center + 1.5
    else:
        # Calculate Q1, Q3, and IQR from the training dataset.
        q1, q3 = np.percentile(training_data, [25, 75])
        iqr = q3 - q1
        # With a zero IQR the bounds would collapse onto Q1 == Q3; the
        # function reports no outliers in that case.
        if iqr == 0:
            return 0.0
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr

    window_size = len(data)
    if window_size == 0:
        # Avoid a 0 / 0 division (and its RuntimeWarning) on an empty window.
        return np.nan

    # Count the observations that fall outside the [lower, upper] band.
    outliers = np.sum((data < lower_bound) | (data > upper_bound))
    return outliers / window_size