Source code for interpreTS.utils.data_manager

from .feature_loader import Features
from ..core.features.feature_spikeness import calculate_spikeness
from ..core.features.feature_entropy import calculate_entropy
from ..core.features.feature_stability import calculate_stability
from ..core.features.feature_length import calculate_length
from ..core.features.feature_mean import calculate_mean
from ..core.features.feature_seasonality_strength import calculate_seasonality_strength
from ..core.features.feature_variance import calculate_variance
from ..core.features.feature_peak import calculate_peak
from ..core.features.feature_trough import calculate_trough
from ..core.features.feature_heterogeneity import calculate_heterogeneity
from ..core.features.feature_absolute_energy import calculate_absolute_energy
from ..core.features.feature_missing_points import calculate_missing_points
from ..core.features.feature_distance_to_the_last_change_point import calculate_distance_to_last_trend_change
from ..core.features.feature_above_9th_decile import calculate_above_9th_decile
from ..core.features.feature_below_1st_decile import calculate_below_1st_decile
from ..core.features.feature_binarize_mean import calculate_binarize_mean
from ..core.features.feature_crossing_points import calculate_crossing_points
from ..core.features.feature_flat_spots import calculate_flat_spots
from ..core.features.feature_outliers_iqr import calculate_outliers_iqr
from ..core.features.feature_outliers_std import calculate_outliers_std
from ..core.features.feature_std_1st_der import calculate_std_1st_der
from ..core.features.feature_histogram_dominant import calculate_dominant
from ..core.features.feature_mean_change import calculate_mean_change
from ..core.features.feature_trend_strength import calculate_trend_strength
from ..core.features.feature_significant_changes import calculate_significant_changes
from ..core.features.feature_variability_in_sub_periods import calculate_variability_in_sub_periods
from ..core.features.feature_variance_change import calculate_change_in_variance
from ..core.features.feature_linearity import calculate_linearity

def load_metadata():
    """
    Build the descriptive metadata table for every known feature.

    Returns
    -------
    dict
        A dictionary keyed by ``Features`` members. Each value is a dict
        with two string entries: ``'level'`` (one of ``'easy'``,
        ``'moderate'`` or ``'advanced'``) and ``'description'`` (a
        human-readable summary of the feature).
    """
    # One row per feature: (feature key, level, description).
    # Row order defines the insertion order of the returned dictionary.
    rows = (
        (Features.LENGTH, 'easy', 'Number of points in the window.'),
        (Features.MEAN, 'easy', 'Mean value within the window.'),
        (Features.VARIANCE, 'moderate', 'Variance of the signal within the window.'),
        (Features.ENTROPY, 'advanced', 'Degree of randomness or disorder in the window.'),
        (Features.SPIKENESS, 'moderate', 'Measure of sudden jumps or spikes in the signal.'),
        (Features.SEASONALITY_STRENGTH, 'advanced', 'Strength of seasonal patterns within the signal.'),
        (Features.STABILITY, 'moderate', 'Measure of consistency in the signal values.'),
        (Features.PEAK, 'easy', 'The maximum value in the window.'),
        (Features.TROUGH, 'easy', 'The minimum value in the window.'),
        (Features.DISTANCE_TO_LAST_TREND_CHANGE, 'moderate', 'Distance (in terms of indices) to the last detected trend change in the window.'),
        (Features.ABSOLUTE_ENERGY, 'moderate', 'Total energy of the signal in the window.'),
        (Features.ABOVE_9TH_DECILE, 'moderate', 'Fraction of values in the window above the 9th decile of the training data, representing the presence of extreme high values.'),
        (Features.BELOW_1ST_DECILE, 'moderate', 'Fraction of values in the window below the 1st decile of the training data, representing the presence of extreme low values.'),
        (Features.BINARIZE_MEAN, 'moderate', 'Binary value indicating whether the signal mean exceeds a threshold.'),
        (Features.CROSSING_POINTS, 'easy', 'Number of times the signal crosses its mean.'),
        (Features.FLAT_SPOTS, 'easy', 'Number of segments with constant values in the signal.'),
        (Features.HETEROGENEITY, 'moderate', 'Coefficient of variation, representing the ratio of standard deviation to mean, indicating the relative variability in the time series.'),
        (Features.OUTLIERS_IQR, 'moderate', 'Percentage of values in the window that are classified as outliers based on the Interquartile Range (IQR) method.'),
        (Features.OUTLIERS_STD, 'moderate', 'Percentage of values in the window that are more than 3 standard deviations away from the mean, indicating extreme deviations.'),
        (Features.STD_1ST_DER, 'moderate', 'Standard deviation of the first derivative of the signal.'),
        (Features.DOMINANT, 'moderate', 'The dominant value of the time series histogram, representing the most frequent range of values within the specified bins.'),
        (Features.MEAN_CHANGE, 'moderate', 'The rate of change in the rolling mean over time, capturing trends or shifts in the time series.'),
        (Features.TREND_STRENGTH, 'moderate', 'The R-squared value from a linear regression, representing the strength and consistency of the trend in the time series.'),
        (Features.SIGNIFICANT_CHANGES, 'moderate', 'Proportion of significant increases or decreases in the time series, based on deviations from the interquartile range (IQR) of differences between consecutive values.'),
        (Features.MISSING_POINTS, 'easy', 'Proportion or count of missing data points in the window.'),
        (Features.VARIABILITY_IN_SUB_PERIODS, 'moderate', 'Variance calculated within sub-periods of a time series, providing a measure of variability across fixed-size windows.'),
        (Features.CHANGE_IN_VARIANCE, 'moderate', 'Change in variance over time, calculated as the difference between rolling variances across consecutive windows.'),
        (Features.LINEARITY, 'moderate', 'Measure of how well the time series can be approximated by a linear trend, quantified using the R-squared value from linear regression.'),
    )
    return {
        feature: {'level': level, 'description': summary}
        for feature, level, summary in rows
    }
def generate_feature_descriptions(self, extracted_features):
    """
    Generate textual descriptions for extracted features.

    Parameters
    ----------
    self : object
        Any object exposing a ``load_metadata()`` method that returns a
        mapping from feature name to a dict containing a
        ``'description'`` entry.
    extracted_features : dict
        A dictionary where keys are feature names and values are their
        calculated values.

    Returns
    -------
    dict
        A dictionary where keys are feature names and values are textual
        descriptions. Features missing from the metadata get an
        "Unknown feature" message instead of a description.
    """
    feature_metadata = self.load_metadata()
    descriptions = {}

    for feature_name, feature_value in extracted_features.items():
        if feature_name in feature_metadata:
            # Look the entry up in the same mapping that was just checked.
            # The previous code indexed ``self.feature_metadata`` here, an
            # unrelated attribute that may not exist or may hold different
            # keys than the freshly loaded metadata.
            description = feature_metadata[feature_name]['description']
            descriptions[feature_name] = (
                f"Feature '{feature_name}': {description} Value: {feature_value}."
            )
        else:
            descriptions[feature_name] = (
                f"Unknown feature: '{feature_name}'. Value: {feature_value}."
            )

    return descriptions
def load_feature_functions():
    """
    Build the lookup table from feature key to its calculation function.

    Returns
    -------
    dict
        A dictionary keyed by ``Features`` members whose values are the
        corresponding ``calculate_*`` callables imported by this module.
    """
    # Ordered (feature key, callable) pairs; order is the dict's insertion order.
    bindings = (
        (Features.LENGTH, calculate_length),
        (Features.MEAN, calculate_mean),
        (Features.VARIANCE, calculate_variance),
        (Features.SPIKENESS, calculate_spikeness),
        (Features.ENTROPY, calculate_entropy),
        (Features.STABILITY, calculate_stability),
        (Features.SEASONALITY_STRENGTH, calculate_seasonality_strength),
        (Features.PEAK, calculate_peak),
        (Features.TROUGH, calculate_trough),
        (Features.DISTANCE_TO_LAST_TREND_CHANGE, calculate_distance_to_last_trend_change),
        (Features.HETEROGENEITY, calculate_heterogeneity),
        (Features.ABSOLUTE_ENERGY, calculate_absolute_energy),
        (Features.MISSING_POINTS, calculate_missing_points),
        (Features.ABOVE_9TH_DECILE, calculate_above_9th_decile),
        (Features.BELOW_1ST_DECILE, calculate_below_1st_decile),
        (Features.BINARIZE_MEAN, calculate_binarize_mean),
        (Features.CROSSING_POINTS, calculate_crossing_points),
        (Features.FLAT_SPOTS, calculate_flat_spots),
        (Features.OUTLIERS_IQR, calculate_outliers_iqr),
        (Features.OUTLIERS_STD, calculate_outliers_std),
        (Features.STD_1ST_DER, calculate_std_1st_der),
        (Features.DOMINANT, calculate_dominant),
        (Features.MEAN_CHANGE, calculate_mean_change),
        (Features.TREND_STRENGTH, calculate_trend_strength),
        (Features.SIGNIFICANT_CHANGES, calculate_significant_changes),
        (Features.VARIABILITY_IN_SUB_PERIODS, calculate_variability_in_sub_periods),
        (Features.CHANGE_IN_VARIANCE, calculate_change_in_variance),
        (Features.LINEARITY, calculate_linearity),
    )
    return dict(bindings)
def load_validation_requirements():
    """
    Build the per-feature input validation requirements.

    Returns
    -------
    dict
        A dictionary keyed by ``Features`` members. Each value is a dict of
        validation options. Every entry carries ``"require_datetime_index"``,
        ``"allow_nan"`` and ``"check_one_dimensional"``; some add
        ``"min_length"`` (an int, or a string expression such as
        ``"window_size"`` or ``"window_size + 1"``) and feature-specific
        keys such as ``"check_nonzero_mean"`` or
        ``"additional_requirements"``.

    Notes
    -----
    How these options are interpreted is decided by the consumer of this
    table, which is not part of this module.
    """
    # NOTE(review): Features.LENGTH has no entry here although it appears in
    # load_metadata() and load_feature_functions() - confirm this is intended.
    return {
        # Features.LINEARITY used to be listed twice in this literal; the
        # later entry silently replaced the earlier one (allow_nan=True, no
        # min_length). Only the effective entry is kept, at the position of
        # the first occurrence, so the returned dict and its iteration order
        # are unchanged.
        Features.LINEARITY: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 2,
        },
        Features.MEAN: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
        },
        Features.VARIANCE: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
        },
        Features.SPIKENESS: {
            "require_datetime_index": False,
            "allow_nan": True,
            "check_one_dimensional": True,
        },
        Features.ENTROPY: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 2,
        },
        Features.STABILITY: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 2,
        },
        Features.SEASONALITY_STRENGTH: {
            "require_datetime_index": False,
            "allow_nan": True,
            "check_one_dimensional": True,
            "min_length": 2,
            "validate_positive_parameters": {
                "period": "Period must be a positive integer.",
            },
        },
        Features.PEAK: {
            "require_datetime_index": False,
            "allow_nan": True,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.TROUGH: {
            "require_datetime_index": False,
            "allow_nan": True,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.DISTANCE_TO_LAST_TREND_CHANGE: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": "window_size + 1",
        },
        Features.HETEROGENEITY: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
            "check_nonzero_mean": True,
        },
        Features.ABSOLUTE_ENERGY: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.MISSING_POINTS: {
            "require_datetime_index": False,
            "allow_nan": True,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.ABOVE_9TH_DECILE: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
            "additional_requirements": {
                "training_data": {
                    "require_datetime_index": False,
                    "allow_nan": False,
                    "check_one_dimensional": True,
                    "min_length": 1,
                },
            },
        },
        Features.BELOW_1ST_DECILE: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
            "additional_requirements": {
                "training_data": {
                    "require_datetime_index": False,
                    "allow_nan": False,
                    "check_one_dimensional": True,
                    "min_length": 1,
                },
            },
        },
        Features.BINARIZE_MEAN: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.CROSSING_POINTS: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.FLAT_SPOTS: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.OUTLIERS_IQR: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
            "additional_requirements": {
                "training_data_not_empty": True,
                "training_data_no_nan": True,
            },
        },
        Features.OUTLIERS_STD: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
            "additional_requirements": {
                "training_data_not_empty": True,
                "training_data_no_nan": True,
            },
        },
        Features.STD_1ST_DER: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.DOMINANT: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.MEAN_CHANGE: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 2,
            "positive_integer_parameters": ["window_size"],
        },
        Features.TREND_STRENGTH: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 2,
        },
        Features.SIGNIFICANT_CHANGES: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 2,
        },
        Features.VARIABILITY_IN_SUB_PERIODS: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": "window_size",
        },
        Features.CHANGE_IN_VARIANCE: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": "window_size",
            # NOTE(review): MEAN_CHANGE spells this key
            # "positive_integer_parameters"; one of the two is probably a
            # typo - confirm against the validator before renaming.
            "positive_integer_params": ["window_size"],
        },
    }