# Source code for interpreTS.utils.data_manager

from .feature_loader import Features
from ..core.features.feature_spikeness import calculate_spikeness
from ..core.features.feature_entropy import calculate_entropy
from ..core.features.feature_stability import calculate_stability
from ..core.features.feature_length import calculate_length
from ..core.features.feature_mean import calculate_mean
from ..core.features.feature_seasonality_strength import calculate_seasonality_strength
from ..core.features.feature_variance import calculate_variance
from ..core.features.feature_peak import calculate_peak
from ..core.features.feature_trough import calculate_trough
from ..core.features.feature_heterogeneity import calculate_heterogeneity
from ..core.features.feature_absolute_energy import calculate_absolute_energy
from ..core.features.feature_missing_points import calculate_missing_points
from ..core.features.feature_distance_to_the_last_change_point import calculate_distance_to_last_trend_change
from ..core.features.feature_above_9th_decile import calculate_above_9th_decile
from ..core.features.feature_below_1st_decile import calculate_below_1st_decile
from ..core.features.feature_binarize_mean import calculate_binarize_mean
from ..core.features.feature_crossing_points import calculate_crossing_points
from ..core.features.feature_flat_spots import calculate_flat_spots
from ..core.features.feature_outliers_iqr import calculate_outliers_iqr
from ..core.features.feature_outliers_std import calculate_outliers_std
from ..core.features.feature_std_1st_der import calculate_std_1st_der
from ..core.features.feature_histogram_dominant import calculate_dominant
from ..core.features.feature_mean_change import calculate_mean_change
from ..core.features.feature_trend_strength import calculate_trend_strength
from ..core.features.feature_significant_changes import calculate_significant_changes
from ..core.features.feature_variability_in_sub_periods import calculate_variability_in_sub_periods
from ..core.features.feature_variance_change import calculate_change_in_variance
from ..core.features.feature_linearity import calculate_linearity


# [docs]
def load_metadata():
    """
    Build the descriptive metadata catalogue for the supported features.

    Returns
    -------
    dict
        A newly created dictionary keyed by ``Features`` members. Every value
        is a dict with two string entries: ``'level'`` (one of ``'easy'``,
        ``'moderate'`` or ``'advanced'``) and ``'description'``.
    """
    def entry(level, description):
        # A fresh dict per feature, so no nested state is shared between entries.
        return {'level': level, 'description': description}

    catalogue = {
        Features.LENGTH: entry('easy', 'Number of points in the window.'),
        Features.MEAN: entry('easy', 'Mean value within the window.'),
        Features.VARIANCE: entry('moderate', 'Variance of the signal within the window.'),
        Features.ENTROPY: entry('advanced', 'Degree of randomness or disorder in the window.'),
        Features.SPIKENESS: entry('moderate', 'Measure of sudden jumps or spikes in the signal.'),
        Features.SEASONALITY_STRENGTH: entry('advanced', 'Strength of seasonal patterns within the signal.'),
        Features.STABILITY: entry('moderate', 'Measure of consistency in the signal values.'),
        Features.PEAK: entry('easy', 'The maximum value in the window.'),
        Features.TROUGH: entry('easy', 'The minimum value in the window.'),
        Features.DISTANCE_TO_LAST_TREND_CHANGE: entry(
            'moderate',
            'Distance (in terms of indices) to the last detected trend change in the window.',
        ),
        Features.ABSOLUTE_ENERGY: entry('moderate', 'Total energy of the signal in the window.'),
        Features.ABOVE_9TH_DECILE: entry(
            'moderate',
            'Fraction of values in the window above the 9th decile of the training data, representing the presence of extreme high values.',
        ),
        Features.BELOW_1ST_DECILE: entry(
            'moderate',
            'Fraction of values in the window below the 1st decile of the training data, representing the presence of extreme low values.',
        ),
        Features.BINARIZE_MEAN: entry(
            'moderate',
            'Binary value indicating whether the signal mean exceeds a threshold.',
        ),
        Features.CROSSING_POINTS: entry('easy', 'Number of times the signal crosses its mean.'),
        Features.FLAT_SPOTS: entry('easy', 'Number of segments with constant values in the signal.'),
        Features.HETEROGENEITY: entry(
            'moderate',
            'Coefficient of variation, representing the ratio of standard deviation to mean, indicating the relative variability in the time series.',
        ),
        Features.OUTLIERS_IQR: entry(
            'moderate',
            'Percentage of values in the window that are classified as outliers based on the Interquartile Range (IQR) method.',
        ),
        Features.OUTLIERS_STD: entry(
            'moderate',
            'Percentage of values in the window that are more than 3 standard deviations away from the mean, indicating extreme deviations.',
        ),
        Features.STD_1ST_DER: entry(
            'moderate',
            'Standard deviation of the first derivative of the signal.',
        ),
        Features.DOMINANT: entry(
            'moderate',
            'The dominant value of the time series histogram, representing the most frequent range of values within the specified bins.',
        ),
        Features.MEAN_CHANGE: entry(
            'moderate',
            'The rate of change in the rolling mean over time, capturing trends or shifts in the time series.',
        ),
        Features.TREND_STRENGTH: entry(
            'moderate',
            'The R-squared value from a linear regression, representing the strength and consistency of the trend in the time series.',
        ),
        Features.SIGNIFICANT_CHANGES: entry(
            'moderate',
            'Proportion of significant increases or decreases in the time series, based on deviations from the interquartile range (IQR) of differences between consecutive values.',
        ),
        Features.MISSING_POINTS: entry(
            'easy',
            'Proportion or count of missing data points in the window.',
        ),
        Features.VARIABILITY_IN_SUB_PERIODS: entry(
            'moderate',
            'Variance calculated within sub-periods of a time series, providing a measure of variability across fixed-size windows.',
        ),
        Features.CHANGE_IN_VARIANCE: entry(
            'moderate',
            'Change in variance over time, calculated as the difference between rolling variances across consecutive windows.',
        ),
        Features.LINEARITY: entry(
            'moderate',
            'Measure of how well the time series can be approximated by a linear trend, quantified using the R-squared value from linear regression.',
        ),
    }
    return catalogue



# [docs]
def generate_feature_descriptions(self, extracted_features):
    """
    Generate textual descriptions for extracted features.

    Parameters
    ----------
    self : object
        Context object supplied by the caller. When it exposes a callable
        ``load_metadata`` attribute, that is used to obtain the metadata;
        otherwise the module-level ``load_metadata`` function is used.
    extracted_features : dict
        A dictionary where keys are feature names and values are their calculated values.

    Returns
    -------
    dict
        A dictionary where keys are feature names and values are textual descriptions.
        Features without a metadata entry are reported as unknown.
    """
    # Resolve the metadata exactly once. The membership test and the lookup
    # below must use the same mapping; previously the lookup went through
    # ``self.feature_metadata``, which could be missing or out of sync with
    # the mapping that was just loaded.
    loader = getattr(self, "load_metadata", None)
    feature_metadata = loader() if callable(loader) else load_metadata()

    descriptions = {}
    for feature_name, feature_value in extracted_features.items():
        if feature_name in feature_metadata:
            description = feature_metadata[feature_name]['description']
            descriptions[feature_name] = (
                f"Feature '{feature_name}': {description} Value: {feature_value}."
            )
        else:
            descriptions[feature_name] = (
                f"Unknown feature: '{feature_name}'. Value: {feature_value}."
            )
    return descriptions



# [docs]
def load_feature_functions():
    """
    Return the dispatch table that maps each feature to its calculator.

    Returns
    -------
    dict
        A newly created dictionary keyed by ``Features`` members whose values
        are the corresponding ``calculate_*`` callables imported by this module.
    """
    # Ordered (feature, calculator) pairs; dict() preserves this order.
    pairs = (
        (Features.LENGTH, calculate_length),
        (Features.MEAN, calculate_mean),
        (Features.VARIANCE, calculate_variance),
        (Features.SPIKENESS, calculate_spikeness),
        (Features.ENTROPY, calculate_entropy),
        (Features.STABILITY, calculate_stability),
        (Features.SEASONALITY_STRENGTH, calculate_seasonality_strength),
        (Features.PEAK, calculate_peak),
        (Features.TROUGH, calculate_trough),
        (Features.DISTANCE_TO_LAST_TREND_CHANGE, calculate_distance_to_last_trend_change),
        (Features.HETEROGENEITY, calculate_heterogeneity),
        (Features.ABSOLUTE_ENERGY, calculate_absolute_energy),
        (Features.MISSING_POINTS, calculate_missing_points),
        (Features.ABOVE_9TH_DECILE, calculate_above_9th_decile),
        (Features.BELOW_1ST_DECILE, calculate_below_1st_decile),
        (Features.BINARIZE_MEAN, calculate_binarize_mean),
        (Features.CROSSING_POINTS, calculate_crossing_points),
        (Features.FLAT_SPOTS, calculate_flat_spots),
        (Features.OUTLIERS_IQR, calculate_outliers_iqr),
        (Features.OUTLIERS_STD, calculate_outliers_std),
        (Features.STD_1ST_DER, calculate_std_1st_der),
        (Features.DOMINANT, calculate_dominant),
        (Features.MEAN_CHANGE, calculate_mean_change),
        (Features.TREND_STRENGTH, calculate_trend_strength),
        (Features.SIGNIFICANT_CHANGES, calculate_significant_changes),
        (Features.VARIABILITY_IN_SUB_PERIODS, calculate_variability_in_sub_periods),
        (Features.CHANGE_IN_VARIANCE, calculate_change_in_variance),
        (Features.LINEARITY, calculate_linearity),
    )
    return dict(pairs)

    

# [docs]
def load_validation_requirements():
    """
    Return the input-validation requirements declared for each feature.

    Returns
    -------
    dict
        A newly created dictionary keyed by ``Features`` members. Each value
        is a dict of validation settings such as ``"require_datetime_index"``,
        ``"allow_nan"``, ``"check_one_dimensional"`` and, for some features,
        ``"min_length"`` plus feature-specific extras.

    Notes
    -----
    - ``Features.LINEARITY`` used to be listed twice in this literal. Python
      keeps only the last value for a repeated key, so the first entry
      (``allow_nan: True``, no ``min_length``) was silently discarded. The
      single entry below carries the values that were actually in effect and
      stays in the original first position, so the returned dict is unchanged.
    - ``"min_length"`` is either an int or a string expression
      (``"window_size"``, ``"window_size + 1"``); presumably the consumer
      resolves the string form - TODO confirm against the validator.
    - NOTE(review): there is no entry for ``Features.LENGTH`` here although
      it has metadata and a calculator; confirm callers tolerate the
      missing key.
    """
    return {
        Features.LINEARITY: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 2,
        },
        Features.MEAN: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
        },
        Features.VARIANCE: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
        },
        Features.SPIKENESS: {
            "require_datetime_index": False,
            "allow_nan": True,
            "check_one_dimensional": True,
        },
        Features.ENTROPY: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 2,
        },
        Features.STABILITY: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 2,
        },
        Features.SEASONALITY_STRENGTH: {
            "require_datetime_index": False,
            "allow_nan": True,
            "check_one_dimensional": True,
            "min_length": 2,
            "validate_positive_parameters": {"period": "Period must be a positive integer."},
        },
        Features.PEAK: {
            "require_datetime_index": False,
            "allow_nan": True,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.TROUGH: {
            "require_datetime_index": False,
            "allow_nan": True,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.DISTANCE_TO_LAST_TREND_CHANGE: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": "window_size + 1",
        },
        Features.HETEROGENEITY: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
            "check_nonzero_mean": True,
        },
        Features.ABSOLUTE_ENERGY: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.MISSING_POINTS: {
            "require_datetime_index": False,
            "allow_nan": True,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.ABOVE_9TH_DECILE: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
            "additional_requirements": {
                "training_data": {
                    "require_datetime_index": False,
                    "allow_nan": False,
                    "check_one_dimensional": True,
                    "min_length": 1,
                },
            },
        },
        Features.BELOW_1ST_DECILE: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
            "additional_requirements": {
                "training_data": {
                    "require_datetime_index": False,
                    "allow_nan": False,
                    "check_one_dimensional": True,
                    "min_length": 1,
                },
            },
        },
        Features.BINARIZE_MEAN: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.CROSSING_POINTS: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.FLAT_SPOTS: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.OUTLIERS_IQR: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
            "additional_requirements": {
                "training_data_not_empty": True,
                "training_data_no_nan": True,
            },
        },
        Features.OUTLIERS_STD: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
            "additional_requirements": {
                "training_data_not_empty": True,
                "training_data_no_nan": True,
            },
        },
        Features.STD_1ST_DER: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.DOMINANT: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 1,
        },
        Features.MEAN_CHANGE: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 2,
            # NOTE(review): this key is spelled "positive_integer_parameters"
            # here but "positive_integer_params" under CHANGE_IN_VARIANCE;
            # confirm which spelling the validator reads before unifying.
            "positive_integer_parameters": ["window_size"],
        },
        Features.TREND_STRENGTH: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 2,
        },
        Features.SIGNIFICANT_CHANGES: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": 2,
        },
        Features.VARIABILITY_IN_SUB_PERIODS: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": "window_size",
        },
        Features.CHANGE_IN_VARIANCE: {
            "require_datetime_index": False,
            "allow_nan": False,
            "check_one_dimensional": True,
            "min_length": "window_size",
            # NOTE(review): spelled differently from MEAN_CHANGE's
            # "positive_integer_parameters"; left as-is to preserve behavior.
            "positive_integer_params": ["window_size"],
        },
    }