{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Notebook Test Example of Features" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Test calculation of features implemented in version 0.4" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import os\n", "import sys\n", "\n", "# Location of the InterpreTS checkout. Set the INTERPRETS_PATH environment\n", "# variable to override the original author's local default.\n", "INTERPRETS_PATH = os.environ.get('INTERPRETS_PATH', r'D:\\pbl\\InterpreTS')\n", "if INTERPRETS_PATH not in sys.path:\n", "    sys.path.append(INTERPRETS_PATH)\n" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_crossing_points import calculate_crossing_points" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'crossing_count': 7, 'crossing_points': [0, 1, 2, 3, 4, 5, 6]}\n", "{'crossing_count': 0, 'crossing_points': []}\n" ] } ], "source": [ "data = pd.Series([1, 3, 2, 4, 1, 5, 2, 6])\n", "result = calculate_crossing_points(data)\n", "print(result)\n", "\n", "data = pd.Series([2, 2, 2, 2])\n", "res2 = calculate_crossing_points(data)\n", "print(res2)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_amplitude_change_rate import calculate_amplitude_change_rate" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2.8333333333333335\n", "nan\n" ] } ], "source": [ "data = pd.Series([1, 3, 2, 4, 1, 5, 2, 6, 3])\n", "res = calculate_amplitude_change_rate(data)\n", "print(res)\n", "\n", "data = pd.Series([1, 1, 1, 1])\n", "res2 = calculate_amplitude_change_rate(data)\n", "print(res2)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_linearity import calculate_linearity" ] }, { "cell_type": "code", 
"execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.0\n", "0.0\n" ] } ], "source": [ "data = pd.Series([1, 2, 3, 4, 5])\n", "res = calculate_linearity(data)\n", "print(res)\n", "\n", "data = pd.Series([1, 2, 1, 2, 1, 2, 1, 2, 1, 2])\n", "res2 = calculate_linearity(data)\n", "print(res2)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.seasonality_strength import calculate_seasonality_strength" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = pd.Series([1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2], index=pd.date_range(\"2023-01-01\", periods=12, freq=\"M\"))\n", "res1 = calculate_seasonality_strength(data, period=3)\n", "print(res1)\n", "\n", "data = pd.Series([1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2])\n", "res2 = calculate_seasonality_strength(data, period=2)\n", "print(res2)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.distance_to_the_last_change_point import calculate_distance_to_last_trend_change" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n" ] } ], "source": [ "data = pd.Series([1, 2, 3, 2, 1, 2, 3, 2, 1])\n", "res = calculate_distance_to_last_trend_change(data, window_size=2)\n", "print(res)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_above_9th_decile import calculate_above_9th_decile" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.6" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = pd.Series([8, 9, 10, 11, 12])\n", 
"training_data = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])\n", "calculate_above_9th_decile(data, training_data)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_absolute_energy import calculate_absolute_energy" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "30\n", "13\n" ] } ], "source": [ "data = pd.Series([1, 2, 3, 4])\n", "res1 = calculate_absolute_energy(data)\n", "print(res1)\n", "\n", "res2 = calculate_absolute_energy(data, start=1, end=3)\n", "print(res2)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_below_1st_decile import calculate_below_1st_decile" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.2\n" ] } ], "source": [ "data = pd.Series([1, 2, 3, 4, 5])\n", "training_data = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])\n", "res1 = calculate_below_1st_decile(data, training_data)\n", "print(res1)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_binarize_mean import calculate_binarize_mean" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.6\n" ] } ], "source": [ "data = pd.Series([1, 2, 3, 4, 5])\n", "res = calculate_binarize_mean(data)\n", "print(res)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_entropy import calculate_entropy" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.0\n", "0.0\n" ] } ], "source": [ "data = pd.Series([1, 2, 3, 1, 2, 3, 1, 2, 3])\n", "res1 = 
calculate_entropy(data, bins=3)\n", "print(res1)\n", "\n", "data = pd.Series([1, 1, 1, 1])\n", "res2 = calculate_entropy(data, bins=2)\n", "print(res2)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_flat_spots import calculate_flat_spots" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "4\n" ] } ], "source": [ "data = pd.Series([1, 1, 1, 2, 3, 3, 4, 4, 4, 4, 4, 5, 1, 1])\n", "res = calculate_flat_spots(data)\n", "print(res)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_heterogeneity import calculate_heterogeneity" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.5270462766947299\n" ] } ], "source": [ "data = pd.Series([1, 2, 3, 4, 5])\n", "res = calculate_heterogeneity(data)\n", "print(res)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_length import calculate_length" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "5\n" ] } ], "source": [ "data = pd.Series([1, 2, 3, 4, 5])\n", "res = calculate_length(data)\n", "print(res)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_mean import calculate_mean" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3.0\n" ] } ], "source": [ "data = pd.Series([1, 2, 3, 4, 5])\n", "res = calculate_mean(data)\n", "print(res)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "from 
interpreTS.core.features.feature_missing_points import calculate_missing_points" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = pd.Series([1, 2, np.nan, 4, None])\n", "res1 = calculate_missing_points(data)\n", "print(res1)\n", "\n", "res2 = calculate_missing_points(data, percentage=False)\n", "print(res2)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_outliers_iqr import calculate_outliers_iqr" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.25\n", "0.25\n" ] } ], "source": [ "training_data = np.array([10, 12, 14, 15, 16, 18, 19])\n", "data = np.array([9, 15, 20, 25])\n", "res1 = calculate_outliers_iqr(data, training_data)\n", "print(res1)\n", "\n", "training_data = np.array([10, 10, 10, 10, 10])\n", "data = np.array([10, 11, 12, 9])\n", "res2 = calculate_outliers_iqr(data, training_data)\n", "print(res2)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_outliers_std import calculate_outliers_std" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.2\n" ] } ], "source": [ "training_data = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9])\n", "data = pd.Series([0, 10, 2, 3, 15])\n", "res = calculate_outliers_std(data, training_data)\n", "print(res)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_peak import calculate_peak" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7\n", "5\n" ] } ], "source": [ "data = pd.Series([1, 2, 5, 4, 
7])\n", "res1 = calculate_peak(data)\n", "print(res1)\n", "\n", "res2 =calculate_peak(data, start=1, end=3)\n", "print(res2)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_significant_changes import calculate_significant_changes" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.0\n" ] } ], "source": [ "data = np.array([1, 100, 1, 200, 50, 3, 1])\n", "res1 = calculate_significant_changes(data)\n", "print(res1)  # FIXME: this result looks incorrect - every example tried returned 0.0" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_spikeness import calculate_spikeness" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.0\n" ] } ], "source": [ "data = pd.Series([1, 2, 3, 4, 5])\n", "res = calculate_spikeness(data)\n", "print(res)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_stability import calculate_stability" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.8410385081084804\n", "0.6144144729613819\n" ] } ], "source": [ "data = pd.Series([10, 12, 11, 13, 12, 14, 11, 13, 12, 14, 13])\n", "res1 = calculate_stability(data)\n", "print(res1)\n", "\n", "data = pd.Series([5, 20, 3, 18, 1, 25, 2, 22, 0, 19])\n", "res2 = calculate_stability(data)\n", "print(res2)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_std_1st_der import calculate_std_1st_der" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.0\n" ] } ], "source": [ 
"data = pd.Series([1, 2, 3, 4, 5])\n", "res = calculate_std_1st_der(data)\n", "print(res)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_trough import calculate_trough" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n", "2\n" ] } ], "source": [ "data = pd.Series([1, 2, 5, 4, 3])\n", "res1 = calculate_trough(data)\n", "print(res1)\n", "\n", "res2 = calculate_trough(data, start=1, end=3)\n", "print(res2)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.feature_variance import calculate_variance" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "10.0\n", "8.0\n", "0.0\n" ] } ], "source": [ "data = pd.Series([10, 12, 14, 16, 18])\n", "res1 = calculate_variance(data)\n", "print(res1)\n", "\n", "data = np.array([2, 4, 6, 8, 10])\n", "res2 = calculate_variance(data, ddof=0)\n", "print(res2)\n", "\n", "data = pd.Series([5])\n", "res3 = calculate_variance(data)\n", "print(res3)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.histogram_dominant import calculate_dominant" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2.6\n", "30.0\n", "0.8333333333333333\n" ] } ], "source": [ "data = np.array([1, 2, 2, 3, 3, 3, 4, 4, 5])\n", "res1 = calculate_dominant(data, bins=5)\n", "print(res1)\n", "\n", "data = np.array([10, 20, 20, 30, 30, 30, 40, 40, 50])\n", "res2 = calculate_dominant(data, bins=5, return_bin_center=True)\n", "print(res2)\n", "\n", "data = np.array([1, 1, 1, 1, 1])\n", "res3 = calculate_dominant(data, bins=3)\n", "print(res3)" ] }, { "cell_type": "code", 
"execution_count": 35, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.mean_change import calculate_mean_change" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 NaN\n", "1 NaN\n", "2 NaN\n", "3 1.0\n", "4 1.0\n", "5 1.0\n", "6 1.0\n", "7 1.0\n", "8 1.0\n", "9 1.0\n", "dtype: float64\n" ] } ], "source": [ "data = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])\n", "res = calculate_mean_change(data, window_size=3)\n", "print(res)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.trend_strength import calculate_trend_strength" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.0\n" ] } ], "source": [ "data = pd.Series([1, 2, 3, 4, 5])\n", "res = calculate_trend_strength(data)\n", "print(res)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.variability_in_sub_periods import calculate_variability_in_sub_periods" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 2.0\n", "1 2.0\n", "dtype: float64\n" ] } ], "source": [ "data = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])\n", "res = calculate_variability_in_sub_periods(data, window_size=5)\n", "print(res)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [ "from interpreTS.core.features.variance_change import calculate_change_in_variance" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 NaN\n", "1 NaN\n", "2 NaN\n", "3 -4.000000\n", "4 -4.000000\n", "5 4.666667\n", "6 5.333333\n", "7 -8.000000\n", "8 26.666667\n", "9 -8.666667\n", "dtype: float64\n" ] } ], 
"source": [ "data = pd.Series([1, 2, 7, 4, 5, 9, 12, 8, 1, 10])\n", "res = calculate_change_in_variance(data, window_size=3)\n", "print(res)" ] } ], "metadata": { "kernelspec": { "display_name": "PBL", "language": "python", "name": "env" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.20" } }, "nbformat": 4, "nbformat_minor": 2 }