{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Regression with interpreTS"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this tutorial, we show how you can use interpreTS for regression."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING:interpreTS:scikit-learn is not installed. Please install it to use interpreTS.\n"
     ]
    }
   ],
   "source": [
    "import urllib.request as urllib2\n",
    "from io import BytesIO\n",
    "from zipfile import ZipFile\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import interpreTS as it"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Loading in the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\slawek\\AppData\\Local\\Temp\\ipykernel_130648\\302813920.py:5: FutureWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
      "  df_power_consumption: pd.DataFrame = pd.read_csv(\n",
      "C:\\Users\\slawek\\AppData\\Local\\Temp\\ipykernel_130648\\302813920.py:5: UserWarning: Parsing dates in %d/%m/%Y %H:%M:%S format when dayfirst=False (the default) was specified. Pass `dayfirst=True` or specify a format to silence this warning.\n",
      "  df_power_consumption: pd.DataFrame = pd.read_csv(\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "timestamp\n",
       "0 days 01:11:00    1\n",
       "0 days 01:24:00    1\n",
       "5 days 00:27:00    1\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "zip_url: str = \"https://archive.ics.uci.edu/ml/machine-learning-databases/00235/household_power_consumption.zip\"\n",
    "zipped_file_name: str = \"household_power_consumption.txt\"\n",
    "\n",
    "\n",
    "df_power_consumption: pd.DataFrame = pd.read_csv(\n",
    "    ZipFile(BytesIO(urllib2.urlopen(zip_url).read())).open(zipped_file_name),\n",
    "    sep=\";\",\n",
    "    parse_dates={\"timestamp\": [\"Date\", \"Time\"]},\n",
    "    infer_datetime_format=True,\n",
    "    low_memory=False,\n",
    "    na_values=[\"nan\", \"?\"],\n",
    "    index_col=\"timestamp\",\n",
    "    dtype=\"float32\",\n",
    ")\n",
    "\n",
    "df_power_consumption = df_power_consumption.dropna()\n",
    "df_power_consumption.index.to_series().diff().value_counts().sample(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\slawek\\AppData\\Local\\Temp\\ipykernel_130648\\192953477.py:1: FutureWarning: The provided callable <function nanmean at 0x0000028DE8235B80> is currently using Rolling.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"mean\" instead.\n",
      "  df_power_consumption[\"avg_15min_GAP\"] = df_power_consumption.rolling(\"15min\")[\n"
     ]
    }
   ],
   "source": [
    "df_power_consumption[\"avg_15min_GAP\"] = df_power_consumption.rolling(\"15min\")[\n",
    "    \"Global_active_power\"\n",
    "].aggregate(np.nanmean)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>Global_active_power</th>\n",
       "      <th>Global_reactive_power</th>\n",
       "      <th>Voltage</th>\n",
       "      <th>Global_intensity</th>\n",
       "      <th>Sub_metering_1</th>\n",
       "      <th>Sub_metering_2</th>\n",
       "      <th>Sub_metering_3</th>\n",
       "      <th>avg_15min_GAP</th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>adjusted_month</th>\n",
       "      <th>timestamp</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">0</th>\n",
       "      <th>2006-12-16 17:24:00</th>\n",
       "      <td>4.216</td>\n",
       "      <td>0.418</td>\n",
       "      <td>234.839996</td>\n",
       "      <td>18.4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>4.216000</td>\n",
       "      <td>2006</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2006-12-16 17:25:00</th>\n",
       "      <td>5.360</td>\n",
       "      <td>0.436</td>\n",
       "      <td>233.630005</td>\n",
       "      <td>23.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>4.788000</td>\n",
       "      <td>2006</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2006-12-16 17:26:00</th>\n",
       "      <td>5.374</td>\n",
       "      <td>0.498</td>\n",
       "      <td>233.289993</td>\n",
       "      <td>23.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>4.983333</td>\n",
       "      <td>2006</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2006-12-16 17:27:00</th>\n",
       "      <td>5.388</td>\n",
       "      <td>0.502</td>\n",
       "      <td>233.740005</td>\n",
       "      <td>23.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>5.084500</td>\n",
       "      <td>2006</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2006-12-16 17:28:00</th>\n",
       "      <td>3.666</td>\n",
       "      <td>0.528</td>\n",
       "      <td>235.679993</td>\n",
       "      <td>15.8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>4.800800</td>\n",
       "      <td>2006</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">38</th>\n",
       "      <th>2010-02-05 04:24:00</th>\n",
       "      <td>0.340</td>\n",
       "      <td>0.076</td>\n",
       "      <td>245.979996</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.344000</td>\n",
       "      <td>2010</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010-02-05 04:25:00</th>\n",
       "      <td>0.344</td>\n",
       "      <td>0.076</td>\n",
       "      <td>245.889999</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.344267</td>\n",
       "      <td>2010</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010-02-05 04:26:00</th>\n",
       "      <td>0.344</td>\n",
       "      <td>0.074</td>\n",
       "      <td>245.660004</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.343733</td>\n",
       "      <td>2010</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010-02-05 04:27:00</th>\n",
       "      <td>0.344</td>\n",
       "      <td>0.076</td>\n",
       "      <td>246.190002</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.344000</td>\n",
       "      <td>2010</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010-02-05 04:28:00</th>\n",
       "      <td>0.420</td>\n",
       "      <td>0.162</td>\n",
       "      <td>246.740005</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.349333</td>\n",
       "      <td>2010</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1639424 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                    Global_active_power  \\\n",
       "adjusted_month timestamp                                  \n",
       "0              2006-12-16 17:24:00                4.216   \n",
       "               2006-12-16 17:25:00                5.360   \n",
       "               2006-12-16 17:26:00                5.374   \n",
       "               2006-12-16 17:27:00                5.388   \n",
       "               2006-12-16 17:28:00                3.666   \n",
       "...                                                 ...   \n",
       "38             2010-02-05 04:24:00                0.340   \n",
       "               2010-02-05 04:25:00                0.344   \n",
       "               2010-02-05 04:26:00                0.344   \n",
       "               2010-02-05 04:27:00                0.344   \n",
       "               2010-02-05 04:28:00                0.420   \n",
       "\n",
       "                                    Global_reactive_power     Voltage  \\\n",
       "adjusted_month timestamp                                                \n",
       "0              2006-12-16 17:24:00                  0.418  234.839996   \n",
       "               2006-12-16 17:25:00                  0.436  233.630005   \n",
       "               2006-12-16 17:26:00                  0.498  233.289993   \n",
       "               2006-12-16 17:27:00                  0.502  233.740005   \n",
       "               2006-12-16 17:28:00                  0.528  235.679993   \n",
       "...                                                   ...         ...   \n",
       "38             2010-02-05 04:24:00                  0.076  245.979996   \n",
       "               2010-02-05 04:25:00                  0.076  245.889999   \n",
       "               2010-02-05 04:26:00                  0.074  245.660004   \n",
       "               2010-02-05 04:27:00                  0.076  246.190002   \n",
       "               2010-02-05 04:28:00                  0.162  246.740005   \n",
       "\n",
       "                                    Global_intensity  Sub_metering_1  \\\n",
       "adjusted_month timestamp                                               \n",
       "0              2006-12-16 17:24:00              18.4             0.0   \n",
       "               2006-12-16 17:25:00              23.0             0.0   \n",
       "               2006-12-16 17:26:00              23.0             0.0   \n",
       "               2006-12-16 17:27:00              23.0             0.0   \n",
       "               2006-12-16 17:28:00              15.8             0.0   \n",
       "...                                              ...             ...   \n",
       "38             2010-02-05 04:24:00               1.4             0.0   \n",
       "               2010-02-05 04:25:00               1.6             0.0   \n",
       "               2010-02-05 04:26:00               1.6             0.0   \n",
       "               2010-02-05 04:27:00               1.6             0.0   \n",
       "               2010-02-05 04:28:00               2.0             0.0   \n",
       "\n",
       "                                    Sub_metering_2  Sub_metering_3  \\\n",
       "adjusted_month timestamp                                             \n",
       "0              2006-12-16 17:24:00             1.0            17.0   \n",
       "               2006-12-16 17:25:00             1.0            16.0   \n",
       "               2006-12-16 17:26:00             2.0            17.0   \n",
       "               2006-12-16 17:27:00             1.0            17.0   \n",
       "               2006-12-16 17:28:00             1.0            17.0   \n",
       "...                                            ...             ...   \n",
       "38             2010-02-05 04:24:00             1.0             0.0   \n",
       "               2010-02-05 04:25:00             1.0             1.0   \n",
       "               2010-02-05 04:26:00             1.0             1.0   \n",
       "               2010-02-05 04:27:00             1.0             0.0   \n",
       "               2010-02-05 04:28:00             1.0             1.0   \n",
       "\n",
       "                                    avg_15min_GAP  year  month  \n",
       "adjusted_month timestamp                                        \n",
       "0              2006-12-16 17:24:00       4.216000  2006     12  \n",
       "               2006-12-16 17:25:00       4.788000  2006     12  \n",
       "               2006-12-16 17:26:00       4.983333  2006     12  \n",
       "               2006-12-16 17:27:00       5.084500  2006     12  \n",
       "               2006-12-16 17:28:00       4.800800  2006     12  \n",
       "...                                           ...   ...    ...  \n",
       "38             2010-02-05 04:24:00       0.344000  2010      2  \n",
       "               2010-02-05 04:25:00       0.344267  2010      2  \n",
       "               2010-02-05 04:26:00       0.343733  2010      2  \n",
       "               2010-02-05 04:27:00       0.344000  2010      2  \n",
       "               2010-02-05 04:28:00       0.349333  2010      2  \n",
       "\n",
       "[1639424 rows x 10 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_columns = [f\"Sub_metering_{i}\" for i in range(1, 4)] + [\"timestamp\"]\n",
    "target_col = \"avg_15min_GAP\"\n",
    "\n",
    "# The percentage of data used for testing\n",
    "test_pct = 0.2\n",
    "day_margin = 3\n",
    "\n",
    "# add the timestamp col\n",
    "df_power_consumption[\"timestamp\"] = df_power_consumption.index\n",
    "\n",
    "# Ensure timestamp is in datetime format\n",
    "df_power_consumption['timestamp'] = pd.to_datetime(df_power_consumption['timestamp'])\n",
    "\n",
    "# Add 'year' and 'month' columns\n",
    "df_power_consumption['year'] = df_power_consumption['timestamp'].dt.year\n",
    "df_power_consumption['month'] = df_power_consumption['timestamp'].dt.month\n",
    "\n",
    "# Add 'adjusted_month' column\n",
    "df_power_consumption['adjusted_month'] = (df_power_consumption['year'] - 2007) * 12 + df_power_consumption['month']\n",
    "\n",
    "# Temporal split logic\n",
    "df_train = df_power_consumption[: -int(len(df_power_consumption) * test_pct)].copy()\n",
    "df_test = df_power_consumption[df_train.index[-1] + pd.Timedelta(days=day_margin):].copy()\n",
    "\n",
    "# Add MultiIndex for training data (adjusted_month, timestamp)\n",
    "df_train = df_train.set_index(['adjusted_month', 'timestamp'])\n",
    "df_train.sort_index(inplace=True)\n",
    "\n",
    "# Add MultiIndex for testing data (adjusted_month, timestamp)\n",
    "df_test = df_test.set_index(['adjusted_month', 'timestamp'])\n",
    "df_test.sort_index(inplace=True)\n",
    "\n",
    "# Output the training data head\n",
    "df_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((1639424,), (1639424, 9), (405537,), (405537, 9))"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Reset the index to remove the MultiIndex\n",
    "df_test_reshaped = df_test.reset_index()\n",
    "df_train_reshaped = df_train.reset_index()\n",
    "train_y = df_train_reshaped['avg_15min_GAP']\n",
    "train_y_monthly = df_train.groupby(level='adjusted_month').mean()\n",
    "test_y = df_test_reshaped['avg_15min_GAP']\n",
    "test_y_monthly = df_test.groupby(level='adjusted_month').mean()\n",
    "df_test_reshaped.drop(columns=['timestamp', 'month', 'avg_15min_GAP'], inplace=True)\n",
    "df_train_reshaped.drop(columns=['timestamp', 'month', 'avg_15min_GAP'], inplace=True)\n",
    "train_y.shape, df_train_reshaped.shape, test_y.shape, df_test_reshaped.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Feature extraction with interpreTS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>length_Global_active_power</th>\n",
       "      <th>length_Global_reactive_power</th>\n",
       "      <th>length_Voltage</th>\n",
       "      <th>length_Global_intensity</th>\n",
       "      <th>length_Sub_metering_1</th>\n",
       "      <th>length_Sub_metering_2</th>\n",
       "      <th>length_Sub_metering_3</th>\n",
       "      <th>length_year</th>\n",
       "      <th>mean_Global_active_power</th>\n",
       "      <th>mean_Global_reactive_power</th>\n",
       "      <th>...</th>\n",
       "      <th>spikeness_Sub_metering_3</th>\n",
       "      <th>spikeness_year</th>\n",
       "      <th>seasonality_strength_Global_active_power</th>\n",
       "      <th>seasonality_strength_Global_reactive_power</th>\n",
       "      <th>seasonality_strength_Voltage</th>\n",
       "      <th>seasonality_strength_Global_intensity</th>\n",
       "      <th>seasonality_strength_Sub_metering_1</th>\n",
       "      <th>seasonality_strength_Sub_metering_2</th>\n",
       "      <th>seasonality_strength_Sub_metering_3</th>\n",
       "      <th>seasonality_strength_year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>21992</td>\n",
       "      <td>21992</td>\n",
       "      <td>21992</td>\n",
       "      <td>21992</td>\n",
       "      <td>21992</td>\n",
       "      <td>21992</td>\n",
       "      <td>21992</td>\n",
       "      <td>21992</td>\n",
       "      <td>1.901295</td>\n",
       "      <td>0.131386</td>\n",
       "      <td>...</td>\n",
       "      <td>0.321571</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.883480</td>\n",
       "      <td>0.852107</td>\n",
       "      <td>0.958830</td>\n",
       "      <td>0.891263</td>\n",
       "      <td>0.820514</td>\n",
       "      <td>0.930466</td>\n",
       "      <td>0.980202</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>44638</td>\n",
       "      <td>44638</td>\n",
       "      <td>44638</td>\n",
       "      <td>44638</td>\n",
       "      <td>44638</td>\n",
       "      <td>44638</td>\n",
       "      <td>44638</td>\n",
       "      <td>44638</td>\n",
       "      <td>1.546034</td>\n",
       "      <td>0.132676</td>\n",
       "      <td>...</td>\n",
       "      <td>0.325868</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.901837</td>\n",
       "      <td>0.861384</td>\n",
       "      <td>0.954732</td>\n",
       "      <td>0.902753</td>\n",
       "      <td>0.823161</td>\n",
       "      <td>0.931576</td>\n",
       "      <td>0.980758</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>40318</td>\n",
       "      <td>40318</td>\n",
       "      <td>40318</td>\n",
       "      <td>40318</td>\n",
       "      <td>40318</td>\n",
       "      <td>40318</td>\n",
       "      <td>40318</td>\n",
       "      <td>40318</td>\n",
       "      <td>1.401084</td>\n",
       "      <td>0.113637</td>\n",
       "      <td>...</td>\n",
       "      <td>0.488657</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.940750</td>\n",
       "      <td>0.866933</td>\n",
       "      <td>0.940974</td>\n",
       "      <td>0.941904</td>\n",
       "      <td>0.786104</td>\n",
       "      <td>0.926132</td>\n",
       "      <td>0.982762</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>44639</td>\n",
       "      <td>44639</td>\n",
       "      <td>44639</td>\n",
       "      <td>44639</td>\n",
       "      <td>44639</td>\n",
       "      <td>44639</td>\n",
       "      <td>44639</td>\n",
       "      <td>44639</td>\n",
       "      <td>1.318627</td>\n",
       "      <td>0.114747</td>\n",
       "      <td>...</td>\n",
       "      <td>0.536607</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.945153</td>\n",
       "      <td>0.871813</td>\n",
       "      <td>0.943282</td>\n",
       "      <td>0.944359</td>\n",
       "      <td>0.801939</td>\n",
       "      <td>0.930233</td>\n",
       "      <td>0.979583</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>39477</td>\n",
       "      <td>39477</td>\n",
       "      <td>39477</td>\n",
       "      <td>39477</td>\n",
       "      <td>39477</td>\n",
       "      <td>39477</td>\n",
       "      <td>39477</td>\n",
       "      <td>39477</td>\n",
       "      <td>0.891189</td>\n",
       "      <td>0.118778</td>\n",
       "      <td>...</td>\n",
       "      <td>1.007143</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.929497</td>\n",
       "      <td>0.874644</td>\n",
       "      <td>0.963638</td>\n",
       "      <td>0.927853</td>\n",
       "      <td>0.802403</td>\n",
       "      <td>0.896615</td>\n",
       "      <td>0.978974</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 56 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   length_Global_active_power  length_Global_reactive_power  length_Voltage  \\\n",
       "0                       21992                         21992           21992   \n",
       "1                       44638                         44638           44638   \n",
       "2                       40318                         40318           40318   \n",
       "3                       44639                         44639           44639   \n",
       "4                       39477                         39477           39477   \n",
       "\n",
       "   length_Global_intensity  length_Sub_metering_1  length_Sub_metering_2  \\\n",
       "0                    21992                  21992                  21992   \n",
       "1                    44638                  44638                  44638   \n",
       "2                    40318                  40318                  40318   \n",
       "3                    44639                  44639                  44639   \n",
       "4                    39477                  39477                  39477   \n",
       "\n",
       "   length_Sub_metering_3  length_year  mean_Global_active_power  \\\n",
       "0                  21992        21992                  1.901295   \n",
       "1                  44638        44638                  1.546034   \n",
       "2                  40318        40318                  1.401084   \n",
       "3                  44639        44639                  1.318627   \n",
       "4                  39477        39477                  0.891189   \n",
       "\n",
       "   mean_Global_reactive_power  ...  spikeness_Sub_metering_3  spikeness_year  \\\n",
       "0                    0.131386  ...                  0.321571             0.0   \n",
       "1                    0.132676  ...                  0.325868             0.0   \n",
       "2                    0.113637  ...                  0.488657             0.0   \n",
       "3                    0.114747  ...                  0.536607             0.0   \n",
       "4                    0.118778  ...                  1.007143             0.0   \n",
       "\n",
       "   seasonality_strength_Global_active_power  \\\n",
       "0                                  0.883480   \n",
       "1                                  0.901837   \n",
       "2                                  0.940750   \n",
       "3                                  0.945153   \n",
       "4                                  0.929497   \n",
       "\n",
       "   seasonality_strength_Global_reactive_power  seasonality_strength_Voltage  \\\n",
       "0                                    0.852107                      0.958830   \n",
       "1                                    0.861384                      0.954732   \n",
       "2                                    0.866933                      0.940974   \n",
       "3                                    0.871813                      0.943282   \n",
       "4                                    0.874644                      0.963638   \n",
       "\n",
       "   seasonality_strength_Global_intensity  seasonality_strength_Sub_metering_1  \\\n",
       "0                               0.891263                             0.820514   \n",
       "1                               0.902753                             0.823161   \n",
       "2                               0.941904                             0.786104   \n",
       "3                               0.944359                             0.801939   \n",
       "4                               0.927853                             0.802403   \n",
       "\n",
       "   seasonality_strength_Sub_metering_2  seasonality_strength_Sub_metering_3  \\\n",
       "0                             0.930466                             0.980202   \n",
       "1                             0.931576                             0.980758   \n",
       "2                             0.926132                             0.982762   \n",
       "3                             0.930233                             0.979583   \n",
       "4                             0.896615                             0.978974   \n",
       "\n",
       "   seasonality_strength_year  \n",
       "0                        0.0  \n",
       "1                        0.0  \n",
       "2                        0.0  \n",
       "3                        0.0  \n",
       "4                        0.0  \n",
       "\n",
       "[5 rows x 56 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "extractor = it.FeatureExtractor(id_column=\"adjusted_month\")\n",
    "features_train = extractor.extract_features(df_train_reshaped)\n",
    "features_test = extractor.extract_features(df_test_reshaped)\n",
    "features_train.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using interpreTS for regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "import xgboost as xgb\n",
    "from sklearn.metrics import mean_squared_error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "RMSE: 1.8527\n"
     ]
    }
   ],
   "source": [
    "gb_regressor = xgb.XGBRegressor(random_state=42)\n",
    "\n",
    "gb_regressor.fit(features_train, train_y_monthly)\n",
    "\n",
    "y_pred = gb_regressor.predict(features_test)\n",
    "\n",
    "rmse = np.sqrt(mean_squared_error(test_y_monthly, y_pred))\n",
    "print(f\"RMSE: {rmse:.4f}\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}