Source code for interpreTS.core.time_series_data

import pandas as pd

[docs] class TimeSeriesData: """ A class to manage and process time series data. """ def __init__(self, data): """ Initialize the TimeSeriesData with time series data. Parameters ---------- data : pd.Series, pd.DataFrame, or np.ndarray The time series data to be managed. If provided as a numpy array, it will be converted to a pandas DataFrame for consistency. Examples -------- >>> import pandas as pd >>> data = pd.Series([1, 2, 3, 4, 5]) >>> ts_data = TimeSeriesData(data) """ if isinstance(data, (pd.Series, pd.DataFrame)): self.data = data else: raise ValueError("Data must be a pandas Series or DataFrame.")
[docs] def resample(self, interval): """ Resample the time series data to a specified interval. Parameters ---------- interval : str The interval to resample the data, e.g., 'D' for daily, 'H' for hourly. Returns ------- TimeSeriesData A new TimeSeriesData object with resampled data. Examples -------- >>> data = pd.Series([1, 2, 3, 4, 5], index=pd.date_range("2023-01-01", periods=5, freq="D")) >>> ts_data = TimeSeriesData(data) >>> resampled_data = ts_data.resample("2D") """ if not isinstance(self.data.index, pd.DatetimeIndex): raise ValueError("Data must have a DateTime index for resampling.") resampled_data = self.data.resample(interval).mean() return TimeSeriesData(resampled_data)
[docs] def split(self, train_size=0.7): """ Split the time series data into training and test sets. Parameters ---------- train_size : float, optional The proportion of the data to use for training, by default 0.7. Returns ------- tuple of TimeSeriesData A tuple containing the training and test sets as TimeSeriesData objects. Examples -------- >>> data = pd.Series([1, 2, 3, 4, 5]) >>> ts_data = TimeSeriesData(data) >>> train, test = ts_data.split(0.6) """ split_index = int(len(self.data) * train_size) train_data = self.data[:split_index] test_data = self.data[split_index:] return TimeSeriesData(train_data), TimeSeriesData(test_data)