Source code for interpreTS.core.features.feature_histogram_dominant
import pandas as pd
import numpy as np
[docs]
def calculate_dominant(data, bins=10, return_bin_center=False):
    """
    Calculate the dominant value (mode) of a time series histogram.

    The data is binned with ``np.histogram`` and the bin holding the most
    samples is selected. When several bins share the maximum count, the
    lowest one wins because ``np.argmax`` returns the first maximum.

    Parameters
    ----------
    data : pd.Series, np.ndarray or array-like
        The time series data for which the dominant value is to be calculated.
    bins : int, optional
        The number of bins to use for creating the histogram, by default 10.
        The value is forwarded unchanged to ``np.histogram``.
    return_bin_center : bool, optional
        If True, return the center of the bin with the maximum frequency.
        Otherwise, return the lower bound of the bin (default is False).

    Returns
    -------
    float
        The dominant value of the histogram (either the center or the lower
        bound of the bin), or ``np.nan`` when `data` is empty.

    Raises
    ------
    ValueError
        Propagated from ``np.histogram`` when the data contains NaN values
        (the automatically detected bin range is then not finite).

    Notes
    -----
    No explicit type validation is performed here; any other exception
    raised by ``np.histogram`` for unsupported input propagates unchanged.

    Examples
    --------
    >>> import numpy as np
    >>> data = np.array([1, 2, 2, 3, 3, 3, 4, 4, 5])
    >>> float(calculate_dominant(data, bins=5))
    2.6
    >>> data = np.array([10, 20, 20, 30, 30, 30, 40, 40, 50])
    >>> float(calculate_dominant(data, bins=5, return_bin_center=True))
    30.0
    >>> data = np.array([1, 1, 1, 1, 1])
    >>> round(float(calculate_dominant(data, bins=3)), 4)
    0.8333
    """
    # Normalise once so that every array-like (Series, DataFrame, ndarray,
    # list, tuple, ...) goes through the same emptiness check.
    values = np.asarray(data)

    # Without this guard np.histogram would bin an empty input over its
    # default [0, 1] range and report a meaningless dominant value.
    if values.size == 0:
        return np.nan

    counts, bin_edges = np.histogram(values, bins=bins)
    max_bin_index = np.argmax(counts)

    lower_edge = bin_edges[max_bin_index]
    if return_bin_center:
        # The center is the midpoint between the bin's two edges.
        return (lower_edge + bin_edges[max_bin_index + 1]) / 2
    return lower_edge