From de9c30e58679ed2ab4ece97ca81e03ae2c6eb72b Mon Sep 17 00:00:00 2001 From: Kevin Chen Date: Wed, 19 Oct 2022 00:31:48 -0400 Subject: [PATCH 1/5] update automl.py - tft gap detection on each time series independently --- flaml/automl.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/flaml/automl.py b/flaml/automl.py index 71d7bd43b6..d4ee5f8575 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -1027,11 +1027,25 @@ def _validate_ts_data( dataframe[[dataframe.columns[0]]].duplicated() is None ), "Duplicate timestamp values with different values for other columns." ts_series = pd.to_datetime(dataframe[dataframe.columns[0]]) - inferred_freq = pd.infer_freq(ts_series) - if inferred_freq is None: - logger.warning( - "Missing timestamps detected. To avoid error with estimators, set estimator list to ['prophet']. " - ) + if self._state.task == TS_FORECASTPANEL: + # check for each time series independently + group_ids = self._state.fit_kwargs.get("group_ids") + unique_ids = dataframe[group_ids].value_counts().reset_index()[group_ids] + for _, row in unique_ids: + df = dataframe.copy() + for id in group_ids: + ts = df.loc[df[id] == row[id]] + inferred_freq = pd.infer_freq(ts) + if inferred_freq is None: + logger.warning( + "Missing timestamps detected. To avoid error with estimators, set estimator list to ['prophet']. " + ) + else: + inferred_freq = pd.infer_freq(ts_series) + if inferred_freq is None: + logger.warning( + "Missing timestamps detected. To avoid error with estimators, set estimator list to ['prophet']. " + ) if y_train_all is not None: return dataframe.iloc[:, :-1], dataframe.iloc[:, -1] return dataframe From 5fc62551afb0047f3cef0bd2d785ed5da02ab4c7 Mon Sep 17 00:00:00 2001 From: Kevin Chen Date: Wed, 19 Oct 2022 02:23:47 -0400 Subject: [PATCH 2/5] update automl.py --- flaml/automl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flaml/automl.py b/flaml/automl.py index d4ee5f8575..1b25d44f4b 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -1031,7 +1031,7 @@ def _validate_ts_data( # check for each time series independently group_ids = self._state.fit_kwargs.get("group_ids") unique_ids = dataframe[group_ids].value_counts().reset_index()[group_ids] - for _, row in unique_ids: + for _, row in unique_ids.iterrows(): df = dataframe.copy() for id in group_ids: ts = df.loc[df[id] == row[id]] From 99b7c1bf2edf32cee84df1b7b68b018a4c4e24e4 Mon Sep 17 00:00:00 2001 From: Kevin Chen Date: Wed, 19 Oct 2022 04:08:44 -0400 Subject: [PATCH 3/5] update automl.py - fix bugs --- flaml/automl.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/flaml/automl.py b/flaml/automl.py index 1b25d44f4b..eb73165327 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -1026,7 +1026,6 @@ def _validate_ts_data( assert ( dataframe[[dataframe.columns[0]]].duplicated() is None ), "Duplicate timestamp values with different values for other columns." - ts_series = pd.to_datetime(dataframe[dataframe.columns[0]]) if self._state.task == TS_FORECASTPANEL: # check for each time series independently group_ids = self._state.fit_kwargs.get("group_ids") @@ -1035,12 +1034,14 @@ def _validate_ts_data( df = dataframe.copy() for id in group_ids: ts = df.loc[df[id] == row[id]] - inferred_freq = pd.infer_freq(ts) + ts_series = pd.to_datetime(ts[ts.columns[0]]) + inferred_freq = pd.infer_freq(ts_series) if inferred_freq is None: logger.warning( "Missing timestamps detected. To avoid error with estimators, set estimator list to ['prophet']. " ) else: + ts_series = pd.to_datetime(dataframe[dataframe.columns[0]]) inferred_freq = pd.infer_freq(ts_series) if inferred_freq is None: logger.warning( From 31b326ff1628576d5f9fb78317f7dacb57d38b77 Mon Sep 17 00:00:00 2001 From: Kevin Chen Date: Wed, 19 Oct 2022 14:36:14 -0400 Subject: [PATCH 4/5] update automl.py - use groupby --- flaml/automl.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/flaml/automl.py b/flaml/automl.py index eb73165327..10a8d8fd26 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -1029,17 +1029,15 @@ def _validate_ts_data( if self._state.task == TS_FORECASTPANEL: # check for each time series independently group_ids = self._state.fit_kwargs.get("group_ids") - unique_ids = dataframe[group_ids].value_counts().reset_index()[group_ids] - for _, row in unique_ids.iterrows(): - df = dataframe.copy() - for id in group_ids: - ts = df.loc[df[id] == row[id]] - ts_series = pd.to_datetime(ts[ts.columns[0]]) - inferred_freq = pd.infer_freq(ts_series) - if inferred_freq is None: - logger.warning( - "Missing timestamps detected. To avoid error with estimators, set estimator list to ['prophet']. " - ) + group_df = dataframe.groupby([group_ids]) + for group in group_df: + ts = group[1] + ts_series = pd.to_datetime(ts[ts.columns[0]]) + inferred_freq = pd.infer_freq(ts_series) + if inferred_freq is None: + logger.warning( + "Missing timestamps detected." + ) else: ts_series = pd.to_datetime(dataframe[dataframe.columns[0]]) inferred_freq = pd.infer_freq(ts_series) From 5fdd8fae87e361436f8bcbb1d36fa73713cff746 Mon Sep 17 00:00:00 2001 From: Kevin Chen Date: Wed, 19 Oct 2022 15:51:14 -0400 Subject: [PATCH 5/5] update automl.py - fix issue --- flaml/automl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flaml/automl.py b/flaml/automl.py index 10a8d8fd26..34172bfab1 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -1029,7 +1029,7 @@ def _validate_ts_data( if self._state.task == TS_FORECASTPANEL: # check for each time series independently group_ids = self._state.fit_kwargs.get("group_ids") - group_df = dataframe.groupby([group_ids]) + group_df = dataframe.groupby(group_ids) for group in group_df: ts = group[1] ts_series = pd.to_datetime(ts[ts.columns[0]])