Source code for bigdl.chronos.autots.deprecated.config.recipe

#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from bigdl.chronos.autots.deprecated.config.base import Recipe
from bigdl.orca.automl import hp
from bigdl.chronos.utils import deprecated


@deprecated('Please use `bigdl.orca.automl.hp` instead.')
class SmokeRecipe(Recipe):
    """
    A very simple Recipe for smoke test that runs one epoch and one iteration
    with only 1 random sample.
    """

    def __init__(self):
        '''
        __init__()
        '''
        super(self.__class__, self).__init__()

    def search_space(self):
        return {
            "model": "LSTM",
            "lstm_1_units": hp.choice([32, 64]),
            "dropout_1": hp.uniform(0.2, 0.5),
            "lstm_2_units": hp.choice([32, 64]),
            "dropout_2": hp.uniform(0.2, 0.5),
            "lr": 0.001,
            "batch_size": 1024,
            "epochs": 1,
            "past_seq_len": 2,
        }

@deprecated('Please use `bigdl.orca.automl.hp` instead.')
class MTNetSmokeRecipe(Recipe):
    """
    A very simple Recipe for smoke test that runs one epoch and one iteration
    with only 1 random sample.
    """

    def __init__(self):
        '''
        __init__()
        '''
        super(self.__class__, self).__init__()

    def search_space(self):
        return {
            "model": "MTNet",
            "lr": 0.001,
            "batch_size": 16,
            "epochs": 1,
            "cnn_dropout": 0.2,
            "rnn_dropout": 0.2,
            "time_step": hp.choice([3, 4]),
            "cnn_height": 2,
            "long_num": hp.choice([3, 4]),
            "ar_size": hp.choice([2, 3]),
            "past_seq_len": hp.sample_from(
                lambda spec: (spec.config.long_num + 1) * spec.config.time_step),
        }

@deprecated('Please use `bigdl.orca.automl.hp` instead.')
class TCNSmokeRecipe(Recipe):
    """
    A very simple Recipe for smoke test that runs one epoch and one iteration
    with only 1 random sample.
    """

    def __init__(self):
        '''
        __init__()
        '''
        super(self.__class__, self).__init__()

    def search_space(self):
        return {
            "lr": 0.001,
            "batch_size": 16,
            "nhid": 8,
            "levels": 8,
            "kernel_size": 3,
            "dropout": 0.1,
        }

class PastSeqParamHandler(object):
    """
    Utility to handle the PastSeq (look_back) param.
    """

    def __init__(self):
        pass

    @staticmethod
    @deprecated('Please use `bigdl.orca.automl.hp` instead.')
    def get_past_seq_config(look_back):
        """
        get_past_seq_config(look_back)

        Generate past sequence config based on look_back.

        :param look_back: look_back configuration
        :return: search configuration for past sequence
        """
        from bigdl.nano.utils.common import invalidInputError
        if isinstance(look_back, tuple) and len(look_back) == 2 and \
                isinstance(look_back[0], int) and isinstance(look_back[1], int):
            if look_back[1] < 2:
                invalidInputError(False,
                                  "The max look back value should be at least 2")
            if look_back[0] < 2:
                print("The input min look back value is smaller than 2. "
                      "We sample from range (2, {}) instead.".format(look_back[1]))
            past_seq_config = hp.randint(look_back[0], look_back[1] + 1)
        elif isinstance(look_back, int):
            if look_back < 2:
                invalidInputError(False,
                                  "look back value should not be smaller than 2. "
                                  "Current value is {}.".format(look_back))
            past_seq_config = look_back
        else:
            invalidInputError(False,
                              "look back is {}.\n"
                              "look_back should be either a tuple with 2 int values:"
                              " (min_len, max_len) or a single int".format(look_back))
        return past_seq_config

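# Illustrative sketch, not part of the original module: how the look_back
# argument maps to a search-space entry. Both branches follow directly from
# get_past_seq_config above; the helper name itself is hypothetical.
def _example_past_seq_usage():
    # A single int (>= 2) is passed through as a fixed past_seq_len.
    fixed_len = PastSeqParamHandler.get_past_seq_config(5)          # -> 5
    # A (min_len, max_len) tuple becomes hp.randint(min_len, max_len + 1),
    # so each trial samples a look-back length in [min_len, max_len].
    sampled_len = PastSeqParamHandler.get_past_seq_config((2, 10))  # -> hp.randint(2, 11)
    return fixed_len, sampled_len
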
@deprecated('Please use `bigdl.orca.automl.hp` instead.')
class GridRandomRecipe(Recipe):
    """
    A recipe that involves both grid search and random search.
    """

    def __init__(self,
                 num_rand_samples=1,
                 look_back=2,
                 epochs=5,
                 training_iteration=10):
        """
        __init__()

        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param look_back: the length to look back, either a tuple with 2 int values,
               in the format (min_len, max_len), or a single int, which is a fixed
               length to look back.
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param epochs: no. of epochs to train in each iteration
        """
        super(self.__class__, self).__init__()
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration
        self.past_seq_config = PastSeqParamHandler.get_past_seq_config(look_back)
        self.epochs = epochs

    def search_space(self):
        return {
            # -------- model selection TODO add MTNet
            "model": hp.choice(["LSTM", "Seq2seq"]),

            # --------- Vanilla LSTM model parameters
            "lstm_1_units": hp.grid_search([16, 32]),
            "dropout_1": 0.2,
            "lstm_2_units": hp.grid_search([16, 32]),
            "dropout_2": hp.uniform(0.2, 0.5),

            # ----------- Seq2Seq model parameters
            "latent_dim": hp.grid_search([32, 64]),
            "dropout": hp.uniform(0.2, 0.5),

            # ----------- optimization parameters
            "lr": hp.uniform(0.001, 0.01),
            "batch_size": hp.choice([32, 64]),
            "epochs": self.epochs,
            "past_seq_len": self.past_seq_config,
        }

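# Illustrative sketch, not part of the original module: constructing a
# GridRandomRecipe and inspecting its search space. Only names defined in this
# module are used; the helper name is hypothetical.
def _example_grid_random_usage():
    recipe = GridRandomRecipe(num_rand_samples=2, look_back=(2, 6),
                              epochs=5, training_iteration=10)
    space = recipe.search_space()
    # Grid-searched keys (e.g. "lstm_1_units") enumerate every candidate,
    # while random keys (e.g. "lr", "dropout_2") are re-sampled per trial.
    return space
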
@deprecated('Please use `bigdl.orca.automl.hp` instead.')
class LSTMSeq2SeqRandomRecipe(Recipe):
    """
    A recipe that involves both grid search and random search, only for Seq2SeqPytorch.

    Note: This recipe is specifically designed for third-party model searching,
    rather than TimeSequencePredictor.
    """

    def __init__(self,
                 input_feature_num,
                 output_feature_num,
                 future_seq_len,
                 num_rand_samples=1,
                 epochs=1,
                 training_iteration=20,
                 batch_size=[128, 256, 512],
                 lr=(0.001, 0.01),
                 lstm_hidden_dim=[64, 128],
                 lstm_layer_num=[1, 2, 3, 4],
                 dropout=(0, 0.25),
                 teacher_forcing=[True, False]):
        """
        __init__()

        Constructor. Set a param to a list for grid search.
        Set a param to a tuple with length = 2 for random search.

        :param input_feature_num: (int) no. of input features
        :param output_feature_num: (int) no. of output features
        :param future_seq_len: (int) no. of steps to be predicted (i.e. horizon)
        :param num_rand_samples: (int) number of hyper-param configurations sampled randomly
        :param epochs: (int) no. of epochs to train in each iteration
        :param training_iteration: (int) no. of iterations for training (n epochs) in trials
        :param batch_size: (tuple|list) grid search candidates for batch size
        :param lr: (tuple|list) learning rate
        :param lstm_hidden_dim: (tuple|list) lstm hidden dim for both encoder and decoder
        :param lstm_layer_num: (tuple|list) no. of lstm layers for both encoder and decoder
        :param dropout: (tuple|list) dropout for lstm layers
        :param teacher_forcing: (list) whether to use the teacher forcing mechanism
               during training
        """
        super(self.__class__, self).__init__()
        # -- runtime params
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration

        # -- optimization params
        self.lr = self._gen_sample_func(lr, "lr")
        self.batch_size = self._gen_sample_func(batch_size, "batch_size")
        self.epochs = epochs

        # -- model params
        self.input_feature_num = input_feature_num
        self.output_feature_num = output_feature_num
        self.future_seq_len = future_seq_len
        self.lstm_hidden_dim = self._gen_sample_func(lstm_hidden_dim, "lstm_hidden_dim")
        self.lstm_layer_num = self._gen_sample_func(lstm_layer_num, "lstm_layer_num")
        self.dropout = self._gen_sample_func(dropout, "dropout")
        self.teacher_forcing = self._gen_sample_func(teacher_forcing, "teacher_forcing")

    def _gen_sample_func(self, ranges, param_name):
        from bigdl.nano.utils.common import invalidInputError
        if isinstance(ranges, tuple):
            invalidInputError(len(ranges) == 2,
                              f"length of tuple {param_name} should be 2"
                              f" but got {len(ranges)} instead.")
            invalidInputError(param_name != "teacher_forcing",
                              f"type of {param_name} can only be a list, but got a tuple")
            if param_name in ["lr"]:
                return hp.loguniform(lower=ranges[0], upper=ranges[1])
            if param_name in ["lstm_hidden_dim", "lstm_layer_num", "batch_size"]:
                return hp.randint(lower=ranges[0], upper=ranges[1])
            if param_name in ["dropout"]:
                return hp.uniform(lower=ranges[0], upper=ranges[1])
        if isinstance(ranges, list):
            return hp.grid_search(ranges)
        invalidInputError(False, f"{param_name} should be either a list or a tuple.")

    def search_space(self):
        return {
            # ----------- data parameters
            "input_feature_num": self.input_feature_num,
            "output_feature_num": self.output_feature_num,
            "future_seq_len": self.future_seq_len,

            # ----------- optimization parameters
            "lr": self.lr,
            "batch_size": self.batch_size,
            "epochs": self.epochs,

            # ----------- model parameters
            "lstm_hidden_dim": self.lstm_hidden_dim,
            "lstm_layer_num": self.lstm_layer_num,
            "dropout": self.dropout,
            "teacher_forcing": self.teacher_forcing,
        }

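# Illustrative sketch, not part of the original module: the tuple-vs-list
# convention used by LSTMSeq2SeqRandomRecipe. Lists become grid-search
# candidates, 2-element tuples become random-search ranges (see
# _gen_sample_func above). The helper name is hypothetical.
def _example_lstm_seq2seq_usage():
    recipe = LSTMSeq2SeqRandomRecipe(
        input_feature_num=4,
        output_feature_num=1,
        future_seq_len=5,
        batch_size=[128, 256],          # list  -> hp.grid_search([128, 256])
        lr=(0.001, 0.01),               # tuple -> hp.loguniform(0.001, 0.01)
        dropout=(0, 0.25),              # tuple -> hp.uniform(0, 0.25)
        teacher_forcing=[True, False])  # must be a list (grid search)
    return recipe.search_space()
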
@deprecated('Please use `bigdl.orca.automl.hp` instead.')
class LSTMGridRandomRecipe(Recipe):
    """
    A recipe that involves both grid search and random search, only for LSTM.
    """

    def __init__(self,
                 num_rand_samples=1,
                 epochs=5,
                 training_iteration=10,
                 look_back=2,
                 lstm_1_units=[16, 32, 64, 128],
                 lstm_2_units=[16, 32, 64],
                 batch_size=[32, 64]):
        """
        __init__()

        Constructor.

        :param lstm_1_units: random search candidates for num of lstm_1_units
        :param lstm_2_units: grid search candidates for num of lstm_2_units
        :param batch_size: grid search candidates for batch size
        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param look_back: the length to look back, either a tuple with 2 int values,
               in the format (min_len, max_len), or a single int, which is a fixed
               length to look back.
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param epochs: no. of epochs to train in each iteration
        """
        super(self.__class__, self).__init__()
        # -- runtime params
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration

        # -- model params
        self.past_seq_config = PastSeqParamHandler.get_past_seq_config(look_back)
        self.lstm_1_units_config = hp.choice(lstm_1_units)
        self.lstm_2_units_config = hp.grid_search(lstm_2_units)
        self.dropout_2_config = hp.uniform(0.2, 0.5)

        # -- optimization params
        self.lr = hp.uniform(0.001, 0.01)
        self.batch_size = hp.grid_search(batch_size)
        self.epochs = epochs

    def search_space(self):
        return {
            "model": "LSTM",

            # --------- Vanilla LSTM model parameters
            "lstm_1_units": self.lstm_1_units_config,
            "dropout_1": 0.2,
            "lstm_2_units": self.lstm_2_units_config,
            "dropout_2": self.dropout_2_config,

            # ----------- optimization parameters
            "lr": self.lr,
            "batch_size": self.batch_size,
            "epochs": self.epochs,
            "past_seq_len": self.past_seq_config,
        }

@deprecated('Please use `bigdl.orca.automl.hp` instead.')
class Seq2SeqRandomRecipe(Recipe):
    """
    A recipe that involves both grid search and random search, only for Seq2Seq.
    """

    def __init__(self,
                 num_rand_samples=1,
                 epochs=5,
                 training_iteration=10,
                 look_back=2,
                 latent_dim=[32, 64, 128, 256],
                 batch_size=[32, 64]):
        """
        __init__()

        Constructor.

        :param latent_dim: random search candidates for the latent dimension
        :param batch_size: grid search candidates for batch size
        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param look_back: the length to look back, either a tuple with 2 int values,
               in the format (min_len, max_len), or a single int, which is a fixed
               length to look back.
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param epochs: no. of epochs to train in each iteration
        """
        super(self.__class__, self).__init__()
        # -- runtime params
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration

        # -- model params
        self.past_seq_config = PastSeqParamHandler.get_past_seq_config(look_back)
        self.latent_dim = hp.choice(latent_dim)
        self.dropout_config = hp.uniform(0.2, 0.5)

        # -- optimization params
        self.lr = hp.uniform(0.001, 0.01)
        self.batch_size = hp.grid_search(batch_size)
        self.epochs = epochs

    def search_space(self):
        return {
            "model": "Seq2Seq",
            "latent_dim": self.latent_dim,
            "dropout": self.dropout_config,

            # ----------- optimization parameters
            "lr": self.lr,
            "batch_size": self.batch_size,
            "epochs": self.epochs,
            "past_seq_len": self.past_seq_config,
        }

@deprecated('Please use `bigdl.orca.automl.hp` instead.')
class MTNetGridRandomRecipe(Recipe):
    """
    Grid + Random Recipe for MTNet.
    """

    def __init__(self,
                 num_rand_samples=1,
                 epochs=5,
                 training_iteration=10,
                 time_step=[3, 4],
                 long_num=[3, 4],
                 cnn_height=[2, 3],
                 cnn_hid_size=[32, 50, 100],
                 ar_size=[2, 3],
                 batch_size=[32, 64]):
        """
        __init__()

        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param epochs: no. of epochs to train in each iteration
        :param time_step: random search candidates for model param "time_step"
        :param long_num: random search candidates for model param "long_num"
        :param ar_size: random search candidates for model param "ar_size"
        :param batch_size: grid search candidates for batch size
        :param cnn_height: random search candidates for model param "cnn_height"
        :param cnn_hid_size: random search candidates for model param "cnn_hid_size"
        """
        super(self.__class__, self).__init__()
        # -- runtime params
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration

        # -- optimization params
        self.lr = hp.uniform(0.001, 0.01)
        self.batch_size = hp.grid_search(batch_size)
        self.epochs = epochs

        # -- model params
        self.cnn_dropout = hp.uniform(0.2, 0.5)
        self.rnn_dropout = hp.uniform(0.2, 0.5)
        self.time_step = hp.choice(time_step)
        self.long_num = hp.choice(long_num)
        self.cnn_height = hp.choice(cnn_height)
        self.cnn_hid_size = hp.choice(cnn_hid_size)
        self.ar_size = hp.choice(ar_size)
        self.past_seq_len = hp.sample_from(
            lambda spec: (spec.config.long_num + 1) * spec.config.time_step)

    def search_space(self):
        return {
            "model": "MTNet",
            "lr": self.lr,
            "batch_size": self.batch_size,
            "epochs": self.epochs,
            "cnn_dropout": self.cnn_dropout,
            "rnn_dropout": self.rnn_dropout,
            "time_step": self.time_step,
            "long_num": self.long_num,
            "ar_size": self.ar_size,
            "past_seq_len": self.past_seq_len,
            "cnn_hid_size": self.cnn_hid_size,
            "cnn_height": self.cnn_height,
        }

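# Illustrative sketch, not part of the original module: in the MTNet recipes,
# "past_seq_len" is not tuned independently; hp.sample_from derives it from the
# sampled "long_num" and "time_step" of the same trial, e.g. a trial with
# long_num=3 and time_step=4 gets past_seq_len = (3 + 1) * 4 = 16.
# The helper name is hypothetical.
def _example_mtnet_usage():
    recipe = MTNetGridRandomRecipe(time_step=[3, 4], long_num=[3, 4],
                                   batch_size=[32, 64])
    return recipe.search_space()
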
@deprecated('Please use `bigdl.orca.automl.hp` instead.')
class TCNGridRandomRecipe(Recipe):
    """
    Grid + Random Recipe for TCN.
    """
    # TODO: use some more generalized exp hyperparameters

    def __init__(self,
                 num_rand_samples=1,
                 training_iteration=40,
                 batch_size=[256, 512],
                 hidden_size=[32, 48],
                 levels=[6, 8],
                 kernel_size=[3, 5],
                 dropout=[0, 0.1],
                 lr=[0.001, 0.003]):
        """
        __init__()

        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param batch_size: grid search candidates for batch size
        :param hidden_size: grid search candidates for hidden size of each layer
        :param levels: the number of layers
        :param kernel_size: the kernel size of each layer
        :param dropout: dropout rate (1 - keep probability)
        :param lr: learning rate
        """
        super(self.__class__, self).__init__()
        # -- runtime params
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration

        # -- optimization params
        self.lr = hp.choice(lr)
        self.batch_size = hp.grid_search(batch_size)

        # -- model params
        self.hidden_size = hp.grid_search(hidden_size)
        self.levels = hp.grid_search(levels)
        self.kernel_size = hp.grid_search(kernel_size)
        self.dropout = hp.choice(dropout)

    def search_space(self):
        return {
            "lr": self.lr,
            "batch_size": self.batch_size,
            "nhid": self.hidden_size,
            "levels": self.levels,
            "kernel_size": self.kernel_size,
            "dropout": self.dropout,
        }

@deprecated('Please use `bigdl.orca.automl.hp` instead.')
class RandomRecipe(Recipe):
    """
    Pure random sample Recipe. Often used as a baseline.
    """

    def __init__(self,
                 num_rand_samples=1,
                 look_back=2,
                 epochs=5,
                 reward_metric=-0.05,
                 training_iteration=10):
        """
        __init__()

        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param look_back: the length to look back, either a tuple with 2 int values,
               in the format (min_len, max_len), or a single int, which is a fixed
               length to look back.
        :param reward_metric: the rewarding metric value; when reached, stop the trial
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param epochs: no. of epochs to train in each iteration
        """
        super(self.__class__, self).__init__()
        self.num_samples = num_rand_samples
        self.reward_metric = reward_metric
        self.training_iteration = training_iteration
        self.epochs = epochs
        self.past_seq_config = PastSeqParamHandler.get_past_seq_config(look_back)

    def search_space(self):
        return {
            "model": hp.choice(["LSTM", "Seq2seq"]),

            # --------- Vanilla LSTM model parameters
            "lstm_1_units": hp.choice([8, 16, 32, 64, 128]),
            "dropout_1": hp.uniform(0.2, 0.5),
            "lstm_2_units": hp.choice([8, 16, 32, 64, 128]),
            "dropout_2": hp.uniform(0.2, 0.5),

            # ----------- Seq2Seq model parameters
            "latent_dim": hp.choice([32, 64, 128, 256]),
            "dropout": hp.uniform(0.2, 0.5),

            # ----------- optimization parameters
            "lr": hp.uniform(0.001, 0.01),
            "batch_size": hp.choice([32, 64, 1024]),
            "epochs": self.epochs,
            "past_seq_len": self.past_seq_config,
        }

@deprecated('Please use `bigdl.orca.automl.hp` instead.')
class BayesRecipe(Recipe):
    """
    A Bayes search Recipe. (Experimental)
    """

    def __init__(self,
                 num_samples=1,
                 look_back=2,
                 epochs=5,
                 reward_metric=-0.05,
                 training_iteration=5):
        """
        __init__()

        Constructor.

        :param num_samples: number of hyper-param configurations sampled
        :param look_back: the length to look back, either a tuple with 2 int values,
               in the format (min_len, max_len), or a single int, which is a fixed
               length to look back.
        :param reward_metric: the rewarding metric value; when reached, stop the trial
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param epochs: no. of epochs to train in each iteration
        """
        super(self.__class__, self).__init__()
        from bigdl.nano.utils.common import invalidInputError
        self.num_samples = num_samples
        self.reward_metric = reward_metric
        self.training_iteration = training_iteration
        self.epochs = epochs
        if isinstance(look_back, tuple) and len(look_back) == 2 and \
                isinstance(look_back[0], int) and isinstance(look_back[1], int):
            if look_back[1] < 2:
                invalidInputError(False,
                                  "The max look back value should be at least 2")
            if look_back[0] < 2:
                print("The input min look back value is smaller than 2. "
                      "We sample from range (2, {}) instead.".format(look_back[1]))
            self.bayes_past_seq_config = \
                {"past_seq_len_float": hp.uniform(look_back[0], look_back[1])}
        elif isinstance(look_back, int):
            if look_back < 2:
                invalidInputError(False,
                                  "look back value should not be smaller than 2. "
                                  "Current value is {}.".format(look_back))
            self.bayes_past_seq_config = {"past_seq_len": look_back}
        else:
            invalidInputError(False,
                              "look back is {}.\n"
                              "look_back should be either a tuple with 2 int values:"
                              " (min_len, max_len) or a single int".format(look_back))

    def search_space(self):
        total_params = {
            "epochs": self.epochs,
            "model": "LSTM",

            # --------- model parameters
            "lstm_1_units_float": hp.uniform(8, 128),
            "dropout_1": hp.uniform(0.2, 0.5),
            "lstm_2_units_float": hp.uniform(8, 128),
            "dropout_2": hp.uniform(0.2, 0.5),

            # ----------- optimization parameters
            "lr": hp.uniform(0.001, 0.1),
            "batch_size_float": hp.uniform(32, 128),
        }
        total_params.update(self.bayes_past_seq_config)
        return total_params

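# Illustrative sketch, not part of the original module: BayesRecipe exposes
# integer-valued hyper-parameters with a "_float" suffix (e.g.
# "lstm_1_units_float", "batch_size_float"), presumably so the Bayesian
# optimizer can search a continuous range and the values can be converted back
# to ints downstream. The helper name is hypothetical.
def _example_bayes_usage():
    recipe = BayesRecipe(num_samples=10, look_back=(2, 8))
    space = recipe.search_space()
    # With a (min, max) look_back, the past sequence length is also searched
    # as a float: {"past_seq_len_float": hp.uniform(2, 8)}.
    return space
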
class XgbRegressorGridRandomRecipe(Recipe):
    """
    Grid + Random Recipe for XGBoost Regressor.
    """

    def __init__(self,
                 num_rand_samples=1,
                 n_estimators=[8, 15],
                 max_depth=[10, 15],
                 n_jobs=-1,
                 tree_method='hist',
                 random_state=2,
                 seed=0,
                 lr=(1e-4, 1e-1),
                 subsample=0.8,
                 colsample_bytree=0.8,
                 min_child_weight=[1, 2, 3],
                 gamma=0,
                 reg_alpha=0,
                 reg_lambda=1):
        """
        Constructor.

        For XGBoost hyper parameters, refer to
        https://xgboost.readthedocs.io/en/latest/python/python_api.html for details.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param n_estimators: number of gradient boosted trees.
        :param max_depth: max tree depth
        :param n_jobs: number of parallel threads used to run xgboost.
        :param tree_method: specify which tree method to use.
        :param random_state: random number seed.
        :param seed: seed used to generate the folds
        :param lr: learning rate
        :param subsample: subsample ratio of the training instance
        :param colsample_bytree: subsample ratio of columns when constructing each tree.
        :param min_child_weight: minimum sum of instance weight (hessian) needed in a child.
        :param gamma: minimum loss reduction required to make a further partition
               on a leaf node of the tree.
        :param reg_alpha: L1 regularization term on weights (xgb's alpha).
        :param reg_lambda: L2 regularization term on weights (xgb's lambda).
        """
        super(self.__class__, self).__init__()
        self.num_samples = num_rand_samples
        self.n_jobs = n_jobs
        self.tree_method = tree_method
        self.random_state = random_state
        self.seed = seed
        self.colsample_bytree = colsample_bytree
        self.gamma = gamma
        self.reg_alpha = reg_alpha
        self.reg_lambda = reg_lambda

        self.n_estimators = hp.grid_search(n_estimators)
        self.max_depth = hp.grid_search(max_depth)
        self.lr = hp.loguniform(lr[0], lr[-1])
        self.subsample = subsample
        self.min_child_weight = hp.choice(min_child_weight)

    def search_space(self):
        return {
            # -------- feature related parameters
            "model": "XGBRegressor",
            "imputation": hp.choice(["LastFillImpute", "FillZeroImpute"]),
            "n_estimators": self.n_estimators,
            "max_depth": self.max_depth,
            "min_child_weight": self.min_child_weight,
            "lr": self.lr,
        }

class XgbRegressorSkOptRecipe(Recipe):
    """
    A recipe using SkOpt search algorithm for XGBoost Regressor.
    """

    def __init__(self,
                 num_rand_samples=10,
                 n_estimators_range=(50, 1000),
                 max_depth_range=(2, 15),
                 lr=(1e-4, 1e-1),
                 min_child_weight=[1, 2, 3]):
        """
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param n_estimators_range: range of number of gradient boosted trees.
        :param max_depth_range: range of max tree depth
        :param lr: learning rate
        :param min_child_weight: minimum sum of instance weight (hessian) needed in a child.
        """
        super(self.__class__, self).__init__()
        self.num_samples = num_rand_samples
        self.n_estimators_range = n_estimators_range
        self.max_depth_range = max_depth_range
        self.lr = hp.loguniform(lr[0], lr[1])
        self.min_child_weight = hp.choice(min_child_weight)

    def search_space(self):
        space = {
            "n_estimators": hp.randint(self.n_estimators_range[0],
                                       self.n_estimators_range[1]),
            "max_depth": hp.randint(self.max_depth_range[0],
                                    self.max_depth_range[1]),
            "min_child_weight": self.min_child_weight,
            "lr": self.lr,
        }
        return space

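# Illustrative sketch, not part of the original module: the two XGBoost recipes
# build their search spaces from the same hp primitives, differing only in
# strategy (grid + random candidates vs. continuous/integer ranges intended for
# the SkOpt search algorithm). The helper name is hypothetical.
def _example_xgb_usage():
    grid_recipe = XgbRegressorGridRandomRecipe(n_estimators=[8, 15],
                                               max_depth=[10, 15])
    skopt_recipe = XgbRegressorSkOptRecipe(n_estimators_range=(50, 1000),
                                           max_depth_range=(2, 15))
    return grid_recipe.search_space(), skopt_recipe.search_space()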