Hyperopt Tuning


I am trying to do hyperparameter tuning with Hyperopt on the latest versions of both scikit-learn and hyperopt. When max_features is sampled from the list ['auto','sqrt','log2'], it throws an error saying the value should be an int.

from hyperopt import hp,fmin,tpe,STATUS_OK,Trials
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
space={
    'criterion':hp.choice('criterion',['entropy','gini']),
    'max_depth':hp.quniform('max_depth',10,1200,10),
    'max_features':hp.choice('max_features',['auto','sqrt','log2',None]),
    'min_samples_leaf':hp.uniform('min_samples_leaf',0,0.5),
    'min_samples_split':hp.uniform('min_samples_split',0,1),
    'n_estimators':hp.choice('n_estimators',[10,50,300,750,1200,1300,1800,2000])
}
def objective(space):
    model=RandomForestClassifier(criterion=space['criterion'],
                                 max_depth=int(space['max_depth']),
                                 #max_features=space['max_features'],
                                 min_samples_leaf=space['min_samples_leaf'],
                                 min_samples_split=space['min_samples_split'],
                                 n_estimators=space['n_estimators'])
    accuracy=cross_val_score(model,X_train,Y_train,cv=5).mean()
    return {'loss':-accuracy,'status':STATUS_OK}
trials=Trials()
best=fmin(fn=objective,
          space=space,
          algo=tpe.suggest,
          max_evals=80,
          trials=trials)
best

The error I am getting:

InvalidParameterError: The 'max_features' parameter of RandomForestClassifier must be an int in the range [1, inf), a float in the range (0.0, 1.0], a str among {'sqrt', 'log2'} or None. Got 'auto' instead.

Also, if I comment out max_features, the code runs perfectly.

---------------------------------------------------------------------------
InvalidParameterError                     Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_35460\1441699718.py in <module>
     23 
     24 # Step 5: Run Bayesian Optimization
---> 25 best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=100)
     26 
     27 # Step 6: Retrieve best hyperparameters

~\Anaconda3\lib\site-packages\hyperopt\fmin.py in fmin(fn, space, algo, max_evals, timeout, loss_threshold, trials, rstate, allow_trials_fmin, pass_expr_memo_ctrl, catch_eval_exceptions, verbose, return_argmin, points_to_evaluate, max_queue_len, show_progressbar, early_stop_fn, trials_save_file)
    584 
    585     # next line is where the fmin is actually executed
--> 586     rval.exhaust()
    587 
    588     if return_argmin:

~\Anaconda3\lib\site-packages\hyperopt\fmin.py in exhaust(self)
    362     def exhaust(self):
    363         n_done = len(self.trials)
--> 364         self.run(self.max_evals - n_done, block_until_done=self.asynchronous)
    365         self.trials.refresh()
    366         return self

~\Anaconda3\lib\site-packages\hyperopt\fmin.py in run(self, N, block_until_done)
    298                 else:
    299                     # -- loop over trials and do the jobs directly
--> 300                     self.serial_evaluate()
    301 
    302                 self.trials.refresh()

~\Anaconda3\lib\site-packages\hyperopt\fmin.py in serial_evaluate(self, N)
    176                 ctrl = base.Ctrl(self.trials, current_trial=trial)
    177                 try:
--> 178                     result = self.domain.evaluate(spec, ctrl)
    179                 except Exception as e:
    180                     logger.error("job exception: %s" % str(e))

~\Anaconda3\lib\site-packages\hyperopt\base.py in evaluate(self, config, ctrl, attach_attachments)
    890                 print_node_on_error=self.rec_eval_print_node_on_error,
    891             )
--> 892             rval = self.fn(pyll_rval)
    893 
    894         if isinstance(rval, (float, int, np.number)):

~\AppData\Local\Temp\ipykernel_35460\1441699718.py in objective(params)
      7 def objective(params):
      8     clf = RandomForestClassifier(**params)
----> 9     clf.fit(X_train, Y_train)
     10     y_pred = clf.predict(X_test)
     11     accuracy = accuracy_score(Y_test, y_pred)

~\Anaconda3\lib\site-packages\sklearn\base.py in wrapper(estimator, *args, **kwargs)
   1142 
   1143             if not global_skip_validation and not partial_fit_and_fitted:
-> 1144                 estimator._validate_params()
   1145 
   1146             with config_context(

~\Anaconda3\lib\site-packages\sklearn\base.py in _validate_params(self)
    635         accepted constraints.
    636         """
--> 637         validate_parameter_constraints(
    638             self._parameter_constraints,
    639             self.get_params(deep=False),

~\Anaconda3\lib\site-packages\sklearn\utils\_param_validation.py in validate_parameter_constraints(parameter_constraints, params, caller_name)
     93                 )
     94 
---> 95             raise InvalidParameterError(
     96                 f"The {param_name!r} parameter of {caller_name} must be"
     97                 f" {constraints_str}. Got {param_val!r} instead."

InvalidParameterError: The 'max_features' parameter of RandomForestClassifier must be an int in the range [1, inf), a float in the range (0.0, 1.0], a str among {'sqrt', 'log2'} or None. Got 'auto' instead.

There is 1 answer below.

Answer by Muhammed Yunus:

RandomForestClassifier no longer accepts "auto" for max_features (it was deprecated in scikit-learn 1.1 and removed in 1.3), so you need to remove "auto" from the list of choices. The only acceptable values are "sqrt", "log2", None, or an int/float.

https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html
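
A minimal sketch of the corrected search space, assuming a toy dataset from sklearn.datasets.make_classification in place of the original X_train/Y_train (which are not shown in the question); the fix is simply dropping 'auto' from the max_features choices. The lower bounds on min_samples_leaf and min_samples_split are nudged above 0 because scikit-learn requires these fractions to be strictly positive, and max_evals is reduced so the sketch runs quickly:

from hyperopt import hp, fmin, tpe, STATUS_OK, Trials, space_eval
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

# Toy data standing in for the asker's X_train/Y_train
X_train, Y_train = make_classification(n_samples=200, n_features=10, random_state=0)

space = {
    'criterion': hp.choice('criterion', ['entropy', 'gini']),
    'max_depth': hp.quniform('max_depth', 10, 1200, 10),
    # 'auto' removed: only 'sqrt', 'log2', None (or an int/float) are valid
    'max_features': hp.choice('max_features', ['sqrt', 'log2', None]),
    'min_samples_leaf': hp.uniform('min_samples_leaf', 0.001, 0.5),
    'min_samples_split': hp.uniform('min_samples_split', 0.001, 1),
    'n_estimators': hp.choice('n_estimators', [10, 50, 300, 750, 1200]),
}

def objective(params):
    model = RandomForestClassifier(criterion=params['criterion'],
                                   max_depth=int(params['max_depth']),  # quniform returns a float
                                   max_features=params['max_features'],
                                   min_samples_leaf=params['min_samples_leaf'],
                                   min_samples_split=params['min_samples_split'],
                                   n_estimators=params['n_estimators'])
    accuracy = cross_val_score(model, X_train, Y_train, cv=5).mean()
    return {'loss': -accuracy, 'status': STATUS_OK}

trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=10, trials=trials)

# fmin returns indices for hp.choice parameters; space_eval maps them back to the actual values
print(space_eval(space, best))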