I am using Bayesian optimization to get best hyperparameters with the smallest loss. The code is:
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from hyperopt import STATUS_OK
from hyperopt import fmin, tpe, hp
# Define the hyperparameter space
space = {
'max_depth': hp.choice('max_depth', np.arange(5, 8, dtype=int)),
'max_leaves': hp.choice('max_leaves', np.arange(2, 10, dtype=int)),
'num_parallel_tree': hp.choice('num_parallel_tree', np.arange(7, 11, dtype=int)),
'n_estimators': hp.choice('n_estimators', np.arange(500, 800, dtype=int)),
'max_bin': hp.choice('max_bin', np.arange(100, 300, dtype=int)),
'learning_rate': hp.uniform('learning_rate', 0.01, 0.3),
'subsample': hp.uniform('subsample', 0.5, 1),
'colsample_bytree': hp.uniform('colsample_bytree', 0.6, 1),
'colsample_bylevel': hp.uniform('colsample_bylevel', 0.8, 1),
'colsample_bynode': hp.uniform('colsample_bynode', 0.8, 1),
'min_split_loss': hp.uniform('min_split_loss', 0.8, 1),
'reg_alpha': hp.uniform('reg_alpha', 0.5, 5),
'reg_lambda': hp.uniform('reg_lambda', 1, 3),
'max_delta_step': hp.uniform('max_delta_step', 0.6, 1)
}
# Define the objective function to minimize
def objective(params):
xgb_model = xgb.XGBRegressor(**params)
xgb_model.fit(X_train, y_train)
y_pred = xgb_model.predict(X_test)
score = mean_squared_error(y_test, y_pred)
return {'loss': score, 'status': STATUS_OK}
# Perform the optimization
best_params = fmin(objective, space, algo=tpe.suggest, max_evals=5000)
print("Best set of hyperparameters: ", best_params)
The result is decent:
100%|██████████| 5000/5000 [12:17:11<00:00, 8.85s/trial, best loss: 0.17949651181697845]
But when I use those best parameters and try to recreate the loss, it is much worse.
RMSE: 0.32463914
Train RMSE: 0.0049098525
Code:
params = best_params
model = xgb.XGBRegressor(**params)
model.fit(X_train, y_train)
outputs = model.predict(X_test)
print(f"RMSE:", mean_squared_error(y_test, outputs))
print(f"Train RMSE:", mean_squared_error(y_train, model.predict(X_train)))
I don't see why this would give me a different result, because rerunning fit with same parameters always gives the same result