How to deal with "ValueError: Domain error in arguments" while using sklearn.model_selection.RandomizedSearchCV?

373 Views Asked by At

I am applying a randomized search on hyper parameters of anSGDClassifier. However, I am not sure why randomizedsearch_estimator.fit(x_train, y_train) is not outputting correct values.

from constants import (SPLITS_NUM, SEED, N_JOBS, PROBLEM_METRIC)  
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import (KFold, RandomizedSearchCV)
from scipy.stats import (randint, uniform)  


def randomized_search(estimator, param_distributions, x_train, y_train,
                  x_validation, y_validation):
    kfold = KFold(n_splits=SPLITS_NUM, shuffle=True, random_state=SEED)
    randomizedsearch_estimator = RandomizedSearchCV(estimator,
                                                param_distributions,
                                                cv=kfold,
                                                return_train_score=True,
                                                n_jobs=N_JOBS,
                                                scoring=PROBLEM_METRIC)
    search = randomizedsearch_estimator.fit(x_train, y_train)
    print(f"Best estimator:\n{search.best_estimator_} \
       \nBest parameters:\n{search.best_params_} \
       \nBest cross-validation score: {search.best_score_:.3f} \
       \nBest test score: {search.score(x_validation, y_validation):.3f}\n\n")

def searching_list():
    return [(SGDClassifier(random_state=SEED, learning_rate='optimal', class_weight='balanced'), {
    'alpha': uniform(0.15, 0.25),
    'l1_ratio': uniform(0.002, 0.008),
    'max_iter': randint(45000, 55000),
    'tol': uniform(0.04, 0.12),
    'epsilon': uniform(45000, 55000),
    'power_t': uniform(-100000, -50000),
    'loss': [
        'hinge', 'log_loss', 'modified_huber', 'squared_hinge', 'perceptron', 'squared_error',
        'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive'
    ],
    'penalty': ['l2', 'l1', 'elasticnet']
})]

def parameter_initializer(features_train, target_train, features_validation,
                      target_validation):
    for model, distribution in searching_list():
        randomized_search(model, distribution, features_train, target_train, features_validation,
                      target_validation)

Traceback (most recent call last):
  File "c:\Users\username\Desktop\some-calculator\graph-analyzer\graph_analyzer.py", line 197, in <module>
    main()
  File "c:\Users\username\Desktop\some-calculator\graph-analyzer\graph_analyzer.py", line 191, in main
    predicted_class = node_class_predictor(new_graph)
  File "c:\Users\username\Desktop\some-calculator\graph-analyzer\utilities_module.py", line 3379, in node_class_predictor
    x_test, y_test, clf = custom_classifier(emb_df)
  File "c:\Users\username\Desktop\some-calculator\graph-analyzer\utilities_module.py", line 3337, in custom_classifier
    parameter_initializer(x_train, y_train, x_test, y_test)
  File "c:\Users\username\Desktop\some-calculator\graph-analyzer\utilities_module.py", line 3129, in parameter_initializer
    randomized_search(model, distribution, features_train, target_train, features_validation,
  File "c:\Users\username\Desktop\some-calculator\graph-analyzer\utilities_module.py", line 3079, in randomized_search
    search = randomizedsearch_estimator.fit(x_train, y_train)
  File "C:\ProgramData\Anaconda3\envs\tf\lib\site-packages\sklearn\model_selection\_search.py", line 875, in fit
    self._run_search(evaluate_candidates)
  File "C:\ProgramData\Anaconda3\envs\tf\lib\site-packages\sklearn\model_selection\_search.py", line 1749, in _run_search   
    evaluate_candidates(
  File "C:\ProgramData\Anaconda3\envs\tf\lib\site-packages\sklearn\model_selection\_search.py", line 811, in evaluate_candidatesates
    candidate_params = list(candidate_params)
  File "C:\ProgramData\Anaconda3\envs\tf\lib\site-packages\sklearn\model_selection\_search.py", line 324, in __iter__       
    params[k] = v.rvs(random_state=rng)
  File "C:\ProgramData\Anaconda3\envs\tf\lib\site-packages\scipy\stats\_distn_infrastructure.py", line 473, in rvs
    return self.dist.rvs(*self.args, **kwds)
  File "C:\ProgramData\Anaconda3\envs\tf\lib\site-packages\scipy\stats\_distn_infrastructure.py", line 1068, in rvs
    raise ValueError("Domain error in arguments.")
ValueError: Domain error in arguments.
1

There are 1 best solutions below

1
Sohail Mohammad On

when i got domain error it was due to me entering hyperparameter ranges in reverse order e.g., optimize__lr:reciprocal(3e-3,3e-4) and when changed to reciprocal(3e-4,3e-1) it was solved. so i suggest tinkering with the hyperparameter values and reversing them