# Exhaustive grid search over a small LightGBM hyper-parameter grid,
# scored with 5-fold cross-validated F1.
import pandas as pd
import numpy as np
import math
import warnings

import lightgbm as lgb
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

# Base estimator whose hyper-parameters are being tuned.
lg = lgb.LGBMClassifier(silent=False)

# 6 * 4 * 5 = 120 candidate combinations, every one of which is evaluated.
param_dist = {
    "max_depth": [2, 3, 4, 5, 7, 10],
    "n_estimators": [50, 100, 150, 200],
    "min_child_samples": [2, 3, 4, 5, 6],
}

grid_search = GridSearchCV(
    estimator=lg,
    param_grid=param_dist,
    scoring='f1',
    cv=5,
    n_jobs=10,
    verbose=5,
)

# X_train and y are not defined in this snippet; they must already exist.
grid_search.fit(X_train, y)

# Notebook-style expression: shows the best model and its mean CV score.
grid_search.best_estimator_, grid_search.best_score_

# Recorded output:
# Fitting 5 folds for each of 120 candidates, totalling 600 fits
# [Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
# [Parallel(n_jobs=10)]: Done 52 tasks | elapsed: 2.5s
# [Parallel(n_jobs=10)]: Done 142 tasks | elapsed: 6.6s
# [Parallel(n_jobs=10)]: Done 268 tasks | elapsed: 14.0s
# [Parallel(n_jobs=10)]: Done 430 tasks | elapsed: 25.5s
# [Parallel(n_jobs=10)]: Done 600 out of 600 | elapsed: 40.6s finished
# (LGBMClassifier(max_depth=10, min_child_samples=6, n_estimators=200,
# silent=False), 0.6359524127649383)
Random search is better than grid search in several respects, but it is still expensive when good coverage of the search space has to be guaranteed. |
# Randomised search over a LightGBM hyper-parameter space: 100 sampled
# candidates, scored with 5-fold cross-validated F1.
import pandas as pd
import numpy as np
import math
import warnings

import lightgbm as lgb
from scipy.stats import uniform
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

# Base estimator whose hyper-parameters are being tuned.
lg = lgb.LGBMClassifier(silent=False)

# Candidate values for each hyper-parameter; n_iter combinations are
# sampled from these instead of trying all of them.
param_dist = {
    "max_depth": range(2, 15, 1),
    "n_estimators": range(50, 200, 4),
    "min_child_samples": [2, 3, 4, 5, 6],
}

random_search = RandomizedSearchCV(
    estimator=lg,
    n_jobs=10,
    # The keyword must be `param_distributions`; the original text had a
    # garbled name that RandomizedSearchCV does not accept.
    param_distributions=param_dist,
    n_iter=100,
    cv=5,
    scoring='f1',
    verbose=5,
)

# X_train and y are not defined in this snippet; they must already exist.
random_search.fit(X_train, y)

# Notebook-style expression: shows the best model and its mean CV score.
random_search.best_estimator_, random_search.best_score_

# Recorded output:
# Fitting 5 folds for each of 100 candidates, totalling 500 fits
# [Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
# [Parallel(n_jobs=10)]: Done 52 tasks | elapsed: 6.6s
# [Parallel(n_jobs=10)]: Done 142 tasks | elapsed: 12.9s
# [Parallel(n_jobs=10)]: Done 268 tasks | elapsed: 22.9s
# [Parallel(n_jobs=10)]: Done 430 tasks | elapsed: 36.2s
# [Parallel(n_jobs=10)]: Done 500 out of 500 | elapsed: 42.0s finished
# (LGBMClassifier(max_depth=11, min_child_samples=3, n_estimators=198,
# silent=False), 0.628180299445963)
# Install the Bayesian-optimisation libraries, one command per package.
pip install gpyopt
pip install bayesian-optimization
pip install scikit-optimize
# Bayesian optimisation of XGBRegressor hyper-parameters with GPyOpt.
import GPy
import GPyOpt
from GPyOpt.methods import BayesianOptimization
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from xgboost import XGBRegressor
import numpy as np

iris = load_iris()
X = iris.data
y = iris.target
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=14)

# Hyper-parameter search space.
bds = [
    {'name': 'learning_rate', 'type': 'continuous', 'domain': (0, 1)},
    {'name': 'gamma', 'type': 'continuous', 'domain': (0, 5)},
    {'name': 'max_depth', 'type': 'continuous', 'domain': (1, 50)},
]


# Optimization objective (the model F being optimised).
def cv_score(parameters):
    """Return the mean cross-validated negative MSE for one candidate.

    Args:
        parameters: Indexable whose first element holds the values
            (learning_rate, gamma, max_depth), in the order of `bds`
            (presumably a (1, 3) array supplied by GPyOpt; confirm).

    Returns:
        A 0-d numpy array with the mean 'neg_mean_squared_error' score of
        an XGBRegressor evaluated on the full X, y.
    """
    # Unwrap the single candidate row; without this rebinding the indexing
    # below would not address the individual hyper-parameter values.
    parameters = parameters[0]
    score = cross_val_score(
        XGBRegressor(learning_rate=parameters[0],
                     gamma=int(parameters[1]),      # truncated to an integer
                     max_depth=int(parameters[2])),  # truncated to an integer
        X, y, scoring='neg_mean_squared_error').mean()
    score = np.array(score)
    return score


# `acquisition_type` selects which acquisition function is used.
# NOTE(review): no optimizer.run_optimization(...) call appears here, so
# presumably only the initial design points get evaluated; GPyOpt also
# minimises `f` by default while this objective is a score to maximise.
# Confirm the intended setup against the GPyOpt documentation.
optimizer = GPyOpt.methods.BayesianOptimization(
    f=cv_score,                # function to optimize
    domain=bds,                # box-constraints of the problem
    acquisition_type='LCB',    # LCB acquisition
    acquisition_weight=0.1)    # Exploration exploitation

# Pick the evaluated point with the highest (least negative) score.
x_best = optimizer.X[np.argmax(optimizer.Y)]
print("Best parameters: learning_rate="+str(x_best[0])+",gamma="+str(x_best[1])+",max_depth="+str(x_best[2]))

# Recorded output:
# Best parameters: learning_rate=0.4272184438229706,gamma=1.4805727469635759,max_depth=41.8460390442754
# Bayesian optimisation of XGBRegressor hyper-parameters with bayes_opt.
from sklearn.datasets import make_classification
from xgboost import XGBRegressor
from sklearn.model_selection import cross_val_score
from bayes_opt import BayesianOptimization

# Names used below that this snippet did not import itself.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np

iris = load_iris()
X = iris.data
y = iris.target
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=14)

# Search bounds (lower, upper) for each hyper-parameter.
bds = {'learning_rate': (0, 1),
       'gamma': (0, 5),
       'max_depth': (1, 50)}


# Optimization objective
def cv_score(learning_rate, gamma, max_depth):
    """Return the mean cross-validated negative MSE for one candidate.

    Args:
        learning_rate: Passed to XGBRegressor unchanged.
        gamma: Truncated to an integer before being passed on.
        max_depth: Truncated to an integer before being passed on.

    Returns:
        A 0-d numpy array with the mean 'neg_mean_squared_error' score of
        an XGBRegressor evaluated on the full X, y.
    """
    score = cross_val_score(
        # The keyword must be `learning_rate`; the original text had a
        # garbled, duplicated name.
        XGBRegressor(learning_rate=learning_rate,
                     gamma=int(gamma),
                     max_depth=int(max_depth)),
        X, y, scoring='neg_mean_squared_error').mean()
    score = np.array(score)
    return score


rf_bo = BayesianOptimization(
    cv_score,
    bds
)
rf_bo.maximize()

# Notebook-style expression: shows the best target value and its parameters.
rf_bo.max

# Recorded output:
# |   iter    |  target   |   gamma   | learni... | max_depth |
# -------------------------------------------------------------
# |  1        | -0.0907   |  0.7711   |  0.1819   |  20.33    |
# |  2        | -0.1339   |  4.933    |  0.6599   |  8.972    |
# |  3        | -0.07285  |  1.55     |  0.8247   |  33.94    |
# |  4        | -0.1359   |  4.009    |  0.3994   |  25.55    |
# |  5        | -0.08773  |  1.666    |  0.9551   |  48.67    |
# |  6        | -0.05654  |  0.0398   |  0.3707   |  1.221    |
# |  7        | -0.08425  |  0.6883   |  0.2564   |  33.25    |
# |  8        | -0.1113   |  3.071    |  0.8913   |  1.051    |
# |  9        | -0.9167   |  0.0      |  0.0      |  2.701    |
# |  10       | -0.05267  |  0.0538   |  0.1293   |  1.32     |
# |  11       | -0.08506  |  1.617    |  1.0      |  32.68    |
# |  12       | -0.09036  |  2.483    |  0.2906   |  33.21    |
# |  13       | -0.08969  |  0.4662   |  0.3612   |  34.74    |
# |  14       | -0.0723   |  1.295    |  0.2061   |  1.043    |
# |  15       | -0.07531  |  1.903    |  0.1182   |  35.11    |
# |  16       | -0.08494  |  2.977    |  1.0      |  34.57    |
# |  17       | -0.08506  |  1.231    |  1.0      |  36.05    |
# |  18       | -0.07023  |  2.81     |  0.838    |  36.16    |
# |  19       | -0.9167   |  1.94     |  0.0      |  36.99    |
# |  20       | -0.09041  |  3.894    |  0.9442   |  35.52    |
# |  21       | -0.1182   |  3.188    |  0.01882  |  35.14    |
# |  22       | -0.08521  |  0.931    |  0.05693  |  31.66    |
# |  23       | -0.1003   |  2.26     |  0.07555  |  31.78    |
# |  24       | -0.1018   |  0.08563  |  0.9838   |  32.22    |
# |  25       | -0.1017   |  0.8288   |  0.9947   |  30.57    |
# |  26       | -0.9167   |  1.943    |  0.0      |  30.2     |
# |  27       | -0.08506  |  1.518    |  1.0      |  35.04    |
# |  28       | -0.08494  |  3.464    |  1.0      |  32.36    |
# |  29       | -0.1224   |  4.296    |  0.4472   |  33.47    |
# |  30       | -0.1017   |  0.0      |  1.0      |  35.86    |
# =============================================================
# {'target': -0.052665895082105285, 'params': {'gamma': 0.05379782654053811, 'learning_rate': 0.1292986176550608, 'max_depth': 1.3198257775801387}}
欢迎光临 51Testing软件测试论坛 (http://bbs.51testing.com/) | Powered by Discuz! X3.2 |