# pyscinloopsr.before — module `before` for pre-analysis
"""Module `before` for pre-analysis.

Provides quick visual inspection helpers (`view`) and a batch benchmark of
scikit-learn regressors (`regressors`) to build a performance reference.
"""

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams  # kept for interactive figure-size tweaking


class view:
    """Visual pre-analysis helpers for a predictors DataFrame."""

    def __init__(self, dfx):
        """Initiate the object with your predictors DataFrame."""
        self.xdf = dfx  # internal predictors DataFrame

    def box(self, feature):
        """Draw a boxplot of the named feature."""
        plt.figure(figsize=(5, 5))
        self.xdf.boxplot(feature)

    def scatter(self, featureA, featureB):
        """Draw a scatterplot of featureA (x) against featureB (y)."""
        plt.figure(figsize=(5, 5))
        self.xdf.plot.scatter(x=featureA, y=featureB, color='r')

    def correlation(self):
        """Plot the absolute correlation matrix of the DataFrame as a heatmap."""
        plt.figure(figsize=(8, 8))
        sns.heatmap(self.xdf.corr().abs(), annot=True, center=1, fmt=".2f",
                    cmap='Greens')


class regressors:
    """Simplified calls to a battery of regressors to build a performance reference."""

    def __init__(self):
        self.model = None        # internal model reference
        self.prettytable = None  # internal PrettyTable reference

    def runBasicRegressors(self, dfxbr, dfybr, featuresDrop=None):
        """Fit a predefined grid of regressors and print a score table.

        Args:
            dfxbr: predictors DataFrame.
            dfybr: target DataFrame.
            featuresDrop: optional list of feature names to drop from the
                predictors before fitting.

        Returns:
            The populated PrettyTable (also stored in ``self.prettytable``).
        """
        if featuresDrop is not None:
            for col in featuresDrop:
                print("\ndropping: ", col)
                dfxbr = dfxbr.drop(col, axis=1)
            print("\n")

        # Heavy third-party imports are kept local to the method, as in the
        # original design, so importing the module stays cheap.
        from datetime import datetime

        from sklearn import metrics
        from sklearn.ensemble import (AdaBoostRegressor, BaggingRegressor,
                                      GradientBoostingRegressor,
                                      RandomForestRegressor)
        from sklearn.linear_model import (ElasticNet, Lasso, LinearRegression,
                                          Ridge, SGDRegressor)
        from sklearn.metrics import mean_squared_error
        from sklearn.model_selection import train_test_split
        from sklearn.neighbors import KNeighborsRegressor
        from sklearn.neural_network import MLPRegressor
        from sklearn.svm import SVR, NuSVR

        # SVR grid: kernel x C x gamma; loop order reproduces the original
        # report order (all gammas per C, all Cs per kernel).
        models = [
            SVR(gamma=gamma, kernel=kernel, C=C)
            for kernel in ('linear', 'rbf')
            for C in (1e-1, 1, 10)
            for gamma in ('auto', 1e-1, 1, 10)
        ]

        # MLP grid: per-activation (max_iter, learning_rate_init) pairs.
        mlp_grid = {
            'relu': [(100, 0.001), (200, 0.01), (800, 0.1)],
            'tanh': [(50, 0.001), (100, 0.001), (150, 0.001),
                     (200, 0.01), (800, 0.1)],
            'logistic': [(100, 0.001), (50, 0.01), (100, 0.01),
                         (150, 0.01), (800, 0.1)],
        }
        models += [
            MLPRegressor(random_state=None, max_iter=max_iter,
                         activation=activation, learning_rate_init=lr)
            for activation, pairs in mlp_grid.items()
            for max_iter, lr in pairs
        ]

        models += [SGDRegressor(max_iter=300, tol=1e-3, eta0=eta)
                   for eta in (0.01, 0.0001, 0.000001)]

        models += [RandomForestRegressor(random_state=None, n_estimators=n)
                   for n in (10, 40, 60, 80, 100, 150, 200,
                             300, 400, 500, 600, 700, 800)]

        models += [Lasso(alpha=a) for a in (0.05, 0.1, 0.15, 0.2)]
        models += [Ridge(alpha=a) for a in (.1, .2, .3, .4, .5, .6, .7)]

        models += [ElasticNet(), BaggingRegressor(), NuSVR(gamma='auto')]

        # Gradient boosting grid: n_estimators x learning_rate x max_depth.
        models += [
            GradientBoostingRegressor(n_estimators=n, learning_rate=lr,
                                      max_depth=depth)
            for n in (72, 100, 128)
            for lr in (0.1, 0.05, 0.2)
            for depth in (3, 4, 5)
        ]

        models += [AdaBoostRegressor(n_estimators=n, learning_rate=lr)
                   for n in (5, 15, 40)
                   for lr in (0.1, 0.05)]

        models += [LinearRegression(),
                   KNeighborsRegressor(n_neighbors=5),
                   KNeighborsRegressor(n_neighbors=10)]

        # Split once so every model is trained and scored on the same data.
        X_train, X_test, y_train, y_test = train_test_split(
            dfxbr, dfybr, test_size=0.2, random_state=42)

        class ConsoleColor:
            """ANSI escape codes used to colorize the R2 column."""
            RED = '\033[91m'
            GREEN = '\033[92m'
            BLUE = '\033[94m'
            PURPLE = '\033[95m'
            END = '\033[0m'

        from prettytable import PrettyTable
        table = PrettyTable()
        table.field_names = ["Model", "MSE", "SCORE", "R2", "TrainingTime"]

        for model in models:
            then = datetime.now()
            model.fit(X_train, y_train.values.ravel())
            y_res = model.predict(X_test)
            now = datetime.now()
            print("Processing time of ", type(model).__name__, ": ", now - then)

            mse = mean_squared_error(y_test, y_res)
            r2 = metrics.r2_score(y_test, y_res)
            score = model.score(X_test, y_test)

            row = [type(model).__name__, format(mse, ',.2f'),
                   format(score, '.2f'), format(r2, '.2f'), now - then]

            # Colorize R2 by quality band: blue > 0.9, green > 0.8,
            # purple > 0.6, red > 0.4, uncolored otherwise.
            if r2 > 0.9:
                row[3] = ConsoleColor.BLUE + row[3] + ConsoleColor.END
            elif r2 > 0.8:
                row[3] = ConsoleColor.GREEN + row[3] + ConsoleColor.END
            elif r2 > 0.6:
                row[3] = ConsoleColor.PURPLE + row[3] + ConsoleColor.END
            elif r2 > 0.4:
                row[3] = ConsoleColor.RED + row[3] + ConsoleColor.END

            table.add_row(row)

        self.prettytable = table
        print(self.prettytable)
        return self.prettytable