pyscinloopsr.before

Module before for pre-analysis

View Source

  1"""
  2Module `before` for pre-analysis
  3"""
  4
  5import pandas as pd
  6import numpy as np
  7import seaborn as snsi
  8import matplotlib.pyplot as plt
  9from matplotlib.pylab import rcParams
 10#rcParams["figure.figsize"] = 15,6
 11#plt.figure(figsize=(15, 10))
 12
 13class view:
 14  """👁️‍🗨️👁️‍🗨️"""
 15
 16  def __init__(self, dfx):
 17    """Initiate the object with your predictors DataFrame"""
 18    self.xdf = dfx
 19    """Internal predictors Dataframe name"""
 20
 21  def box(self, feature):
 22    """Send the feature name to boxplot"""
 23    plt.figure(figsize=(5, 5))
 24    self.xdf.boxplot(feature)
 25  def scatter(self, featureA, featureB):
 26    """Send the two feature names to scatterplot"""
 27    plt.figure(figsize=(5, 5))
 28    self.xdf.plot.scatter(x=featureA, y = featureB, color='r')
 29  def correlation(self):
 30    """Method to print the correlation matrix from your Dataframe"""
 31    plt.figure(figsize=(8, 8))
 32    snsi.heatmap(self.xdf.corr().abs(),  annot=True, center=1,  fmt=".2f", cmap='Greens')
 33
 34
 35
 36class regressors:
 37  """Class with a list of simplified call to a list of Regressors to build a reference on performance"""
 38
 39  def __init__(self):
 40    self.model = None
 41    """Internal model reference"""
 42    self.prettytable = None
 43    """Internal PrettyTable reference"""
 44
 45 
 46
 47
 48 
 49
 50
 51  def runBasicRegressors(self, dfxbr, dfybr, featuresDrop=None):
 52    """
 53    Method to fit the regressors\n
 54    Args: predictors DataFrame, target DataFrame, optional list of features names to drop
 55    """
 56
 57    if featuresDrop is not None:
 58      for i in featuresDrop:
 59        print("\ndropping: ", i)
 60        dfxbr = dfxbr.drop(i, axis=1)
 61    print("\n")
 62
 63    from sklearn.ensemble import RandomForestRegressor, BaggingRegressor
 64    from sklearn.ensemble import AdaBoostRegressor
 65    from sklearn.ensemble import GradientBoostingRegressor
 66    from sklearn.tree import DecisionTreeRegressor
 67    from sklearn.neighbors import KNeighborsRegressor
 68    from sklearn.linear_model import LogisticRegression
 69    from sklearn.linear_model import LinearRegression
 70    from sklearn.linear_model import Lasso, ElasticNet, Ridge, SGDRegressor
 71    from sklearn.svm import SVR, NuSVR
 72    from sklearn.neural_network import MLPRegressor
 73    from datetime import datetime
 74
 75    from sklearn import metrics
 76    from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
 77
 78
 79
 80
 81    rfr = RandomForestRegressor()
 82    gbr = GradientBoostingRegressor()
 83    abr = AdaBoostRegressor()
 84
 85
 86    import numpy as np
 87
 88
 89    models = [
 90
 91
 92        SVR(gamma='auto', kernel='linear', C=1e-1),
 93        SVR(gamma=1e-1, kernel='linear', C=1e-1),
 94        SVR(gamma=1, kernel='linear', C=1e-1),
 95        SVR(gamma=10, kernel='linear', C=1e-1),
 96
 97        SVR(gamma='auto', kernel='linear', C=1),
 98        SVR(gamma=1e-1, kernel='linear', C=1),
 99        SVR(gamma=1, kernel='linear', C=1),
100        SVR(gamma=10, kernel='linear', C=1),
101
102        SVR(gamma='auto', kernel='linear', C=10),
103        SVR(gamma=1e-1, kernel='linear', C=10),
104        SVR(gamma=1, kernel='linear', C=10),
105        SVR(gamma=10, kernel='linear', C=10),
106
107        SVR(gamma='auto', kernel='rbf', C=1e-1),
108        SVR(gamma=1e-1, kernel='rbf', C=1e-1),
109        SVR(gamma=1, kernel='rbf', C=1e-1),
110        SVR(gamma=10, kernel='rbf', C=1e-1),
111
112        SVR(gamma='auto', kernel='rbf', C=1),
113        SVR(gamma=1e-1, kernel='rbf', C=1),
114        SVR(gamma=1, kernel='rbf', C=1),
115        SVR(gamma=10, kernel='rbf', C=1),
116
117        SVR(gamma='auto', kernel='rbf', C=10),
118        SVR(gamma=1e-1, kernel='rbf', C=10),
119        SVR(gamma=1, kernel='rbf', C=10),
120        SVR(gamma=10, kernel='rbf', C=10),
121
122        MLPRegressor(random_state=None, max_iter=100, activation='relu', learning_rate_init=0.001),
123        MLPRegressor(random_state=None, max_iter=200, activation='relu', learning_rate_init=0.01),
124        MLPRegressor(random_state=None, max_iter=800, activation='relu', learning_rate_init=0.1),
125
126        MLPRegressor(random_state=None, max_iter=50, activation='tanh', learning_rate_init=0.001),
127        MLPRegressor(random_state=None, max_iter=100, activation='tanh', learning_rate_init=0.001),
128        MLPRegressor(random_state=None, max_iter=150, activation='tanh', learning_rate_init=0.001),
129
130        MLPRegressor(random_state=None, max_iter=200, activation='tanh', learning_rate_init=0.01),
131        MLPRegressor(random_state=None, max_iter=800, activation='tanh', learning_rate_init=0.1),
132
133        MLPRegressor(random_state=None, max_iter=100, activation='logistic', learning_rate_init=0.001),
134
135        MLPRegressor(random_state=None, max_iter=50, activation='logistic', learning_rate_init=0.01),
136        MLPRegressor(random_state=None, max_iter=100, activation='logistic', learning_rate_init=0.01),
137        MLPRegressor(random_state=None, max_iter=150, activation='logistic', learning_rate_init=0.01),
138
139        MLPRegressor(random_state=None, max_iter=800, activation='logistic', learning_rate_init=0.1),
140
141
142        SGDRegressor(max_iter=300, tol=1e-3, eta0=0.01),
143        SGDRegressor(max_iter=300, tol=1e-3, eta0=0.0001),
144        SGDRegressor(max_iter=300, tol=1e-3, eta0=0.000001),
145
146
147
148        RandomForestRegressor( random_state=None, n_estimators=10),
149        RandomForestRegressor( random_state=None, n_estimators=40),
150        RandomForestRegressor( random_state=None, n_estimators=60),
151        RandomForestRegressor( random_state=None, n_estimators=80),
152        RandomForestRegressor( random_state=None, n_estimators=100),
153        RandomForestRegressor( random_state=None, n_estimators=150),
154        RandomForestRegressor( random_state=None, n_estimators=200),
155        RandomForestRegressor( random_state=None, n_estimators=300),
156        RandomForestRegressor( random_state=None, n_estimators=400),
157        RandomForestRegressor( random_state=None, n_estimators=500),
158        RandomForestRegressor( random_state=None, n_estimators=600),
159        RandomForestRegressor( random_state=None, n_estimators=700),
160        RandomForestRegressor( random_state=None, n_estimators=800),
161
162        Lasso(alpha=0.05),
163        Lasso(alpha=0.1),
164        Lasso(alpha=0.15),
165        Lasso(alpha=0.2),
166
167        Ridge(alpha=.1),
168        Ridge(alpha=.2),
169        Ridge(alpha=.3),
170        Ridge(alpha=.4),
171        Ridge(alpha=.5),
172        Ridge(alpha=.6),
173        Ridge(alpha=.7),
174
175        ElasticNet(),
176        BaggingRegressor(),
177        NuSVR(gamma='auto'),
178
179        GradientBoostingRegressor(n_estimators=72, learning_rate=0.1, max_depth=3),
180        GradientBoostingRegressor(n_estimators=72, learning_rate=0.1, max_depth=4),
181        GradientBoostingRegressor(n_estimators=72, learning_rate=0.1, max_depth=5),
182        GradientBoostingRegressor(n_estimators=72, learning_rate=0.05, max_depth=3),
183        GradientBoostingRegressor(n_estimators=72, learning_rate=0.05, max_depth=4),
184        GradientBoostingRegressor(n_estimators=72, learning_rate=0.05, max_depth=5),
185        GradientBoostingRegressor(n_estimators=72, learning_rate=0.2, max_depth=3),
186        GradientBoostingRegressor(n_estimators=72, learning_rate=0.2, max_depth=4),
187        GradientBoostingRegressor(n_estimators=72, learning_rate=0.2, max_depth=5),
188
189        GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3),
190        GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=4),
191        GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5),
192        GradientBoostingRegressor(n_estimators=100, learning_rate=0.05, max_depth=3),
193        GradientBoostingRegressor(n_estimators=100, learning_rate=0.05, max_depth=4),
194        GradientBoostingRegressor(n_estimators=100, learning_rate=0.05, max_depth=5),
195        GradientBoostingRegressor(n_estimators=100, learning_rate=0.2, max_depth=3),
196        GradientBoostingRegressor(n_estimators=100, learning_rate=0.2, max_depth=4),
197        GradientBoostingRegressor(n_estimators=100, learning_rate=0.2, max_depth=5),
198
199        GradientBoostingRegressor(n_estimators=128, learning_rate=0.1, max_depth=3),
200        GradientBoostingRegressor(n_estimators=128, learning_rate=0.1, max_depth=4),
201        GradientBoostingRegressor(n_estimators=128, learning_rate=0.1, max_depth=5),
202        GradientBoostingRegressor(n_estimators=128, learning_rate=0.05, max_depth=3),
203        GradientBoostingRegressor(n_estimators=128, learning_rate=0.05, max_depth=4),
204        GradientBoostingRegressor(n_estimators=128, learning_rate=0.05, max_depth=5),
205        GradientBoostingRegressor(n_estimators=128, learning_rate=0.2, max_depth=3),
206        GradientBoostingRegressor(n_estimators=128, learning_rate=0.2, max_depth=4),
207        GradientBoostingRegressor(n_estimators=128, learning_rate=0.2, max_depth=5),
208
209
210        AdaBoostRegressor(n_estimators=5, learning_rate=0.1),
211        AdaBoostRegressor(n_estimators=5, learning_rate=0.05),
212        AdaBoostRegressor(n_estimators=15, learning_rate=0.1),
213        AdaBoostRegressor(n_estimators=15, learning_rate=0.05),
214        AdaBoostRegressor(n_estimators=40, learning_rate=0.1),
215        AdaBoostRegressor(n_estimators=40, learning_rate=0.05),
216
217
218        LinearRegression(),
219
220        KNeighborsRegressor(n_neighbors=5),
221        KNeighborsRegressor(n_neighbors=10)
222
223    ]
224
225
226
227
228    from sklearn.model_selection import train_test_split
229
230
231    # split the data into training and testing sets
232    X_train, X_test, y_train, y_test = train_test_split(dfxbr, dfybr, test_size=0.2, random_state=42)
233
234
235
236    class ConsoleColor:
237        # Color
238        BLACK = '\033[90m'
239        RED = '\033[91m'
240        GREEN = '\033[92m'
241        YELLOW = '\033[93m'
242        BLUE = '\033[94m'
243        PURPLE = '\033[95m'
244        CYAN = '\033[96m'
245        GRAY = '\033[97m'
246
247        # Style
248        BOLD = '\033[1m'
249        UNDERLINE = '\033[4m'
250
251        # BackgroundColor
252        BgBLACK = '\033[40m'
253        BgRED = '\033[41m'
254        BgGREEN = '\033[42m'
255        BgORANGE = '\033[43m'
256        BgBLUE = '\033[44m'
257        BgPURPLE = '\033[45m'
258        BgCYAN = '\033[46m'
259        BgGRAY = '\033[47m'
260
261        # End
262        END = '\033[0m'
263
264
265
266
267
268
269    from prettytable import PrettyTable
270    table = PrettyTable()
271    table.field_names = ["Model", "MSE", "SCORE", "R2", "TrainingTime"]
272
273
274
275    j = 1
276    for i in models:
277        then = datetime.now()
278
279        i.fit(X_train, y_train.values.ravel())
280        y_res = i.predict(X_test)
281
282        now = datetime.now()
283        print ("Processing time of ",type(i).__name__,": ", now-then)
284        mse = mean_squared_error(y_test, y_res)
285        r2 = metrics.r2_score(y_test, y_res)
286        score = i.score(X_test, y_test)
287
288        newLine = [type(i).__name__, format(mse, ',.2f'), format(score, '.2f'), format(r2, '.2f'), now-then]
289
290        if r2>0.9:
291          newLine[3] = ConsoleColor.BLUE + newLine[3] + ConsoleColor.END
292        elif r2>0.8:
293          newLine[3] = ConsoleColor.GREEN + newLine[3] + ConsoleColor.END
294        elif r2>0.6:
295          newLine[3] = ConsoleColor.PURPLE + newLine[3] + ConsoleColor.END
296        elif r2>0.4:
297          newLine[3] = ConsoleColor.RED + newLine[3] + ConsoleColor.END
298
299
300
301
302
303        table.add_row(newLine)
304
305
306
307        j = j + 1
308
309
310    self.prettytable = table
311    print(self.prettytable)
312
313    return self.prettytable

class view: View Source

class view:
  """Quick exploratory plots (boxplot, scatter, correlation heatmap) for a predictors DataFrame."""

  def __init__(self, dfx):
    """Initiate the object with your predictors DataFrame"""
    self.xdf = dfx
    """Internal predictors Dataframe name"""

  def box(self, feature):
    """Open a 5x5 figure and draw a boxplot of the column named `feature`."""
    plt.figure(figsize=(5, 5))
    self.xdf.boxplot(feature)

  def scatter(self, featureA, featureB):
    """Open a 5x5 figure and draw a red scatterplot of `featureA` (x) against `featureB` (y)."""
    plt.figure(figsize=(5, 5))
    # Hand the freshly created axes to pandas explicitly: without `ax=`,
    # DataFrame.plot opens a figure of its own and the 5x5 one stays empty.
    self.xdf.plot.scatter(x=featureA, y=featureB, color='r', ax=plt.gca())

  def correlation(self):
    """Open an 8x8 figure and draw an annotated heatmap of the absolute correlation matrix."""
    plt.figure(figsize=(8, 8))
    # .abs() discards the sign, so the heatmap shows correlation strength only.
    snsi.heatmap(self.xdf.corr().abs(), annot=True, center=1, fmt=".2f", cmap='Greens')

👁️‍🗨️👁️‍🗨️

view(dfx) View Source

17  def __init__(self, dfx):
18    """Initiate the object with your predictors DataFrame"""
19    self.xdf = dfx
20    """Internal predictors Dataframe name"""

Initiate the object with your predictors DataFrame

xdf

Internal predictors Dataframe name

def box(self, feature): View Source

22  def box(self, feature):
23    """Send the feature name to boxplot"""
24    plt.figure(figsize=(5, 5))
25    self.xdf.boxplot(feature)

Send the feature name to boxplot

def scatter(self, featureA, featureB): View Source

26  def scatter(self, featureA, featureB):
27    """Send the two feature names to scatterplot"""
28    plt.figure(figsize=(5, 5))
29    self.xdf.plot.scatter(x=featureA, y = featureB, color='r')

Send the two feature names to scatterplot

def correlation(self): View Source

30  def correlation(self):
31    """Method to print the correlation matrix from your Dataframe"""
32    plt.figure(figsize=(8, 8))
33    snsi.heatmap(self.xdf.corr().abs(),  annot=True, center=1,  fmt=".2f", cmap='Greens')

Method to draw a heatmap of the absolute correlation matrix of your DataFrame

class regressors: View Source

 37class regressors:
 38  """Class with a list of simplified call to a list of Regressors to build a reference on performance"""
 39
 40  def __init__(self):
 41    self.model = None
 42    """Internal model reference"""
 43    self.prettytable = None
 44    """Internal PrettyTable reference"""
 45
 46 
 47
 48
 49 
 50
 51
 52  def runBasicRegressors(self, dfxbr, dfybr, featuresDrop=None):
 53    """
 54    Method to fit the regressors\n
 55    Args: predictors DataFrame, target DataFrame, optional list of features names to drop
 56    """
 57
 58    if featuresDrop is not None:
 59      for i in featuresDrop:
 60        print("\ndropping: ", i)
 61        dfxbr = dfxbr.drop(i, axis=1)
 62    print("\n")
 63
 64    from sklearn.ensemble import RandomForestRegressor, BaggingRegressor
 65    from sklearn.ensemble import AdaBoostRegressor
 66    from sklearn.ensemble import GradientBoostingRegressor
 67    from sklearn.tree import DecisionTreeRegressor
 68    from sklearn.neighbors import KNeighborsRegressor
 69    from sklearn.linear_model import LogisticRegression
 70    from sklearn.linear_model import LinearRegression
 71    from sklearn.linear_model import Lasso, ElasticNet, Ridge, SGDRegressor
 72    from sklearn.svm import SVR, NuSVR
 73    from sklearn.neural_network import MLPRegressor
 74    from datetime import datetime
 75
 76    from sklearn import metrics
 77    from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
 78
 79
 80
 81
 82    rfr = RandomForestRegressor()
 83    gbr = GradientBoostingRegressor()
 84    abr = AdaBoostRegressor()
 85
 86
 87    import numpy as np
 88
 89
 90    models = [
 91
 92
 93        SVR(gamma='auto', kernel='linear', C=1e-1),
 94        SVR(gamma=1e-1, kernel='linear', C=1e-1),
 95        SVR(gamma=1, kernel='linear', C=1e-1),
 96        SVR(gamma=10, kernel='linear', C=1e-1),
 97
 98        SVR(gamma='auto', kernel='linear', C=1),
 99        SVR(gamma=1e-1, kernel='linear', C=1),
100        SVR(gamma=1, kernel='linear', C=1),
101        SVR(gamma=10, kernel='linear', C=1),
102
103        SVR(gamma='auto', kernel='linear', C=10),
104        SVR(gamma=1e-1, kernel='linear', C=10),
105        SVR(gamma=1, kernel='linear', C=10),
106        SVR(gamma=10, kernel='linear', C=10),
107
108        SVR(gamma='auto', kernel='rbf', C=1e-1),
109        SVR(gamma=1e-1, kernel='rbf', C=1e-1),
110        SVR(gamma=1, kernel='rbf', C=1e-1),
111        SVR(gamma=10, kernel='rbf', C=1e-1),
112
113        SVR(gamma='auto', kernel='rbf', C=1),
114        SVR(gamma=1e-1, kernel='rbf', C=1),
115        SVR(gamma=1, kernel='rbf', C=1),
116        SVR(gamma=10, kernel='rbf', C=1),
117
118        SVR(gamma='auto', kernel='rbf', C=10),
119        SVR(gamma=1e-1, kernel='rbf', C=10),
120        SVR(gamma=1, kernel='rbf', C=10),
121        SVR(gamma=10, kernel='rbf', C=10),
122
123        MLPRegressor(random_state=None, max_iter=100, activation='relu', learning_rate_init=0.001),
124        MLPRegressor(random_state=None, max_iter=200, activation='relu', learning_rate_init=0.01),
125        MLPRegressor(random_state=None, max_iter=800, activation='relu', learning_rate_init=0.1),
126
127        MLPRegressor(random_state=None, max_iter=50, activation='tanh', learning_rate_init=0.001),
128        MLPRegressor(random_state=None, max_iter=100, activation='tanh', learning_rate_init=0.001),
129        MLPRegressor(random_state=None, max_iter=150, activation='tanh', learning_rate_init=0.001),
130
131        MLPRegressor(random_state=None, max_iter=200, activation='tanh', learning_rate_init=0.01),
132        MLPRegressor(random_state=None, max_iter=800, activation='tanh', learning_rate_init=0.1),
133
134        MLPRegressor(random_state=None, max_iter=100, activation='logistic', learning_rate_init=0.001),
135
136        MLPRegressor(random_state=None, max_iter=50, activation='logistic', learning_rate_init=0.01),
137        MLPRegressor(random_state=None, max_iter=100, activation='logistic', learning_rate_init=0.01),
138        MLPRegressor(random_state=None, max_iter=150, activation='logistic', learning_rate_init=0.01),
139
140        MLPRegressor(random_state=None, max_iter=800, activation='logistic', learning_rate_init=0.1),
141
142
143        SGDRegressor(max_iter=300, tol=1e-3, eta0=0.01),
144        SGDRegressor(max_iter=300, tol=1e-3, eta0=0.0001),
145        SGDRegressor(max_iter=300, tol=1e-3, eta0=0.000001),
146
147
148
149        RandomForestRegressor( random_state=None, n_estimators=10),
150        RandomForestRegressor( random_state=None, n_estimators=40),
151        RandomForestRegressor( random_state=None, n_estimators=60),
152        RandomForestRegressor( random_state=None, n_estimators=80),
153        RandomForestRegressor( random_state=None, n_estimators=100),
154        RandomForestRegressor( random_state=None, n_estimators=150),
155        RandomForestRegressor( random_state=None, n_estimators=200),
156        RandomForestRegressor( random_state=None, n_estimators=300),
157        RandomForestRegressor( random_state=None, n_estimators=400),
158        RandomForestRegressor( random_state=None, n_estimators=500),
159        RandomForestRegressor( random_state=None, n_estimators=600),
160        RandomForestRegressor( random_state=None, n_estimators=700),
161        RandomForestRegressor( random_state=None, n_estimators=800),
162
163        Lasso(alpha=0.05),
164        Lasso(alpha=0.1),
165        Lasso(alpha=0.15),
166        Lasso(alpha=0.2),
167
168        Ridge(alpha=.1),
169        Ridge(alpha=.2),
170        Ridge(alpha=.3),
171        Ridge(alpha=.4),
172        Ridge(alpha=.5),
173        Ridge(alpha=.6),
174        Ridge(alpha=.7),
175
176        ElasticNet(),
177        BaggingRegressor(),
178        NuSVR(gamma='auto'),
179
180        GradientBoostingRegressor(n_estimators=72, learning_rate=0.1, max_depth=3),
181        GradientBoostingRegressor(n_estimators=72, learning_rate=0.1, max_depth=4),
182        GradientBoostingRegressor(n_estimators=72, learning_rate=0.1, max_depth=5),
183        GradientBoostingRegressor(n_estimators=72, learning_rate=0.05, max_depth=3),
184        GradientBoostingRegressor(n_estimators=72, learning_rate=0.05, max_depth=4),
185        GradientBoostingRegressor(n_estimators=72, learning_rate=0.05, max_depth=5),
186        GradientBoostingRegressor(n_estimators=72, learning_rate=0.2, max_depth=3),
187        GradientBoostingRegressor(n_estimators=72, learning_rate=0.2, max_depth=4),
188        GradientBoostingRegressor(n_estimators=72, learning_rate=0.2, max_depth=5),
189
190        GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3),
191        GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=4),
192        GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5),
193        GradientBoostingRegressor(n_estimators=100, learning_rate=0.05, max_depth=3),
194        GradientBoostingRegressor(n_estimators=100, learning_rate=0.05, max_depth=4),
195        GradientBoostingRegressor(n_estimators=100, learning_rate=0.05, max_depth=5),
196        GradientBoostingRegressor(n_estimators=100, learning_rate=0.2, max_depth=3),
197        GradientBoostingRegressor(n_estimators=100, learning_rate=0.2, max_depth=4),
198        GradientBoostingRegressor(n_estimators=100, learning_rate=0.2, max_depth=5),
199
200        GradientBoostingRegressor(n_estimators=128, learning_rate=0.1, max_depth=3),
201        GradientBoostingRegressor(n_estimators=128, learning_rate=0.1, max_depth=4),
202        GradientBoostingRegressor(n_estimators=128, learning_rate=0.1, max_depth=5),
203        GradientBoostingRegressor(n_estimators=128, learning_rate=0.05, max_depth=3),
204        GradientBoostingRegressor(n_estimators=128, learning_rate=0.05, max_depth=4),
205        GradientBoostingRegressor(n_estimators=128, learning_rate=0.05, max_depth=5),
206        GradientBoostingRegressor(n_estimators=128, learning_rate=0.2, max_depth=3),
207        GradientBoostingRegressor(n_estimators=128, learning_rate=0.2, max_depth=4),
208        GradientBoostingRegressor(n_estimators=128, learning_rate=0.2, max_depth=5),
209
210
211        AdaBoostRegressor(n_estimators=5, learning_rate=0.1),
212        AdaBoostRegressor(n_estimators=5, learning_rate=0.05),
213        AdaBoostRegressor(n_estimators=15, learning_rate=0.1),
214        AdaBoostRegressor(n_estimators=15, learning_rate=0.05),
215        AdaBoostRegressor(n_estimators=40, learning_rate=0.1),
216        AdaBoostRegressor(n_estimators=40, learning_rate=0.05),
217
218
219        LinearRegression(),
220
221        KNeighborsRegressor(n_neighbors=5),
222        KNeighborsRegressor(n_neighbors=10)
223
224    ]
225
226
227
228
229    from sklearn.model_selection import train_test_split
230
231
232    # split the data into training and testing sets
233    X_train, X_test, y_train, y_test = train_test_split(dfxbr, dfybr, test_size=0.2, random_state=42)
234
235
236
237    class ConsoleColor:
238        # Color
239        BLACK = '\033[90m'
240        RED = '\033[91m'
241        GREEN = '\033[92m'
242        YELLOW = '\033[93m'
243        BLUE = '\033[94m'
244        PURPLE = '\033[95m'
245        CYAN = '\033[96m'
246        GRAY = '\033[97m'
247
248        # Style
249        BOLD = '\033[1m'
250        UNDERLINE = '\033[4m'
251
252        # BackgroundColor
253        BgBLACK = '\033[40m'
254        BgRED = '\033[41m'
255        BgGREEN = '\033[42m'
256        BgORANGE = '\033[43m'
257        BgBLUE = '\033[44m'
258        BgPURPLE = '\033[45m'
259        BgCYAN = '\033[46m'
260        BgGRAY = '\033[47m'
261
262        # End
263        END = '\033[0m'
264
265
266
267
268
269
270    from prettytable import PrettyTable
271    table = PrettyTable()
272    table.field_names = ["Model", "MSE", "SCORE", "R2", "TrainingTime"]
273
274
275
276    j = 1
277    for i in models:
278        then = datetime.now()
279
280        i.fit(X_train, y_train.values.ravel())
281        y_res = i.predict(X_test)
282
283        now = datetime.now()
284        print ("Processing time of ",type(i).__name__,": ", now-then)
285        mse = mean_squared_error(y_test, y_res)
286        r2 = metrics.r2_score(y_test, y_res)
287        score = i.score(X_test, y_test)
288
289        newLine = [type(i).__name__, format(mse, ',.2f'), format(score, '.2f'), format(r2, '.2f'), now-then]
290
291        if r2>0.9:
292          newLine[3] = ConsoleColor.BLUE + newLine[3] + ConsoleColor.END
293        elif r2>0.8:
294          newLine[3] = ConsoleColor.GREEN + newLine[3] + ConsoleColor.END
295        elif r2>0.6:
296          newLine[3] = ConsoleColor.PURPLE + newLine[3] + ConsoleColor.END
297        elif r2>0.4:
298          newLine[3] = ConsoleColor.RED + newLine[3] + ConsoleColor.END
299
300
301
302
303
304        table.add_row(newLine)
305
306
307
308        j = j + 1
309
310
311    self.prettytable = table
312    print(self.prettytable)
313
314    return self.prettytable

Class offering simplified calls to a list of regressors, to build a performance reference

model

Internal model reference

prettytable

Internal PrettyTable reference

def runBasicRegressors(self, dfxbr, dfybr, featuresDrop=None): View Source

 52  def runBasicRegressors(self, dfxbr, dfybr, featuresDrop=None):
 53    """
 54    Method to fit the regressors\n
 55    Args: predictors DataFrame, target DataFrame, optional list of features names to drop
 56    """
 57
 58    if featuresDrop is not None:
 59      for i in featuresDrop:
 60        print("\ndropping: ", i)
 61        dfxbr = dfxbr.drop(i, axis=1)
 62    print("\n")
 63
 64    from sklearn.ensemble import RandomForestRegressor, BaggingRegressor
 65    from sklearn.ensemble import AdaBoostRegressor
 66    from sklearn.ensemble import GradientBoostingRegressor
 67    from sklearn.tree import DecisionTreeRegressor
 68    from sklearn.neighbors import KNeighborsRegressor
 69    from sklearn.linear_model import LogisticRegression
 70    from sklearn.linear_model import LinearRegression
 71    from sklearn.linear_model import Lasso, ElasticNet, Ridge, SGDRegressor
 72    from sklearn.svm import SVR, NuSVR
 73    from sklearn.neural_network import MLPRegressor
 74    from datetime import datetime
 75
 76    from sklearn import metrics
 77    from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
 78
 79
 80
 81
 82    rfr = RandomForestRegressor()
 83    gbr = GradientBoostingRegressor()
 84    abr = AdaBoostRegressor()
 85
 86
 87    import numpy as np
 88
 89
 90    models = [
 91
 92
 93        SVR(gamma='auto', kernel='linear', C=1e-1),
 94        SVR(gamma=1e-1, kernel='linear', C=1e-1),
 95        SVR(gamma=1, kernel='linear', C=1e-1),
 96        SVR(gamma=10, kernel='linear', C=1e-1),
 97
 98        SVR(gamma='auto', kernel='linear', C=1),
 99        SVR(gamma=1e-1, kernel='linear', C=1),
100        SVR(gamma=1, kernel='linear', C=1),
101        SVR(gamma=10, kernel='linear', C=1),
102
103        SVR(gamma='auto', kernel='linear', C=10),
104        SVR(gamma=1e-1, kernel='linear', C=10),
105        SVR(gamma=1, kernel='linear', C=10),
106        SVR(gamma=10, kernel='linear', C=10),
107
108        SVR(gamma='auto', kernel='rbf', C=1e-1),
109        SVR(gamma=1e-1, kernel='rbf', C=1e-1),
110        SVR(gamma=1, kernel='rbf', C=1e-1),
111        SVR(gamma=10, kernel='rbf', C=1e-1),
112
113        SVR(gamma='auto', kernel='rbf', C=1),
114        SVR(gamma=1e-1, kernel='rbf', C=1),
115        SVR(gamma=1, kernel='rbf', C=1),
116        SVR(gamma=10, kernel='rbf', C=1),
117
118        SVR(gamma='auto', kernel='rbf', C=10),
119        SVR(gamma=1e-1, kernel='rbf', C=10),
120        SVR(gamma=1, kernel='rbf', C=10),
121        SVR(gamma=10, kernel='rbf', C=10),
122
123        MLPRegressor(random_state=None, max_iter=100, activation='relu', learning_rate_init=0.001),
124        MLPRegressor(random_state=None, max_iter=200, activation='relu', learning_rate_init=0.01),
125        MLPRegressor(random_state=None, max_iter=800, activation='relu', learning_rate_init=0.1),
126
127        MLPRegressor(random_state=None, max_iter=50, activation='tanh', learning_rate_init=0.001),
128        MLPRegressor(random_state=None, max_iter=100, activation='tanh', learning_rate_init=0.001),
129        MLPRegressor(random_state=None, max_iter=150, activation='tanh', learning_rate_init=0.001),
130
131        MLPRegressor(random_state=None, max_iter=200, activation='tanh', learning_rate_init=0.01),
132        MLPRegressor(random_state=None, max_iter=800, activation='tanh', learning_rate_init=0.1),
133
134        MLPRegressor(random_state=None, max_iter=100, activation='logistic', learning_rate_init=0.001),
135
136        MLPRegressor(random_state=None, max_iter=50, activation='logistic', learning_rate_init=0.01),
137        MLPRegressor(random_state=None, max_iter=100, activation='logistic', learning_rate_init=0.01),
138        MLPRegressor(random_state=None, max_iter=150, activation='logistic', learning_rate_init=0.01),
139
140        MLPRegressor(random_state=None, max_iter=800, activation='logistic', learning_rate_init=0.1),
141
142
143        SGDRegressor(max_iter=300, tol=1e-3, eta0=0.01),
144        SGDRegressor(max_iter=300, tol=1e-3, eta0=0.0001),
145        SGDRegressor(max_iter=300, tol=1e-3, eta0=0.000001),
146
147
148
149        RandomForestRegressor( random_state=None, n_estimators=10),
150        RandomForestRegressor( random_state=None, n_estimators=40),
151        RandomForestRegressor( random_state=None, n_estimators=60),
152        RandomForestRegressor( random_state=None, n_estimators=80),
153        RandomForestRegressor( random_state=None, n_estimators=100),
154        RandomForestRegressor( random_state=None, n_estimators=150),
155        RandomForestRegressor( random_state=None, n_estimators=200),
156        RandomForestRegressor( random_state=None, n_estimators=300),
157        RandomForestRegressor( random_state=None, n_estimators=400),
158        RandomForestRegressor( random_state=None, n_estimators=500),
159        RandomForestRegressor( random_state=None, n_estimators=600),
160        RandomForestRegressor( random_state=None, n_estimators=700),
161        RandomForestRegressor( random_state=None, n_estimators=800),
162
163        Lasso(alpha=0.05),
164        Lasso(alpha=0.1),
165        Lasso(alpha=0.15),
166        Lasso(alpha=0.2),
167
168        Ridge(alpha=.1),
169        Ridge(alpha=.2),
170        Ridge(alpha=.3),
171        Ridge(alpha=.4),
172        Ridge(alpha=.5),
173        Ridge(alpha=.6),
174        Ridge(alpha=.7),
175
176        ElasticNet(),
177        BaggingRegressor(),
178        NuSVR(gamma='auto'),
179
180        GradientBoostingRegressor(n_estimators=72, learning_rate=0.1, max_depth=3),
181        GradientBoostingRegressor(n_estimators=72, learning_rate=0.1, max_depth=4),
182        GradientBoostingRegressor(n_estimators=72, learning_rate=0.1, max_depth=5),
183        GradientBoostingRegressor(n_estimators=72, learning_rate=0.05, max_depth=3),
184        GradientBoostingRegressor(n_estimators=72, learning_rate=0.05, max_depth=4),
185        GradientBoostingRegressor(n_estimators=72, learning_rate=0.05, max_depth=5),
186        GradientBoostingRegressor(n_estimators=72, learning_rate=0.2, max_depth=3),
187        GradientBoostingRegressor(n_estimators=72, learning_rate=0.2, max_depth=4),
188        GradientBoostingRegressor(n_estimators=72, learning_rate=0.2, max_depth=5),
189
190        GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3),
191        GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=4),
192        GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5),
193        GradientBoostingRegressor(n_estimators=100, learning_rate=0.05, max_depth=3),
194        GradientBoostingRegressor(n_estimators=100, learning_rate=0.05, max_depth=4),
195        GradientBoostingRegressor(n_estimators=100, learning_rate=0.05, max_depth=5),
196        GradientBoostingRegressor(n_estimators=100, learning_rate=0.2, max_depth=3),
197        GradientBoostingRegressor(n_estimators=100, learning_rate=0.2, max_depth=4),
198        GradientBoostingRegressor(n_estimators=100, learning_rate=0.2, max_depth=5),
199
200        GradientBoostingRegressor(n_estimators=128, learning_rate=0.1, max_depth=3),
201        GradientBoostingRegressor(n_estimators=128, learning_rate=0.1, max_depth=4),
202        GradientBoostingRegressor(n_estimators=128, learning_rate=0.1, max_depth=5),
203        GradientBoostingRegressor(n_estimators=128, learning_rate=0.05, max_depth=3),
204        GradientBoostingRegressor(n_estimators=128, learning_rate=0.05, max_depth=4),
205        GradientBoostingRegressor(n_estimators=128, learning_rate=0.05, max_depth=5),
206        GradientBoostingRegressor(n_estimators=128, learning_rate=0.2, max_depth=3),
207        GradientBoostingRegressor(n_estimators=128, learning_rate=0.2, max_depth=4),
208        GradientBoostingRegressor(n_estimators=128, learning_rate=0.2, max_depth=5),
209
210
211        AdaBoostRegressor(n_estimators=5, learning_rate=0.1),
212        AdaBoostRegressor(n_estimators=5, learning_rate=0.05),
213        AdaBoostRegressor(n_estimators=15, learning_rate=0.1),
214        AdaBoostRegressor(n_estimators=15, learning_rate=0.05),
215        AdaBoostRegressor(n_estimators=40, learning_rate=0.1),
216        AdaBoostRegressor(n_estimators=40, learning_rate=0.05),
217
218
219        LinearRegression(),
220
221        KNeighborsRegressor(n_neighbors=5),
222        KNeighborsRegressor(n_neighbors=10)
223
224    ]
225
226
227
228
229    from sklearn.model_selection import train_test_split
230
231
232    # split the data into training and testing sets
233    X_train, X_test, y_train, y_test = train_test_split(dfxbr, dfybr, test_size=0.2, random_state=42)
234
235
236
237    class ConsoleColor:
238        # Color
239        BLACK = '\033[90m'
240        RED = '\033[91m'
241        GREEN = '\033[92m'
242        YELLOW = '\033[93m'
243        BLUE = '\033[94m'
244        PURPLE = '\033[95m'
245        CYAN = '\033[96m'
246        GRAY = '\033[97m'
247
248        # Style
249        BOLD = '\033[1m'
250        UNDERLINE = '\033[4m'
251
252        # BackgroundColor
253        BgBLACK = '\033[40m'
254        BgRED = '\033[41m'
255        BgGREEN = '\033[42m'
256        BgORANGE = '\033[43m'
257        BgBLUE = '\033[44m'
258        BgPURPLE = '\033[45m'
259        BgCYAN = '\033[46m'
260        BgGRAY = '\033[47m'
261
262        # End
263        END = '\033[0m'
264
265
266
267
268
269
270    from prettytable import PrettyTable
271    table = PrettyTable()
272    table.field_names = ["Model", "MSE", "SCORE", "R2", "TrainingTime"]
273
274
275
276    j = 1
277    for i in models:
278        then = datetime.now()
279
280        i.fit(X_train, y_train.values.ravel())
281        y_res = i.predict(X_test)
282
283        now = datetime.now()
284        print ("Processing time of ",type(i).__name__,": ", now-then)
285        mse = mean_squared_error(y_test, y_res)
286        r2 = metrics.r2_score(y_test, y_res)
287        score = i.score(X_test, y_test)
288
289        newLine = [type(i).__name__, format(mse, ',.2f'), format(score, '.2f'), format(r2, '.2f'), now-then]
290
291        if r2>0.9:
292          newLine[3] = ConsoleColor.BLUE + newLine[3] + ConsoleColor.END
293        elif r2>0.8:
294          newLine[3] = ConsoleColor.GREEN + newLine[3] + ConsoleColor.END
295        elif r2>0.6:
296          newLine[3] = ConsoleColor.PURPLE + newLine[3] + ConsoleColor.END
297        elif r2>0.4:
298          newLine[3] = ConsoleColor.RED + newLine[3] + ConsoleColor.END
299
300
301
302
303
304        table.add_row(newLine)
305
306
307
308        j = j + 1
309
310
311    self.prettytable = table
312    print(self.prettytable)
313
314    return self.prettytable

Method to fit the regressors

Args: predictors DataFrame, target DataFrame, optional list of feature names to drop