comparison PDAUG_ML_Models/PDAUG_ML_Models.py @ 0:0118cc553f3b draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
author jay
date Wed, 28 Oct 2020 02:04:41 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0118cc553f3b
1
2 import numpy as np
3 import sys,os
4 from scipy import interp
5 import pandas as pd
6
7 ###############################################################
8 from sklearn.metrics import *
9 from sklearn import preprocessing
10 from sklearn.metrics import accuracy_score
11 from sklearn.metrics import precision_recall_fscore_support
12 from sklearn.metrics import roc_curve, auc
13 from sklearn.model_selection import StratifiedKFold
14 from sklearn.preprocessing import StandardScaler
15 from sklearn.preprocessing import MinMaxScaler
16 ###############################################################
17 from sklearn.linear_model import LogisticRegression
18 from sklearn.naive_bayes import GaussianNB
19 from sklearn.neighbors import KNeighborsClassifier
20 from sklearn.tree import DecisionTreeClassifier
21 from sklearn.svm import SVC
22 from sklearn.ensemble import RandomForestClassifier
23 from sklearn.linear_model import SGDClassifier
24 from sklearn.ensemble import GradientBoostingClassifier
25 from sklearn.neural_network import MLPClassifier
26 ###############################################################
27 from itertools import cycle
28 ################################################################
29 from sklearn.model_selection import train_test_split
30
31
32
def ReturnData(TrainFile, TestMethod, TestFile=None):
    """Load the training table and, when the method needs one, the test table.

    Files are read as tab-separated tables with a header row.  In a labelled
    table every column except the last is a feature and the last column is
    the class label.

    Args:
        TrainFile: Path to the labelled training table.
        TestMethod: 'Internal', 'CrossVal' or 'TestSplit' (training data
            only), 'External' (labelled test table) or 'Predict' (unlabelled
            table whose columns are all features).
        TestFile: Path to the test / prediction table; required for
            'External' and 'Predict'.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)`` of NumPy arrays.
        ``X_test``/``y_test`` are ``None`` for the training-only methods and
        ``y_test`` is ``None`` for 'Predict'.

    Raises:
        ValueError: If TestMethod is unknown, or a test file is required but
            was not supplied.
    """

    def _split_features_and_labels(path):
        # Last column holds the label, everything before it is a feature.
        frame = pd.read_csv(path, sep='\t')
        return frame.iloc[:, :-1].values, frame.iloc[:, -1].values

    X_train, y_train = _split_features_and_labels(TrainFile)

    # The original test `TestMethod == 'Internal' or 'CrossVal'` was always
    # true; membership is checked explicitly here.  'TestSplit' is included
    # because Fit_Model requests training data only for that method too.
    if TestMethod in ('Internal', 'CrossVal', 'TestSplit'):
        return X_train, y_train, None, None

    if TestMethod not in ('External', 'Predict'):
        raise ValueError("Unknown TestMethod: %r" % (TestMethod,))

    if TestFile is None:
        raise ValueError("TestMethod %r requires a TestFile" % (TestMethod,))

    if TestMethod == 'External':
        X_test, y_test = _split_features_and_labels(TestFile)
        return X_train, y_train, X_test, y_test

    # 'Predict': the file carries no label column.  The original returned the
    # training data here instead of the data to be predicted.
    X_test = pd.read_csv(TestFile, sep='\t').values
    return X_train, y_train, X_test, None
68
def _scale_features(Selected_Sclaer, X_train, X_test):
    """Fit the selected scaler on X_train only and apply it to both matrices."""
    if Selected_Sclaer == 'Min_Max':
        scaler = MinMaxScaler().fit(X_train)
    elif Selected_Sclaer == 'Standard_Scaler':
        scaler = preprocessing.StandardScaler().fit(X_train)
    elif Selected_Sclaer == 'No_Scaler':
        return X_train, X_test
    else:
        # Previously this only printed the message and then failed later with
        # a NameError because no scaled matrices had been created.
        raise ValueError('Scalling Method option was not correctly selected...!')
    return scaler.transform(X_train), scaler.transform(X_test)


def _fit_and_predict(Algo, x_train, y_train, x_test):
    """Fit Algo once and return (class probabilities, predicted labels)."""
    # A single fit guarantees probabilities and labels come from the same
    # model; the original fitted the estimator twice.
    fitted = Algo.fit(x_train, y_train)
    return fitted.predict_proba(x_test), fitted.predict(x_test)


def _macro_scores(y_true, predicted):
    """Return (accuracy, precision, recall, f1), macro-averaged, rounded to 3."""
    precision, recall, f_score, _ = precision_recall_fscore_support(
        y_true, predicted, average='macro')
    return (round(accuracy_score(y_true, predicted), 3), round(precision, 3),
            round(recall, 3), round(f_score, 3))


def _write_report(algo_name, scores, fpr, tpr, auc_value, Workdirpath, report_dir, OutFile, htmlFname):
    """Write the one-row score table (TSV) and the plotly HTML report."""
    # plotly is imported lazily, as in the original code.
    from plotly.subplots import make_subplots
    import plotly.graph_objects as go

    V_header = ["Algo", "accuracy", "precision", "recall", "f1", "mean_auc"]
    v_values = [algo_name] + [round(float(s), 3) for s in scores] + [round(float(auc_value), 3)]

    df = pd.DataFrame([v_values], columns=V_header)
    df.to_csv(os.path.join(Workdirpath, OutFile), columns=V_header, sep='\t', index=None)

    fig = make_subplots(
        rows=1, cols=2,
        specs=[[{"type": "xy"}, {"type": "scatter"}]],
        subplot_titles=("Algorithm performance", " ROC curve (AUC Score = %0.2f" % auc_value + ')'),
    )
    fig.add_trace(go.Bar(x=V_header[1:], y=v_values[1:], marker_color=['#F58518', '#109618', '#E45756', '#1F77B4', '#19D3F3']), row=1, col=1)
    fig.add_trace(go.Scatter(x=fpr, y=tpr), row=1, col=2)
    fig.update_yaxes(title_text="True Positive Rate", range=[0, 1], row=1, col=2)
    fig.update_xaxes(title_text="False Positive Rate", range=[0, 1], row=1, col=2)
    fig.update_yaxes(title_text="Score", range=[0, 1], row=1, col=1)
    fig.update_xaxes(title_text="Performance measures", row=1, col=1)
    fig.update_layout(height=700, showlegend=False, title="Machine ")
    fig.write_html(os.path.join(report_dir, htmlFname))


def Fit_Model(TrainData, Test_Method, Algo, Selected_Sclaer, Workdirpath, htmlOutDir, OutFile, htmlFname, NoOfFolds=None, TestSize=None, TestData=None ):
    """Train and evaluate (or apply) a classifier and write its report.

    Args:
        TrainData: Path to the tab-separated training table (label in the
            last column), as read by ReturnData.
        Test_Method: 'Internal' (stratified k-fold cross-validation),
            'External' (separate labelled test file), 'TestSplit' (hold-out
            split of the training table) or 'Predict' (label an unlabelled
            table).
        Algo: Estimator exposing fit / predict / predict_proba.
        Selected_Sclaer: 'Min_Max', 'Standard_Scaler' or 'No_Scaler'.
        Workdirpath: Directory that receives OutFile and contains htmlOutDir.
        htmlOutDir: Directory (joined onto Workdirpath) for the HTML report.
        OutFile: File name of the tab-separated score table.
        htmlFname: File name of the HTML report.
        NoOfFolds: Number of cross-validation folds; 5 when None.
        TestSize: Fraction held out for 'TestSplit'.
        TestData: Path to the test / prediction table.

    Returns:
        The predicted labels of the last evaluation performed (for
        'Internal' these belong to the final fold).

    Raises:
        ValueError: On an unknown Test_Method or scaler, or when 'TestSplit'
            is requested without TestSize.

    Notes:
        ROC handling uses column 1 of predict_proba, i.e. it assumes a binary
        problem, exactly as before.
        The 'External' and 'TestSplit' branches referenced `pl` and
        `HTML_Gen`, which are neither imported nor defined in the visible
        part of this file; they now share the plotly report used by
        'Internal'.  NOTE(review): confirm no later part of the file was
        meant to supply those names.
    """
    # Create the directory the report is actually written to.
    report_dir = os.path.join(Workdirpath, htmlOutDir)
    os.makedirs(report_dir, exist_ok=True)

    # The first CLI token names the algorithm sub-command; fall back to the
    # estimator's class name when the function is called programmatically.
    algo_name = sys.argv[1] if len(sys.argv) > 1 else type(Algo).__name__

    if Test_Method == 'Internal':
        X, y, _, _ = ReturnData(TrainData, Test_Method)

        # Honour the requested fold count; it was previously ignored.
        n_splits = 5 if NoOfFolds is None else int(NoOfFolds)
        folds = StratifiedKFold(n_splits=n_splits)

        mean_fpr = np.linspace(0, 1, 100)
        mean_tpr = np.zeros_like(mean_fpr)
        fold_scores = []

        for train, test in folds.split(X, y):
            x_train, x_test = _scale_features(Selected_Sclaer, X[train], X[test])
            prob, predicted = _fit_and_predict(Algo, x_train, y[train], x_test)

            fpr, tpr, _ = roc_curve(y[test], prob[:, 1])
            # np.interp replaces the deprecated scipy.interp alias.
            fold_tpr = np.interp(mean_fpr, fpr, tpr)
            fold_tpr[0] = 0.0  # anchor every fold's curve at the origin
            mean_tpr += fold_tpr

            fold_scores.append(_macro_scores(y[test], predicted))

        mean_tpr /= n_splits
        mean_tpr[-1] = 1.0  # anchor the averaged curve at (1, 1)
        mean_auc = auc(mean_fpr, mean_tpr)

        _write_report(algo_name, np.mean(fold_scores, axis=0), mean_fpr, mean_tpr, mean_auc,
                      Workdirpath, report_dir, OutFile, htmlFname)

    elif Test_Method in ('External', 'TestSplit'):
        if Test_Method == 'External':
            X_train, y_train, X_test, y_test = ReturnData(TrainData, Test_Method, TestData)
        else:
            if TestSize is None:
                raise ValueError("Test_Method 'TestSplit' requires TestSize")
            X_all, y_all, _, _ = ReturnData(TrainData, Test_Method)
            X_train, X_test, y_train, y_test = train_test_split(
                X_all, y_all, test_size=float(TestSize), random_state=0)

        x_train, x_test = _scale_features(Selected_Sclaer, X_train, X_test)
        prob, predicted = _fit_and_predict(Algo, x_train, y_train, x_test)

        fpr, tpr, _ = roc_curve(y_test, prob[:, 1])
        _write_report(algo_name, _macro_scores(y_test, predicted), fpr, tpr, auc(fpr, tpr),
                      Workdirpath, report_dir, OutFile, htmlFname)

    elif Test_Method == "Predict":
        X_train, y_train, X_test, _ = ReturnData(TrainData, Test_Method, TestData)
        x_train, x_test = _scale_features(Selected_Sclaer, X_train, X_test)
        # The original called `model.fit`, but no `model` exists in this scope.
        predicted = Algo.fit(x_train, y_train).predict(x_test)

    else:
        raise ValueError("Unknown Test_Method: %r" % (Test_Method,))

    return predicted
329
def SVM_Classifier(C, kernel, degree, gamma, coef0, shrinking, probability, tol, cache_size, verbose, max_iter, decision_function_shape, randomState, breakties, TrainFile, TestMethod, SelectedSclaer, NFolds, TestFile, OutFile, htmlOutDir, htmlFname, Workdirpath):
    """Build an SVC from command-line style arguments and run Fit_Model on it.

    The hyper-parameters arrive as strings from the command line or as the
    Python defaults declared on the argument parser, so they are converted
    to the types SVC expects.  Boolean options accept a real bool or the
    text 'true' / 'false'.

    Note: `probability` and `shrinking` are now parsed independently.  The
    original condition `probability or shrinking == 'true'` switched both on
    whenever `probability` was any non-empty value.  Fit_Model's evaluation
    branches call predict_proba, so `probability` should stay enabled for
    them.
    """

    def _as_bool(value):
        # Accept real bools (parser defaults) as well as textual flags.
        if isinstance(value, str):
            return value.strip().lower() == 'true'
        return bool(value)

    # 'none' is accepted for consistency with the sibling classifier wrappers.
    if randomState is None or randomState == 'none':
        randomState = None
    else:
        randomState = int(randomState)

    # Fall back to 200, the default declared for --cache_size on the parser,
    # rather than handing None to SVC.
    cache_size = 200.0 if cache_size is None else float(cache_size)

    # 'scale' and 'auto' are keywords; anything else is a numeric coefficient.
    if gamma not in ('scale', 'auto'):
        gamma = float(gamma)

    pera = {
        'C': float(C),
        'kernel': kernel,
        'degree': int(degree),  # 3
        'gamma': gamma,  # default=scale
        'coef0': float(coef0),  # default=0.0
        'shrinking': _as_bool(shrinking),
        'probability': _as_bool(probability),
        'tol': float(tol),  # default=1e-3
        'cache_size': cache_size,
        'verbose': _as_bool(verbose),
        'max_iter': int(max_iter),  # default=-1
        'decision_function_shape': decision_function_shape,
        'random_state': randomState,
        'break_ties': _as_bool(breakties),
    }

    model = SVC(**pera)

    Fit_Model(TrainData=TrainFile, Test_Method=TestMethod, Algo=model, Selected_Sclaer=SelectedSclaer, Workdirpath=Workdirpath, htmlOutDir=htmlOutDir, OutFile=OutFile, htmlFname=htmlFname, NoOfFolds=int(NFolds), TestData=TestFile)
386
387
def SGD_Classifier( loss, penalty, alpha, l1_ratio, fit_intercept, max_iter, tol, shuffle, verbose, epsilon, n_jobs, random_state, learning_rate, eta0, power_t, early_stopping, validation_fraction, n_iter_no_change, warm_start, average, TrainFile, TestMethod, SelectedSclaer, NFolds, TestFile, OutFile, htmlOutDir, htmlFname, Workdirpath):
    """Configure an SGDClassifier from textual options and pass it to Fit_Model.

    The text 'none' maps to None for n_jobs and random_state; a flag is
    switched on only by the exact text 'true'.  Numeric options are cast to
    int or float before the estimator is constructed.
    """
    # Optional integers: the literal 'none' disables them.
    n_jobs = None if n_jobs == 'none' else int(n_jobs)
    random_state = None if random_state == 'none' else int(random_state)

    # Flags: only the exact string 'true' enables an option.
    fit_intercept = (fit_intercept == 'true')
    shuffle = (shuffle == 'true')
    early_stopping = (early_stopping == 'true')
    warm_start = (warm_start == 'true')
    average = (average == 'true')

    model = SGDClassifier(
        loss=loss,
        penalty=penalty,
        alpha=float(alpha),  # 0.0001
        l1_ratio=float(l1_ratio),  # 0.15
        fit_intercept=fit_intercept,  # true
        max_iter=int(max_iter),  # default=1000
        tol=float(tol),  # default=1e-3
        shuffle=shuffle,
        verbose=int(verbose),  # default=0
        epsilon=float(epsilon),  # default=0.1
        n_jobs=n_jobs,  # default=None
        random_state=random_state,  # default=None
        learning_rate=learning_rate,
        eta0=float(eta0),  # default=0.0
        power_t=float(power_t),  # default=0.5
        early_stopping=early_stopping,
        validation_fraction=float(validation_fraction),  # default=0.1
        n_iter_no_change=int(n_iter_no_change),  # default=5
        warm_start=warm_start,
        average=average,
    )

    Fit_Model(TrainData=TrainFile, Test_Method=TestMethod, Algo=model, Selected_Sclaer=SelectedSclaer, Workdirpath=Workdirpath, htmlOutDir=htmlOutDir, OutFile=OutFile, htmlFname=htmlFname, NoOfFolds=int(NFolds), TestData=TestFile)
449
450
def DT_Classifier(criterion, splitter, max_depth, min_samples_split, min_samples_leaf, min_weight_fraction_leaf, random_state, max_leaf_nodes, min_impurity_decrease, min_impurity_split, presort, ccpalpha, max_features, TrainFile, TestMethod, SelectedSclaer, NFolds, TestFile, OutFile, htmlOutDir, htmlFname, Workdirpath):
    """Build a DecisionTreeClassifier from textual options and run Fit_Model.

    The text 'none' maps to None for max_depth, max_features, random_state
    and max_leaf_nodes.  min_samples_split, min_samples_leaf and a numeric
    max_features are read as a float when the text contains a '.', and as an
    int otherwise.

    Fixes relative to the previous version:
      * a fractional min_samples_leaf was stored in min_samples_split;
      * both values were then forced through int(), which rejects or
        truncates fractions;
      * the string choices 'auto', 'sqrt' and 'log2' for max_features are
        passed through instead of failing in int().
    """

    def _int_or_float(text):
        # A decimal point marks a fraction; otherwise an absolute count.
        return float(text) if '.' in str(text) else int(text)

    max_depth = None if max_depth == 'none' else int(max_depth)

    min_samples_split = _int_or_float(min_samples_split)
    min_samples_leaf = _int_or_float(min_samples_leaf)

    if max_features == 'none':
        max_features = None
    elif max_features not in ('auto', 'sqrt', 'log2'):
        max_features = _int_or_float(max_features)

    random_state = None if random_state == 'none' else int(random_state)
    max_leaf_nodes = None if max_leaf_nodes == 'none' else int(max_leaf_nodes)

    pera = {"criterion": criterion,
            "splitter": splitter,
            "max_depth": max_depth,  # int, default=None
            "min_samples_split": min_samples_split,  # default=2
            "min_samples_leaf": min_samples_leaf,  # default=1
            "min_weight_fraction_leaf": float(min_weight_fraction_leaf),  # default=0.0
            "random_state": random_state,  # default=None
            "max_leaf_nodes": max_leaf_nodes,  # default=None
            "min_impurity_decrease": float(min_impurity_decrease),  # float, default=0.0
            "min_impurity_split": float(min_impurity_split),  # float, default=1e-7
            "presort": presort,  # default=deprecated
            'ccp_alpha': float(ccpalpha),  # non-negative float, default=0.0
            'max_features': max_features}  # int, float or {"auto", "sqrt", "log2"}, default=None

    model = DecisionTreeClassifier(**pera)

    Fit_Model(TrainData=TrainFile, Test_Method=TestMethod, Algo=model, Selected_Sclaer=SelectedSclaer, Workdirpath=Workdirpath, htmlOutDir=htmlOutDir, OutFile=OutFile, htmlFname=htmlFname, NoOfFolds=int(NFolds), TestData=TestFile)
506
507
def GB_Classifier(loss, learning_rate, n_estimators, subsample, criterion, min_samples_split, min_samples_leaf, min_weight_fraction_leaf, max_depth, min_impurity_decrease,min_impurity_split, init, random_state, verbose, max_leaf_nodes, warm_start, presort, validation_fraction, n_iter_no_change, tol, ccpalpha, max_features, TrainFile, TestMethod, SelectedSclaer, NFolds, TestFile, OutFile, htmlOutDir, htmlFname, Workdirpath):
    """Build a GradientBoostingClassifier from textual options and run Fit_Model.

    The text 'none' maps to None for max_features, random_state,
    max_leaf_nodes, n_iter_no_change and init; warm_start is enabled only by
    the exact text 'true'.  min_samples_split, min_samples_leaf and a numeric
    max_features are read as a float when the text contains a '.', and as an
    int otherwise.

    Fixes relative to the previous version:
      * a fractional min_samples_leaf was stored in min_samples_split, so
        the leaf setting stayed a string and the split setting was
        overwritten;
      * the string choices 'auto', 'sqrt' and 'log2' for max_features are
        passed through instead of failing in int().
    """

    def _int_or_float(text):
        # A decimal point marks a fraction; otherwise an absolute count.
        return float(text) if '.' in str(text) else int(text)

    min_samples_split = _int_or_float(min_samples_split)
    min_samples_leaf = _int_or_float(min_samples_leaf)

    if max_features == 'none':
        max_features = None
    elif max_features not in ('auto', 'sqrt', 'log2'):
        max_features = _int_or_float(max_features)

    random_state = None if random_state == 'none' else int(random_state)
    max_leaf_nodes = None if max_leaf_nodes == 'none' else int(max_leaf_nodes)
    warm_start = (warm_start == 'true')
    n_iter_no_change = None if n_iter_no_change == 'none' else int(n_iter_no_change)

    if init == 'none':
        init = None

    pera = {"loss": loss,
            "learning_rate": float(learning_rate),
            "n_estimators": int(n_estimators),  # int (default=100)
            "subsample": float(subsample),  # float, optional (default=1.0)
            "criterion": criterion,
            "min_samples_split": min_samples_split,  # int, float, optional (default=2)
            "min_samples_leaf": min_samples_leaf,  # int, float, optional (default=1)
            "min_weight_fraction_leaf": float(min_weight_fraction_leaf),  # float, optional (default=0.)
            "max_depth": int(max_depth),  # integer, optional (default=3)
            "min_impurity_decrease": float(min_impurity_decrease),  # float, optional (default=0.)
            "min_impurity_split": float(min_impurity_split),  # float, (default=1e-7)
            "init": init,  # estimator or zero, optional (default=None)
            "random_state": random_state,  # int, RandomState instance or None, optional (default=None)
            "verbose": int(verbose),  # int, default: 0
            "max_features": max_features,  # int, float, string or None, optional (default=None)
            "max_leaf_nodes": max_leaf_nodes,  # int or None, optional (default=None)
            "warm_start": warm_start,  # bool, default: False
            "presort": presort,  # deprecated, default=deprecated
            "validation_fraction": float(validation_fraction),  # float, optional, default 0.1
            "n_iter_no_change": n_iter_no_change,  # int, default None
            "tol": float(tol),  # default 1e-4
            "ccp_alpha": float(ccpalpha)}  # non-negative float, optional (default=0.0)

    model = GradientBoostingClassifier(**pera)

    Fit_Model(TrainData=TrainFile, Test_Method=TestMethod, Algo=model, Selected_Sclaer=SelectedSclaer, Workdirpath=Workdirpath, htmlOutDir=htmlOutDir, OutFile=OutFile, htmlFname=htmlFname, NoOfFolds=int(NFolds), TestData=TestFile)
584
585
def RF_Classifier( n_estimators, criterion, max_depth, min_samples_split, min_samples_leaf, min_weight_fraction_leaf, max_features, max_leaf_nodes, min_impurity_decrease, min_impurity_split, bootstrap, oob_score, n_jobs, random_state, verbose, warm_start, ccp_alpha, max_samples, TrainFile, TestMethod, SelectedSclaer, NFolds, TestFile, OutFile, htmlOutDir, htmlFname, Workdirpath):
    """Build a RandomForestClassifier from textual options and run Fit_Model.

    The text 'none' maps to None for max_depth, max_features,
    max_leaf_nodes, n_jobs, random_state and max_samples; flags are enabled
    only by the exact text 'true'.  min_samples_split, min_samples_leaf,
    max_samples and a numeric max_features are read as a float when the text
    contains a '.', and as an int otherwise.

    Fixes relative to the previous version:
      * a fractional min_samples_leaf was stored in min_samples_split;
      * min_impurity_split was built from min_samples_split instead of the
        min_impurity_split argument;
      * max_features now also accepts 'sqrt', 'log2' and 'none' instead of
        failing in int() for anything but 'auto' or a number.
    """

    def _int_or_float(text):
        # A decimal point marks a fraction; otherwise an absolute count.
        return float(text) if '.' in str(text) else int(text)

    def _none_or(convert, text):
        # The literal 'none' stands for "not set".
        return None if text == 'none' else convert(text)

    max_depth = _none_or(int, max_depth)

    min_samples_split = _int_or_float(min_samples_split)
    min_samples_leaf = _int_or_float(min_samples_leaf)

    if max_features == 'none':
        max_features = None
    elif max_features not in ('auto', 'sqrt', 'log2'):
        max_features = _int_or_float(max_features)

    max_leaf_nodes = _none_or(int, max_leaf_nodes)

    bootstrap = (bootstrap == 'true')
    oob_score = (oob_score == 'true')

    n_jobs = _none_or(int, n_jobs)
    random_state = _none_or(int, random_state)

    warm_start = (warm_start == 'true')

    max_samples = _none_or(_int_or_float, max_samples)

    pera = {
        "n_estimators": int(n_estimators),  # integer, optional (default=100)
        "criterion": criterion,  # string, optional (default='gini')
        "max_depth": max_depth,  # integer or None, optional (default=None)
        "min_samples_split": min_samples_split,  # int, float, optional (default=2)
        "min_samples_leaf": min_samples_leaf,  # int, float, optional (default=1)
        "min_weight_fraction_leaf": float(min_weight_fraction_leaf),  # float, optional (default=0.)
        "max_features": max_features,  # int, float, string or None, optional (default='auto')
        "max_leaf_nodes": max_leaf_nodes,  # int or None, optional (default=None)
        "min_impurity_decrease": float(min_impurity_decrease),  # float, optional (default=0.)
        "min_impurity_split": float(min_impurity_split),  # float, (default=1e-7)
        "bootstrap": bootstrap,  # boolean, optional (default=True)
        "oob_score": oob_score,  # bool (default=False)
        "n_jobs": n_jobs,  # int or None, optional (default=None)
        "random_state": random_state,  # int, RandomState instance or None, optional (default=None)
        "verbose": int(verbose),  # int, optional (default=0)
        "warm_start": warm_start,  # bool, optional (default=False)
        "ccp_alpha": float(ccp_alpha),  # non-negative float, optional (default=0.0)
        "max_samples": max_samples,  # int or float, default=None
    }

    model = RandomForestClassifier(**pera)

    Fit_Model(TrainData=TrainFile, Test_Method=TestMethod, Algo=model, Selected_Sclaer=SelectedSclaer, Workdirpath=Workdirpath, htmlOutDir=htmlOutDir, OutFile=OutFile, htmlFname=htmlFname, NoOfFolds=int(NFolds), TestData=TestFile)
674
675
def LR_Classifier(penalty, dual, tol, C, fit_intercept, intercept_scaling, random_state, solver, max_iter, multi_class, verbose, warm_start, n_jobs, l1_ratio, TrainFile, TestMethod, SelectedSclaer, NFolds, TestFile, OutFile, htmlOutDir, htmlFname, Workdirpath):
    """Configure a LogisticRegression from textual options and pass it to Fit_Model.

    Flags are enabled only by the exact text 'true'; the text 'none' maps to
    None for random_state, n_jobs and l1_ratio.  The remaining numeric
    options are cast to int or float.
    """
    # Flags: only the exact string 'true' enables an option.
    dual = (dual == 'true')
    fit_intercept = (fit_intercept == "true")

    random_state = None if random_state == 'none' else int(random_state)

    warm_start = (warm_start == 'true')

    # Optional numbers: the literal 'none' leaves them unset.
    n_jobs = None if n_jobs == "none" else int(n_jobs)
    l1_ratio = None if l1_ratio == "none" else float(l1_ratio)

    model = LogisticRegression(
        penalty=penalty,  # l2
        dual=dual,  # false
        tol=float(tol),  # 1e-4
        C=float(C),  # 1.0
        fit_intercept=fit_intercept,  # True
        intercept_scaling=float(intercept_scaling),  # 1
        random_state=random_state,  # None
        solver=solver,  # lbfgs
        max_iter=int(max_iter),  # 100
        multi_class=multi_class,  # auto
        verbose=int(verbose),  # 0
        warm_start=warm_start,  # False
        n_jobs=n_jobs,  # None
        l1_ratio=l1_ratio,  # None
    )

    Fit_Model(TrainData=TrainFile, Test_Method=TestMethod, Algo=model, Selected_Sclaer=SelectedSclaer, Workdirpath=Workdirpath, htmlOutDir=htmlOutDir, OutFile=OutFile, htmlFname=htmlFname, NoOfFolds=int(NFolds), TestData=TestFile)
727
728
def KN_Classifier(n_neighbors, weights, algorithm, leaf_size, p, metric, metric_params, n_jobs, TrainFile, TestMethod, SelectedSclaer, NFolds, TestFile, OutFile, htmlOutDir, htmlFname, Workdirpath):
    """Configure a KNeighborsClassifier from textual options and pass it to Fit_Model.

    n_neighbors, leaf_size and p are cast to int; the text 'none' for n_jobs
    maps to None.  metric_params is accepted but not forwarded to the
    estimator.
    """
    # The literal 'none' leaves the worker count unset.
    n_jobs = None if n_jobs == 'none' else int(n_jobs)

    model = KNeighborsClassifier(
        n_neighbors=int(n_neighbors),  # int5
        weights=weights,
        algorithm=algorithm,
        leaf_size=int(leaf_size),  # int30
        p=int(p),  # int2
        metric=metric,  # minkowski
        n_jobs=n_jobs,  # none
    )

    Fit_Model(TrainData=TrainFile, Test_Method=TestMethod, Algo=model, Selected_Sclaer=SelectedSclaer, Workdirpath=Workdirpath, htmlOutDir=htmlOutDir, OutFile=OutFile, htmlFname=htmlFname, NoOfFolds=int(NFolds), TestData=TestFile)
749
def GNB_Classifier( var_smoothing, TrainFile, TestMethod, SelectedSclaer, NFolds, TestFile, OutFile, htmlOutDir, htmlFname, Workdirpath):
    """Configure a GaussianNB (var_smoothing cast to float) and pass it to Fit_Model."""
    model = GaussianNB(var_smoothing=float(var_smoothing))

    Fit_Model(TrainData=TrainFile, Test_Method=TestMethod, Algo=model, Selected_Sclaer=SelectedSclaer, Workdirpath=Workdirpath, htmlOutDir=htmlOutDir, OutFile=OutFile, htmlFname=htmlFname, NoOfFolds=int(NFolds), TestData=TestFile)
759
760
def MLP_Classifier(hidden_layer_sizes, activation,solver,alpha,batch_size,learning_rate,learning_rate_init,power_t,max_iter,shuffle,random_state,tol,verbose,warm_start,momentum,nesterovs_momentum,early_stopping,validation_fraction,beta_1,beta_2,epsilon,n_iter_no_change,max_fun,TrainFile, TestMethod, SelectedSclaer, NFolds, Testspt, TestFile, OutFile, htmlOutDir, htmlFname, Workdirpath):
    """Configure an MLPClassifier and pass it to Fit_Model.

    shuffle, nesterovs_momentum, early_stopping and warm_start are enabled
    only by the exact text 'true'.  verbose is disabled only by the exact
    text 'false' and enabled for every other value.  The text 'none' for
    random_state maps to None.  All other hyper-parameters are forwarded
    unchanged; NOTE(review): presumably they are already typed by the
    argument parser -- confirm against the caller.

    Unlike the sibling wrappers, NFolds is forwarded without an int() cast
    and Testspt is forwarded as Fit_Model's TestSize.
    """
    # Flags switched on only by the exact string 'true'.
    shuffle = (shuffle == 'true')
    nesterovs_momentum = (nesterovs_momentum == 'true')
    early_stopping = (early_stopping == 'true')

    random_state = None if random_state == 'none' else int(random_state)

    # verbose is the odd one out: everything except 'false' enables it.
    verbose = (verbose != 'false')

    warm_start = (warm_start == 'true')

    model = MLPClassifier(
        hidden_layer_sizes=hidden_layer_sizes,  # =(100,),
        activation=activation,  # ='relu',
        solver=solver,  # ='adam',
        alpha=alpha,  # =0.0001,
        batch_size=batch_size,  # ='auto',
        learning_rate=learning_rate,  # ='constant',
        learning_rate_init=learning_rate_init,  # =0.001,
        power_t=power_t,  # =0.5,
        max_iter=max_iter,  # =200,
        shuffle=shuffle,  # =True,
        random_state=random_state,  # =None,
        tol=tol,  # =0.0001,
        verbose=verbose,  # =False,
        warm_start=warm_start,  # =False,
        momentum=momentum,  # =0.9,
        nesterovs_momentum=nesterovs_momentum,  # =True,
        early_stopping=early_stopping,  # =False,
        validation_fraction=validation_fraction,  # =0.1,
        beta_1=beta_1,  # =0.9,
        beta_2=beta_2,  # =0.999,
        epsilon=epsilon,  # =1e-08,
        n_iter_no_change=n_iter_no_change,  # =10,
        max_fun=max_fun,  # =15000
    )

    Fit_Model(TrainData=TrainFile, Test_Method=TestMethod, Algo=model, Selected_Sclaer=SelectedSclaer, Workdirpath=Workdirpath, htmlOutDir=htmlOutDir, OutFile=OutFile, htmlFname=htmlFname, NoOfFolds=NFolds, TestSize=Testspt, TestData=TestFile)
822
823
824 if __name__=="__main__":
825
826 import argparse
827
# Top-level command-line parser: every supported classifier is exposed as its
# own sub-command with its own option set.
parser = argparse.ArgumentParser(description='Deployment tool')
subparsers = parser.add_subparsers()

# 'SVMC' sub-command. The option names follow the constructor parameters of
# sklearn.svm.SVC (presumably forwarded there; the consumer is outside this
# section -- TODO confirm).
# NOTE: no `type=` is declared, so a value given on the command line arrives as
# a string while an omitted option keeps the Python type of its default.
svmc = subparsers.add_parser('SVMC')
svmc.add_argument("--C", default=1.0, help="Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty.")
svmc.add_argument("--kernel", default='rbf', help="Specifies the kernel type to be used in the algorithm. It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a callable. If none is given, 'rbf' will be used. If a callable is given it is used to pre-compute the kernel matrix from data matrices; that matrix should be an array of shape (n_samples, n_samples).")
svmc.add_argument("--degree", default=3, help="Degree of the polynomial kernel function ('poly'). Ignored by all other kernels.")
svmc.add_argument("--gamma", default='scale', help="Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. if gamma='scale' (default) is passed then it uses 1 / (n_features * X.var()) as value of gamma, if 'auto', uses 1 / n_features.")
svmc.add_argument("--coef0", default=0.0, help="Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'.")
svmc.add_argument("--shrinking", default=True, help="Whether to use the shrinking heuristic.")
svmc.add_argument("--probability", default=True, help="Whether to enable probability estimates. This must be enabled prior to calling fit, will slow down that method as it internally uses 5-fold cross-validation, and predict_proba may be inconsistent with predict")
svmc.add_argument("--tol", default=0.001, help="Tolerance for stopping criterion.")
svmc.add_argument("--cache_size", default=200, help="Specify the size of the kernel cache (in MB).")
svmc.add_argument("--verbose", default=False, help="Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in libsvm that, if enabled, may not work properly in a multithreaded context.")
svmc.add_argument("--max_iter", default=-1, help="Hard limit on iterations within solver, or -1 for no limit.")
svmc.add_argument("--decision_function_shape", default='ovr', help="Whether to return a one-vs-rest ('ovr') decision function of shape (n_samples, n_classes) as all other classifiers, or the original one-vs-one ('ovo') decision function of libsvm which has shape (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one ('ovo') is always used as multi-class strategy.")
svmc.add_argument("--randomState", default=None, help="The seed of the pseudo random number generator used when shuffling the data for probability estimates. If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by np.random.")
svmc.add_argument("--breakties", default=False, help="If true, decision_function_shape='ovr', and number of classes > 2, predict will break ties according to the confidence values of decision_function; otherwise the first class among the tied classes is returned. Please note that breaking ties comes at a relatively high computational cost compared to a simple predict.")
# Data, evaluation and output options (the same set is repeated for every sub-command).
svmc.add_argument("--TrainFile", required=True, help="Positive negative dataset Ex. 'Train.csv'")
svmc.add_argument("--TestMethod", required=True, help="'Internal', 'CrossVal', 'External', 'Predict'")
svmc.add_argument("--SelectedSclaer", required=True, help="'Min_Max','Standard_Scaler','No_Scaler'")
svmc.add_argument("--NFolds", default=5, help="int, Max=10")
svmc.add_argument("--TestFile", default=None, help="Test data, 'Test.csv'")
svmc.add_argument("--OutFile", default='Out.csv', help="Out.csv")
svmc.add_argument("--htmlOutDir", default=os.path.join(os.getcwd(), 'report_dir'), help="HTML Out Dir")
svmc.add_argument("--htmlFname", default='Out.html', help="HTML out file")
svmc.add_argument("--Workdirpath", default=os.getcwd(), help="Working Directory Path")
855
# 'SGDC' sub-command. The option names follow the constructor parameters of
# sklearn.linear_model.SGDClassifier (presumably forwarded there; the consumer
# is outside this section -- TODO confirm).
# NOTE: no `type=` is declared, so a value given on the command line arrives as
# a string while an omitted option keeps the Python type of its default.
sgdc = subparsers.add_parser('SGDC')
sgdc.add_argument("--loss", default='log', help="The loss function to be used. This tool defaults to 'log' (logistic regression); 'hinge' gives a linear SVM. The possible options are 'hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron', or a regression loss: 'squared_loss', 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.")
sgdc.add_argument("--penalty", default='l2', help="The penalty (aka regularization term) to be used. Defaults to 'l2' which is the standard regularizer for linear SVM models. 'l1' and 'elasticnet' might bring sparsity to the model (feature selection) not achievable with 'l2'.")
sgdc.add_argument("--alpha", default=0.0001, help="Constant that multiplies the regularization term. Defaults to 0.0001. Also used to compute learning_rate when set to 'optimal'.")
sgdc.add_argument("--l1_ratio", default=0.15, help="The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1. l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1. Defaults to 0.15.")
sgdc.add_argument("--fit_intercept", default=True, help="Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. Defaults to True.")
sgdc.add_argument("--max_iter", default=1000, help="The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the fit method, and not the partial_fit method.")
sgdc.add_argument("--tol", default=0.001, help="The stopping criterion. If it is not None, the iterations will stop when (loss > best_loss - tol) for n_iter_no_change consecutive epochs.")
sgdc.add_argument("--shuffle", default=True, help="Whether or not the training data should be shuffled after each epoch. Defaults to True.")
sgdc.add_argument("--verbose", default=0, help="The verbosity level.")
sgdc.add_argument("--epsilon", default=0.1, help="Epsilon in the epsilon-insensitive loss functions; only if loss is 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'. For 'huber', determines the threshold at which it becomes less important to get the prediction exactly right. For epsilon-insensitive, any differences between the current prediction and the correct label are ignored if they are less than this threshold.")
sgdc.add_argument("--n_jobs", default='none', help="The number of CPUs to use to do the OVA (One Versus All, for multi-class problems) computation. None means 1 unless in a joblib.parallel_backend context. -1 means using all processors. See Glossary for more details.")
sgdc.add_argument("--random_state", default='none', help="The seed of the pseudo random number generator to use when shuffling the data. If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by np.random.")
# The three help texts below were shifted by one option in the original
# (the schedule descriptions sat on --eta0 and --power_t); each now describes its own option.
sgdc.add_argument("--learning_rate", default='optimal', help="The learning rate schedule: 'constant': eta = eta0; 'optimal': eta = 1.0 / (alpha * (t + t0)) where t0 is chosen by a heuristic proposed by Leon Bottou; 'invscaling': eta = eta0 / pow(t, power_t); 'adaptive': eta = eta0 as long as the training keeps decreasing.")
sgdc.add_argument("--eta0", default=0.0, help="The initial learning rate for the 'constant', 'invscaling' or 'adaptive' schedules. Not used by the default schedule 'optimal'.")
sgdc.add_argument("--power_t", default=0.5, help="The exponent for inverse scaling learning rate.")
sgdc.add_argument("--early_stopping", default=False, help="Whether to use early stopping to terminate training when validation score is not improving. If set to True, a stratified fraction of the training data is set aside as validation and training stops when the validation score is not improving by at least tol for n_iter_no_change consecutive epochs.")
sgdc.add_argument("--validation_fraction", default=0.1, help="The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True.")
sgdc.add_argument("--n_iter_no_change", default=5, help="Number of iterations with no improvement to wait before early stopping.")
sgdc.add_argument("--warm_start", default=False, help="When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution.")
sgdc.add_argument("--average", default=False, help="When set to True, computes the averaged SGD weights and stores the result in the coef_ attribute. If set to an int greater than 1, averaging will begin once the total number of samples seen reaches average.")
# Data, evaluation and output options (the same set is repeated for every sub-command).
sgdc.add_argument("--TrainFile", required=True, help="Positive negative dataset Ex. 'Train.csv'")
sgdc.add_argument("--TestMethod", required=True, help="'Internal', 'CrossVal', 'External', 'Predict'")
sgdc.add_argument("--SelectedSclaer", required=True, help="'Min_Max','Standard_Scaler','No_Scaler'")
sgdc.add_argument("--NFolds", default=5, help="int, Max=10")
sgdc.add_argument("--TestFile", default=None, help="Test data, 'Test.csv'")
sgdc.add_argument("--OutFile", default='Out.csv', help="Output file name, e.g. 'Out.csv'")
sgdc.add_argument("--htmlOutDir", default=os.path.join(os.getcwd(), 'report_dir'), help="HTML Out Dir")
sgdc.add_argument("--htmlFname", default='Out.html', help="HTML out file")
sgdc.add_argument("--Workdirpath", default=os.getcwd(), help="Working Directory Path")
886
# 'DTC' sub-command. The option names follow the constructor parameters of
# sklearn.tree.DecisionTreeClassifier (presumably forwarded there; the consumer
# is outside this section -- TODO confirm).
# NOTE: no `type=` is declared, so a value given on the command line arrives as
# a string; several defaults are deliberately strings as well ('none', '2', '1').
dtc = subparsers.add_parser('DTC')
dtc.add_argument("--criterion", default='gini', help="The function to measure the quality of a split. Supported criteria are 'gini' for the Gini impurity and 'entropy' for the information gain.")
dtc.add_argument("--splitter", default='best', help="The strategy used to choose the split at each node. Supported strategies are 'best' to choose the best split and 'random' to choose the best random split.")
dtc.add_argument("--max_depth", default='none', help="The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.")
dtc.add_argument("--min_samples_split", default='2', help="The minimum number of samples required to split an internal node: If int, then consider min_samples_split as the minimum number. If float, then min_samples_split is a fraction and ceil(min_samples_split * n_samples) are the minimum number of samples for each split.")
dtc.add_argument("--min_samples_leaf", default='1', help="The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least min_samples_leaf training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression.")
dtc.add_argument("--min_weight_fraction_leaf", default=0.0, help="The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.")
dtc.add_argument("--random_state", default='none', help="If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by np.random.")
# The original attached the min_impurity_decrease description to --max_leaf_nodes
# and left --min_impurity_decrease without any help; both are corrected here.
dtc.add_argument("--max_leaf_nodes", default='none', help="Grow a tree with max_leaf_nodes in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes.")
dtc.add_argument("--min_impurity_decrease", default=0.0, help="A node will be split if this split induces a decrease of the impurity greater than or equal to this value.")
dtc.add_argument("--min_impurity_split", default=1e-09, help="Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf.")
dtc.add_argument("--presort", default='deprecate', help="This parameter is deprecated and will be removed in v0.24.")
dtc.add_argument("--ccpalpha", default=0.0, help="Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ccp_alpha will be chosen. By default, no pruning is performed. See Minimal Cost-Complexity Pruning for details.")
# Data, evaluation and output options (the same set is repeated for every sub-command);
# --max_features keeps its original position among them so the help ordering is unchanged.
dtc.add_argument("--TrainFile", required=True, help="Positive negative dataset Ex. 'Train.csv'")
dtc.add_argument("--TestMethod", required=True, help="'Internal', 'CrossVal', 'External', 'Predict'")
dtc.add_argument("--max_features", default='none', help="The number of features to consider when looking for the best split.")
dtc.add_argument("--SelectedSclaer", required=True, help="'Min_Max','Standard_Scaler','No_Scaler'")
dtc.add_argument("--NFolds", default=5, help="int, Max=10")
dtc.add_argument("--TestFile", default=None, help="Test data, 'Test.csv'")
dtc.add_argument("--OutFile", default='Out.csv', help="Out.tsv")
dtc.add_argument("--htmlOutDir", default=os.path.join(os.getcwd(), 'report_dir'), help="HTML Out Dir")
dtc.add_argument("--htmlFname", default='Out.html', help="HTML out file")
dtc.add_argument("--Workdirpath", default=os.getcwd(), help="Working Directory Path")
910
# 'GBC' sub-command. The option names follow the constructor parameters of
# sklearn.ensemble.GradientBoostingClassifier (presumably forwarded there; the
# consumer is outside this section -- TODO confirm).
# NOTE: no `type=` is declared, so a value given on the command line arrives as
# a string; several defaults are deliberately strings as well ('none', 'false', '2').
gbc = subparsers.add_parser('GBC')
gbc.add_argument("--loss", default='deviance', help="loss function to be optimized. 'deviance' refers to deviance (= logistic regression) for classification with probabilistic outputs. For loss 'exponential' gradient boosting recovers the AdaBoost algorithm.")
gbc.add_argument("--learning_rate", default=0.1, help="learning rate shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators.")
gbc.add_argument("--n_estimators", default=100, help="The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.")
gbc.add_argument("--subsample", default=1.0, help="The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. Choosing subsample < 1.0 leads to a reduction of variance and an increase in bias.")
gbc.add_argument("--criterion", default='friedman_mse', help="The function to measure the quality of a split. Supported criteria are 'friedman_mse' for the mean squared error with improvement score by Friedman, 'mse' for mean squared error, and 'mae' for the mean absolute error. The default value of 'friedman_mse' is generally the best as it can provide a better approximation in some cases.")
gbc.add_argument("--min_samples_split", default='2', help="The minimum number of samples required to split an internal node: If int, then consider min_samples_split as the minimum number. If float, then min_samples_split is a fraction and ceil(min_samples_split * n_samples) are the minimum number of samples for each split.")
gbc.add_argument("--min_samples_leaf", default='1', help="The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least min_samples_leaf training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression.If int, then consider min_samples_leaf as the minimum number. If float, then min_samples_leaf is a fraction and ceil(min_samples_leaf * n_samples) are the minimum number of samples for each node.")
gbc.add_argument("--min_weight_fraction_leaf", default=0, help="The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.")
gbc.add_argument("--max_depth", default=3, help="maximum depth of the individual regression estimators. The maximum depth limits the number of nodes in the tree. Tune this parameter for best performance; the best value depends on the interaction of the input variables.")
gbc.add_argument("--min_impurity_decrease", default=0.0, help="A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following: 'N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity'), where N is the total number of samples, N_t is the number of samples at the current node, N_t_L is the number of samples in the left child, and N_t_R is the number of samples in the right child. N, N_t, N_t_R and N_t_L all refer to the weighted sum, if sample_weight is passed. New in version 0.19.")
gbc.add_argument("--min_impurity_split", default=0.00000007, help="Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf.")
gbc.add_argument("--init", default='none', help="An estimator object that is used to compute the initial predictions. init has to provide fit and predict_proba. If 'zero', the initial raw predictions are set to zero. By default, a DummyEstimator predicting the classes priors is used.")
gbc.add_argument("--random_state", default='none', help="If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by np.random.")
gbc.add_argument("--max_features", default='none', help="The number of features to consider when looking for the best split: If int, then consider max_features features at each split. If float, then max_features is a fraction and int(max_features * n_features) features are considered at each split.If 'auto', then max_features=sqrt(n_features). If 'sqrt', then max_features=sqrt(n_features). If 'log2', then max_features=log2(n_features). If None, then max_features=n_features. Choosing max_features < n_features leads to a reduction of variance and an increase in bias. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than max_features features.")
gbc.add_argument("--verbose", default=0, help="Enable verbose output. If 1 then it prints progress and performance once in a while (the more trees the lower the frequency). If greater than 1 then it prints progress and performance for every tree.")
gbc.add_argument("--max_leaf_nodes", default=4, help="Grow trees with max_leaf_nodes in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes.")
gbc.add_argument("--warm_start", default='false', help="When set to True, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just erase the previous solution.")
gbc.add_argument("--presort", default='auto', help="This parameter is deprecated and will be removed in v0.24.")
gbc.add_argument("--validation_fraction", default=0.1, help="The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if n_iter_no_change is set to an integer.")
gbc.add_argument("--n_iter_no_change", default=10, help="n_iter_no_change is used to decide if early stopping will be used to terminate training when validation score is not improving. By default it is set to None to disable early stopping. If set to a number, it will set aside validation_fraction size of the training data as validation and terminate training when validation score is not improving in all of the previous n_iter_no_change numbers of iterations. The split is stratified.")
gbc.add_argument("--tol", default=0.0001, help="Tolerance for the early stopping. When the loss is not improving by at least tol for n_iter_no_change iterations (if set to a number), the training stops.")
gbc.add_argument("--ccpalpha", default=0.0, help="Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ccp_alpha will be chosen. By default, no pruning is performed. See Minimal Cost-Complexity Pruning for details.")
# Data, evaluation and output options (the same set is repeated for every sub-command).
gbc.add_argument("--TrainFile", required=True, help="Positive negative dataset Ex. 'Train.csv'")
gbc.add_argument("--TestMethod", required=True, help="'Internal', 'CrossVal', 'External', 'Predict'")
gbc.add_argument("--SelectedSclaer", required=True, help="'Min_Max','Standard_Scaler','No_Scaler'")
gbc.add_argument("--NFolds", default=5, help="int, Max=10")
gbc.add_argument("--TestFile", default=None, help="Test data, 'Test.csv'")
gbc.add_argument("--OutFile", default='Out.csv', help="Out.tsv")
gbc.add_argument("--htmlOutDir", default=os.path.join(os.getcwd(), 'report_dir'), help="HTML Out Dir")
gbc.add_argument("--htmlFname", default='Out.html', help="HTML out file")
gbc.add_argument("--Workdirpath", default=os.getcwd(), help="Working Directory Path")
943
# 'RFC' sub-command. The option names follow the constructor parameters of
# sklearn.ensemble.RandomForestClassifier (presumably forwarded there; the
# consumer is outside this section -- TODO confirm).
# NOTE: no `type=` is declared, so a value given on the command line arrives as
# a string; several defaults are deliberately strings as well ('none', 'true', 'false').
rfc = subparsers.add_parser('RFC')
rfc.add_argument("--n_estimators", default=100, help="The number of trees in the forest.")
rfc.add_argument("--criterion", default='gini', help="The function to measure the quality of a split. Supported criteria are 'gini' for the Gini impurity and 'entropy' for the information gain. Note: this parameter is tree-specific.")
rfc.add_argument("--max_depth", default='none', help="The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.")
rfc.add_argument("--min_samples_split", default='2', help="The minimum number of samples required to split an internal node: If int, then consider min_samples_split as the minimum number. If float, then min_samples_split is a fraction and ceil(min_samples_split * n_samples) are the minimum number of samples for each split.")
rfc.add_argument("--min_samples_leaf", default='1', help="The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least min_samples_leaf training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression.")
rfc.add_argument("--min_weight_fraction_leaf", default=0.0, help="The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.")
rfc.add_argument("--max_features", default='auto', help="The number of features to consider when looking for the best split: an int, a float fraction of n_features, 'auto', 'sqrt', 'log2' or None.")
rfc.add_argument("--max_leaf_nodes", default='none', help="Grow trees with max_leaf_nodes in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes.")
rfc.add_argument("--min_impurity_decrease", default=0.0, help="A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where N is the total number of samples, N_t is the number of samples at the current node, N_t_L is the number of samples in the left child, and N_t_R is the number of samples in the right child. N, N_t, N_t_R and N_t_L all refer to the weighted sum, if sample_weight is passed. New in version 0.19.")
rfc.add_argument("--min_impurity_split", default=1e-7, help="Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf.")
rfc.add_argument("--bootstrap", default='true', help="Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree.")
rfc.add_argument("--oob_score", default='false', help="Whether to use out-of-bag samples to estimate the generalization accuracy.")
rfc.add_argument("--n_jobs", default=-1, help="The number of jobs to run in parallel. fit, predict, decision_path and apply are all parallelized over the trees. None means 1 unless in a joblib.parallel_backend context. -1 means using all processors. See Glossary for more details.")
rfc.add_argument("--random_state", default='none', help="Controls both the randomness of the bootstrapping of the samples used when building trees (if bootstrap=True) and the sampling of the features to consider when looking for the best split at each node (if max_features < n_features). See Glossary for details.")
rfc.add_argument("--verbose", default=0, help="Controls the verbosity when fitting and predicting.")
rfc.add_argument("--max_samples", default='none', help="If bootstrap is True, the number of samples to draw from X to train each base estimator. If None, then draw X.shape[0] samples.")
rfc.add_argument("--ccp_alpha", default=0.0, help="Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ccp_alpha will be chosen. By default, no pruning is performed.")
rfc.add_argument("--warm_start", default='false', help="When set to True, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See the Glossary.")
# Data, evaluation and output options (the same set is repeated for every sub-command).
rfc.add_argument("--TrainFile", required=True, help="Positive negative dataset Ex. 'Train.csv'")
rfc.add_argument("--TestMethod", required=True, help="'Internal', 'CrossVal', 'External', 'Predict'")
rfc.add_argument("--SelectedSclaer", required=True, help="'Min_Max','Standard_Scaler','No_Scaler'")
rfc.add_argument("--NFolds", default=5, help="int, Max=10")
rfc.add_argument("--TestFile", default=None, help="Test data, 'Test.csv'")
rfc.add_argument("--OutFile", default='Out.csv', help="Out.tsv")
rfc.add_argument("--htmlOutDir", default=os.path.join(os.getcwd(), 'report_dir'), help="HTML Out Dir")
rfc.add_argument("--htmlFname", default='Out.html', help="HTML out file")
rfc.add_argument("--Workdirpath", default=os.getcwd(), help="Working Directory Path")
972
# 'LRC' sub-command. The option names follow the constructor parameters of
# sklearn.linear_model.LogisticRegression (presumably forwarded there; the
# consumer is outside this section -- TODO confirm).
# NOTE: no `type=` is declared, so a value given on the command line arrives as
# a string; several defaults are deliberately strings as well ('none', 'true', 'false').
lrc = subparsers.add_parser('LRC')
lrc.add_argument("--penalty", default='l2', help="Used to specify the norm used in the penalization. The 'newton-cg', 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is only supported by the 'saga' solver. If 'none' (not supported by the liblinear solver), no regularization is applied.")
lrc.add_argument("--dual", default='false', help="Dual or primal formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. Prefer dual=False when n_samples > n_features.")
lrc.add_argument("--tol", default=0.0001, help="Tolerance for stopping criteria.")
lrc.add_argument("--C", default=1.0, help="Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization.")
lrc.add_argument("--fit_intercept", default='true', help="Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function.")
lrc.add_argument("--intercept_scaling", default=1, help="Useful only when the solver 'liblinear' is used and self.fit_intercept is set to True. In this case, x becomes [x, self.intercept_scaling], i.e. a 'synthetic' feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic_feature_weight.")
lrc.add_argument("--random_state", default=10, help="The seed of the pseudo random number generator to use when shuffling the data. If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by np.random. Used when solver == 'sag' or 'liblinear'.")
lrc.add_argument("--solver", default='lbfgs', help="Algorithm to use in the optimization problem. For small datasets, 'liblinear' is a good choice, whereas 'sag' and 'saga' are faster for large ones. For multiclass problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs' handle multinomial loss; 'liblinear' is limited to one-versus-rest schemes. 'newton-cg', 'lbfgs', 'sag' and 'saga' handle L2 or no penalty 'liblinear' and 'saga' also handle L1 penalty 'saga' also supports 'elasticnet' penalty 'liblinear' does not support setting penalty='none' Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. You can preprocess the data with a scaler from sklearn.preprocessing. New in version 0.17: Stochastic Average Gradient descent solver.")
# The original statement ended with a stray comma, turning it into a discarded 1-tuple expression.
lrc.add_argument("--max_iter", default=100, help="Maximum number of iterations taken for the solvers to converge.")
lrc.add_argument("--multi_class", default='auto', help="If the option chosen is 'ovr', then a binary problem is fit for each label. For 'multinomial' the loss minimised is the multinomial loss fit across the entire probability distribution, even when the data is binary. 'multinomial' is unavailable when solver='liblinear'. 'auto' selects 'ovr' if the data is binary, or if solver='liblinear', and otherwise selects 'multinomial'. New in version 0.18: Stochastic Average Gradient descent solver for 'multinomial' case.")
lrc.add_argument("--verbose", default=0, help="For the liblinear and lbfgs solvers set verbose to any positive number for verbosity.")
lrc.add_argument("--warm_start", default='false', help="When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. Useless for liblinear solver. See the Glossary. New in version 0.17: warm_start to support lbfgs, newton-cg, sag, saga solvers.")
lrc.add_argument("--n_jobs", default='none', help="Number of CPU cores used when parallelizing over classes if multi_class='ovr'. This parameter is ignored when the solver is set to 'liblinear' regardless of whether 'multi_class' is specified or not. None means 1 unless in a joblib.parallel_backend context. -1 means using all processors. See Glossary for more details.")
lrc.add_argument("--l1_ratio", default='none', help="The Elastic-Net mixing parameter, with 0 <= l1_ratio <= 1. Only used if penalty='elasticnet'. Setting l1_ratio=0 is equivalent to using penalty='l2', while setting l1_ratio=1 is equivalent to using penalty='l1'. For 0 < l1_ratio <1, the penalty is a combination of L1 and L2.")
# Data, evaluation and output options (the same set is repeated for every sub-command).
lrc.add_argument("--TrainFile", required=True, help="Positive negative dataset Ex. 'Train.csv'")
lrc.add_argument("--TestMethod", required=True, help="'Internal', 'CrossVal', 'External', 'Predict'")
lrc.add_argument("--SelectedSclaer", required=True, help="'Min_Max','Standard_Scaler','No_Scaler'")
lrc.add_argument("--NFolds", default=5, help="int, Max=10")
lrc.add_argument("--TestFile", default=None, help="Test data, 'Test.csv'")
lrc.add_argument("--OutFile", default='Out.csv', help="Out.tsv")
lrc.add_argument("--htmlOutDir", default=os.path.join(os.getcwd(), 'report_dir'), help="HTML Out Dir")
lrc.add_argument("--htmlFname", default='Out.html', help="HTML out file")
lrc.add_argument("--Workdirpath", default=os.getcwd(), help="Working Directory Path")
997
# 'KNC' sub-command: k-nearest-neighbours hyper-parameters followed by the
# data / output options that the dispatch code forwards to KN_Classifier.
knc = subparsers.add_parser('KNC')

# --- estimator hyper-parameters -------------------------------------------
# NOTE(review): none of these declare ``type=``, so values given on the
# command line arrive as str while the defaults are int / None / 'none';
# presumably normalised by KN_Classifier -- confirm.
knc.add_argument(
    "--n_neighbors", required=False, default=5,
    help="Number of neighbors to use by default for kneighbors queries.")
knc.add_argument(
    "--weights", required=False, default='uniform',
    help="weight function used in prediction. Possible values: "
         "'uniform' : uniform weights. All points in each neighborhood are weighted equally. "
         "'distance' : weight points by the inverse of their distance. "
         "in this case, closer neighbors of a query point will have a greater influence than neighbors which are further away. "
         "[callable] : a user-defined function which accepts an array of distances, "
         "and returns an array of the same shape containing the weights.")
knc.add_argument(
    "--algorithm", required=False, default='auto',
    help="Algorithm used to compute the nearest neighbors:"
         "'ball_tree' will use BallTree 'kd_tree' will use KDTree "
         "'brute' will use a brute-force search. "
         "'auto' will attempt to decide the most appropriate algorithm based on the values passed to fit method. "
         "Note: fitting on sparse input will override the setting of this parameter, using brute force.")
knc.add_argument(
    "--leaf_size", required=False, default=30,
    help="Leaf size passed to BallTree or KDTree. "
         "This can affect the speed of the construction and query, as well as the memory required to store the tree. "
         "The optimal value depends on the nature of the problem.")
knc.add_argument(
    "--p", required=False, default=2,
    help="Power parameter for the Minkowski metric. "
         "When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. "
         "For arbitrary p, minkowski_distance (l_p) is used.")
# Help text fix: the upstream docstring says "sparse graph"; "Glossary" was a
# leftover of a stripped cross-reference.
knc.add_argument(
    "--metric", required=False, default='minkowski',
    help="the distance metric to use for the tree. "
         "The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean metric. "
         "See the documentation of the DistanceMetric class for a list of available metrics. "
         "If metric is 'precomputed', X is assumed to be a distance matrix and must be square during fit. "
         "X may be a sparse graph, in which case only 'nonzero' elements may be considered neighbors.")
knc.add_argument(
    "--metric_params", required=False, default=None,
    help="Additional keyword arguments for the metric function.")
knc.add_argument(
    "--n_jobs", required=False, default='none',
    help="The number of parallel jobs to run for neighbors search. "
         "None means 1 unless in a joblib.parallel_backend context. "
         "-1 means using all processors. See Glossary for more details. "
         "Doesn't affect fit method.")

# --- data / output options ------------------------------------------------
knc.add_argument(
    "--TrainFile", required=True, default=None,
    help="Positive negative dataset Ex. 'Train.csv'")
knc.add_argument(
    "--TestMethod", required=True, default=None,
    help="'Internal', 'CrossVal', 'External', 'Predict'")
# The flag keeps its historical spelling because the dispatch code reads it
# back as ``args.SelectedSclaer``.
knc.add_argument(
    "--SelectedSclaer", required=True,
    help="'Min_Max', 'Standard_Scaler', 'No_Scaler'")
knc.add_argument("--NFolds", required=False, default=5, help="int, Max=10")
knc.add_argument(
    "--TestFile", required=False, default=None,
    help="Test data, 'Test.csv'")
# NOTE(review): the default name ends in .csv while the help text says .tsv.
knc.add_argument("--OutFile", required=False, default='Out.csv', help="Out.tsv")
# Both directory defaults are resolved once, when the parser is built.
knc.add_argument(
    "--htmlOutDir", required=False,
    default=os.path.join(os.getcwd(), 'report_dir'),
    help="HTML Out Dir")
knc.add_argument("--htmlFname", required=False, default='Out.html', help="")
knc.add_argument(
    "--Workdirpath", required=False, default=os.getcwd(),
    help="Working Directory Path")
1016
# 'GNBC' sub-command: the single Gaussian naive Bayes hyper-parameter exposed
# here, followed by the data / output options forwarded to GNB_Classifier.
gnbc = subparsers.add_parser('GNBC')

# --- estimator hyper-parameters -------------------------------------------
# "--priors" is currently not exposed on the command line; the original
# declaration is kept below for reference.
#gnbc.add_argument("--priors", required=False, default=None, help="Prior probabilities of the classes. If specified the priors are not adjusted according to the data.")
# NOTE(review): no ``type=`` is given, so a command-line value arrives as str
# while the default is a float; presumably converted downstream -- confirm.
gnbc.add_argument(
    "--var_smoothing", required=False, default=1e-09,
    help="Portion of the largest variance of all features that is added to variances for calculation stability.")

# --- data / output options ------------------------------------------------
gnbc.add_argument(
    "--TrainFile", required=True, default=None,
    help="Positive negative dataset Ex. 'Train.csv'")
gnbc.add_argument(
    "--TestMethod", required=True, default=None,
    help="'Internal', 'CrossVal', 'External', 'Predict'")
# The flag keeps its historical spelling because the dispatch code reads it
# back as ``args.SelectedSclaer``.
gnbc.add_argument(
    "--SelectedSclaer", required=True,
    help="'Min_Max', 'Standard_Scaler', 'No_Scaler'")
gnbc.add_argument("--NFolds", required=False, default=5, help="int, Max=10")
gnbc.add_argument(
    "--TestFile", required=False, default=None,
    help="Test data, 'Test.csv'")
# NOTE(review): the default name ends in .csv while the help text says .tsv.
gnbc.add_argument("--OutFile", required=False, default='Out.csv', help="Out.tsv")
# Both directory defaults are resolved once, when the parser is built.
gnbc.add_argument(
    "--htmlOutDir", required=False,
    default=os.path.join(os.getcwd(), 'report_dir'),
    help="HTML Out Dir")
gnbc.add_argument("--htmlFname", required=False, default='Out.html', help="")
gnbc.add_argument(
    "--Workdirpath", required=False, default=os.getcwd(),
    help="Working Directory Path")
1029
# 'MLP' sub-command: multi-layer perceptron hyper-parameters followed by the
# data / output options that the dispatch code forwards to MLP_Classifier.
MLP = subparsers.add_parser('MLP')

# --- estimator hyper-parameters -------------------------------------------
# NOTE(review): no option declares ``type=``; boolean-like and None-like
# defaults are the strings 'true' / 'false' / 'none', and --hidden_layer_sizes
# defaults to a tuple whereas a command-line value would be a str. Presumably
# MLP_Classifier normalises all of these -- confirm.
MLP.add_argument("--hidden_layer_sizes", required=False, default=(100,), help="")
MLP.add_argument("--activation", required=False, default='relu', help="")
MLP.add_argument("--solver", required=False, default='adam', help="")
MLP.add_argument("--alpha", required=False, default=0.0001, help="")
MLP.add_argument("--batch_size", required=False, default='auto', help="")
MLP.add_argument("--learning_rate", required=False, default='constant', help="")
MLP.add_argument("--learning_rate_init", required=False, default=0.001, help="")
MLP.add_argument("--power_t", required=False, default=0.5, help="")
MLP.add_argument("--max_iter", required=False, default=200, help="")
MLP.add_argument("--shuffle", required=False, default='true', help="")
MLP.add_argument("--random_state", required=False, default='none', help="")
MLP.add_argument("--tol", required=False, default=0.0001, help="")
MLP.add_argument("--verbose", required=False, default='false', help="")
MLP.add_argument("--warm_start", required=False, default='false', help="")
MLP.add_argument("--momentum", required=False, default=0.9, help="")
MLP.add_argument("--nesterovs_momentum", required=False, default='true', help="")
MLP.add_argument("--early_stopping", required=False, default='false', help="")
MLP.add_argument("--validation_fraction", required=False, default=0.1, help="")
MLP.add_argument("--beta_1", required=False, default=0.9, help="")
MLP.add_argument("--beta_2", required=False, default=0.999, help="")
MLP.add_argument("--epsilon", required=False, default=1e-08, help="")
MLP.add_argument("--n_iter_no_change", required=False, default=10, help="")
MLP.add_argument("--max_fun", required=False, default=15000, help="")

# --- data / output options ------------------------------------------------
MLP.add_argument(
    "--TrainFile", required=True, default=None,
    help="Positive negative dataset Ex. 'Train.csv'")
MLP.add_argument(
    "--TestMethod", required=True, default=None,
    help="'Internal', 'CrossVal', 'External', 'Predict'")
# The flag keeps its historical spelling because the dispatch code reads it
# back as ``args.SelectedSclaer``.
MLP.add_argument(
    "--SelectedSclaer", required=True,
    help="'Min_Max', 'Standard_Scaler', 'No_Scaler'")
MLP.add_argument("--NFolds", required=False, default=5, help="int, Max=10")
MLP.add_argument("--Testspt", required=False, default=0.2, help="float, Max=1.0")
MLP.add_argument(
    "--TestFile", required=False, default=None,
    help="Test data, 'Test.csv'")
# NOTE(review): the default name ends in .csv while the help text says .tsv.
MLP.add_argument("--OutFile", required=False, default='Out.csv', help="Out.tsv")
# Both directory defaults are resolved once, when the parser is built.
MLP.add_argument(
    "--htmlOutDir", required=False,
    default=os.path.join(os.getcwd(), 'report_dir'),
    help="HTML Out Dir")
# NOTE(review): this default ("jai.html") differs from the 'Out.html' used by
# the neighbouring sub-parsers; left unchanged because callers may rely on the
# file name -- confirm and align.
MLP.add_argument("--htmlFname", required=False, help="HTML out file", default="jai.html")
MLP.add_argument(
    "--Workdirpath", required=False, default=os.getcwd(),
    help="Working Directory Path")
1064
# Parse the command line and hand the parsed options to the classifier routine
# that matches the selected sub-command. Every routine receives its own
# hyper-parameters first, followed by the shared data / output options.
args = parser.parse_args()

# The sub-command name is taken from argv rather than from ``args``. Guard the
# lookup so that an empty command line falls through to the error branch
# below instead of raising IndexError.
command = sys.argv[1] if len(sys.argv) > 1 else None

if command == 'SVMC':
    SVM_Classifier(
        args.C, args.kernel, args.degree, args.gamma, args.coef0,
        args.shrinking, args.probability, args.tol, args.cache_size,
        args.verbose, args.max_iter, args.decision_function_shape,
        args.randomState, args.breakties,
        args.TrainFile, args.TestMethod, args.SelectedSclaer, args.NFolds,
        args.TestFile, args.OutFile, args.htmlOutDir, args.htmlFname,
        args.Workdirpath)
elif command == 'SGDC':
    SGD_Classifier(
        args.loss, args.penalty, args.alpha, args.l1_ratio,
        args.fit_intercept, args.max_iter, args.tol, args.shuffle,
        args.verbose, args.epsilon, args.n_jobs, args.random_state,
        args.learning_rate, args.eta0, args.power_t, args.early_stopping,
        args.validation_fraction, args.n_iter_no_change, args.warm_start,
        args.average,
        args.TrainFile, args.TestMethod, args.SelectedSclaer, args.NFolds,
        args.TestFile, args.OutFile, args.htmlOutDir, args.htmlFname,
        args.Workdirpath)
elif command == 'DTC':
    DT_Classifier(
        args.criterion, args.splitter, args.max_depth,
        args.min_samples_split, args.min_samples_leaf,
        args.min_weight_fraction_leaf, args.random_state,
        args.max_leaf_nodes, args.min_impurity_decrease,
        args.min_impurity_split, args.presort, args.ccpalpha,
        args.max_features,
        args.TrainFile, args.TestMethod, args.SelectedSclaer, args.NFolds,
        args.TestFile, args.OutFile, args.htmlOutDir, args.htmlFname,
        args.Workdirpath)
elif command == 'GBC':
    GB_Classifier(
        args.loss, args.learning_rate, args.n_estimators, args.subsample,
        args.criterion, args.min_samples_split, args.min_samples_leaf,
        args.min_weight_fraction_leaf, args.max_depth,
        args.min_impurity_decrease, args.min_impurity_split, args.init,
        args.random_state, args.verbose, args.max_leaf_nodes,
        args.warm_start, args.presort, args.validation_fraction,
        args.n_iter_no_change, args.tol, args.ccpalpha, args.max_features,
        args.TrainFile, args.TestMethod, args.SelectedSclaer, args.NFolds,
        args.TestFile, args.OutFile, args.htmlOutDir, args.htmlFname,
        args.Workdirpath)
elif command == 'RFC':
    RF_Classifier(
        args.n_estimators, args.criterion, args.max_depth,
        args.min_samples_split, args.min_samples_leaf,
        args.min_weight_fraction_leaf, args.max_features,
        args.max_leaf_nodes, args.min_impurity_decrease,
        args.min_impurity_split, args.bootstrap, args.oob_score,
        args.n_jobs, args.random_state, args.verbose, args.warm_start,
        args.ccp_alpha, args.max_samples,
        args.TrainFile, args.TestMethod, args.SelectedSclaer, args.NFolds,
        args.TestFile, args.OutFile, args.htmlOutDir, args.htmlFname,
        args.Workdirpath)
elif command == 'LRC':
    LR_Classifier(
        args.penalty, args.dual, args.tol, args.C, args.fit_intercept,
        args.intercept_scaling, args.random_state, args.solver,
        args.max_iter, args.multi_class, args.verbose, args.warm_start,
        args.n_jobs, args.l1_ratio,
        args.TrainFile, args.TestMethod, args.SelectedSclaer, args.NFolds,
        args.TestFile, args.OutFile, args.htmlOutDir, args.htmlFname,
        args.Workdirpath)
elif command == 'KNC':
    KN_Classifier(
        args.n_neighbors, args.weights, args.algorithm, args.leaf_size,
        args.p, args.metric, args.metric_params, args.n_jobs,
        args.TrainFile, args.TestMethod, args.SelectedSclaer, args.NFolds,
        args.TestFile, args.OutFile, args.htmlOutDir, args.htmlFname,
        args.Workdirpath)
elif command == 'GNBC':
    GNB_Classifier(
        args.var_smoothing,
        args.TrainFile, args.TestMethod, args.SelectedSclaer, args.NFolds,
        args.TestFile, args.OutFile, args.htmlOutDir, args.htmlFname,
        args.Workdirpath)
elif command == 'MLP':
    # Unlike the other routines, MLP_Classifier also takes --Testspt, placed
    # between NFolds and TestFile.
    MLP_Classifier(
        args.hidden_layer_sizes, args.activation, args.solver, args.alpha,
        args.batch_size, args.learning_rate, args.learning_rate_init,
        args.power_t, args.max_iter, args.shuffle, args.random_state,
        args.tol, args.verbose, args.warm_start, args.momentum,
        args.nesterovs_momentum, args.early_stopping,
        args.validation_fraction, args.beta_1, args.beta_2, args.epsilon,
        args.n_iter_no_change, args.max_fun,
        args.TrainFile, args.TestMethod, args.SelectedSclaer, args.NFolds,
        args.Testspt,
        args.TestFile, args.OutFile, args.htmlOutDir, args.htmlFname,
        args.Workdirpath)
else:
    print ("option not correct")
    # sys.exit (not the interactive-only ``exit`` helper) with a non-zero
    # status, so the calling process can tell that nothing was run.
    sys.exit(1)
1088