comparison model_validation.xml @ 16:86e1e2874460 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
author bgruening
date Sun, 30 Dec 2018 02:02:32 -0500
parents e244d6f2df1a
children cf9aa11b91c8
comparison
equal deleted inserted replaced
15:33d2606fdb3f 16:86e1e2874460
21 import pandas 21 import pandas
22 import numpy as np 22 import numpy as np
23 from sklearn import preprocessing, model_selection, svm, linear_model, ensemble, naive_bayes, tree, neighbors 23 from sklearn import preprocessing, model_selection, svm, linear_model, ensemble, naive_bayes, tree, neighbors
24 from sklearn.pipeline import Pipeline 24 from sklearn.pipeline import Pipeline
25 25
26 exec(open("$__tool_directory__/utils.py").read(), globals()) 26 exec(open('$__tool_directory__/utils.py').read(), globals())
27
28 warnings.filterwarnings('ignore')
27 29
28 safe_eval = SafeEval() 30 safe_eval = SafeEval()
29 31
30 input_json_path = sys.argv[1] 32 input_json_path = sys.argv[1]
31 with open(input_json_path, "r") as param_handler: 33 with open(input_json_path, 'r') as param_handler:
32 params = json.load(param_handler) 34 params = json.load(param_handler)
33 35
34 input_type = params["input_options"]["selected_input"] 36 input_type = params['input_options']['selected_input']
35 if input_type=="tabular": 37 if input_type == 'tabular':
36 header = 'infer' if params["input_options"]["header1"] else None 38 header = 'infer' if params['input_options']['header1'] else None
37 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"] 39 column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option']
38 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: 40 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
39 c = params["input_options"]["column_selector_options_1"]["col1"] 41 c = params['input_options']['column_selector_options_1']['col1']
40 else: 42 else:
41 c = None 43 c = None
42 X = read_columns( 44 X = read_columns(
43 "$input_options.infile1", 45 '$input_options.infile1',
44 c = c, 46 c = c,
45 c_option = column_option, 47 c_option = column_option,
46 sep='\t', 48 sep='\t',
47 header=header, 49 header=header,
48 parse_dates=True 50 parse_dates=True
49 ) 51 )
50 else: 52 else:
51 X = mmread("$input_options.infile1") 53 X = mmread('$input_options.infile1')
52 54
53 header = 'infer' if params["input_options"]["header2"] else None 55 header = 'infer' if params['input_options']['header2'] else None
54 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] 56 column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2']
55 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: 57 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
56 c = params["input_options"]["column_selector_options_2"]["col2"] 58 c = params['input_options']['column_selector_options_2']['col2']
57 else: 59 else:
58 c = None 60 c = None
59 y = read_columns( 61 y = read_columns(
60 "$input_options.infile2", 62 '$input_options.infile2',
61 c = c, 63 c = c,
62 c_option = column_option, 64 c_option = column_option,
63 sep='\t', 65 sep='\t',
64 header=header, 66 header=header,
65 parse_dates=True 67 parse_dates=True
66 ) 68 )
67 y=y.ravel() 69 y=y.ravel()
68 70
69 options = params["model_validation_functions"]["options"] 71 options = params['model_validation_functions']['options']
70 options['cv'] = get_cv( options['cv'] ) 72 splitter, groups = get_cv( options.pop('cv_selector') )
73 if groups is None:
74 options['cv'] = splitter
75 elif groups == '':
76 options['cv'] = list( splitter.split(X, y, groups=None) )
77 else:
78 options['cv'] = list( splitter.split(X, y, groups=groups) )
71 options['n_jobs'] = N_JOBS 79 options['n_jobs'] = N_JOBS
72 if 'scoring' in options: 80 if 'scoring' in options:
73 options['scoring'] = get_scoring(options['scoring']) 81 options['scoring'] = get_scoring(options['scoring'])
74 if 'pre_dispatch' in options and options['pre_dispatch'] == '': 82 if 'pre_dispatch' in options and options['pre_dispatch'] == '':
75 options['pre_dispatch'] = None 83 options['pre_dispatch'] = None
76 84
77 pipeline_steps = [] 85 pipeline_steps = []
78 86
79 ## Set up pre_processor and add to pipeline steps. 87 ## Set up pre_processor and add to pipeline steps.
80 if params['pre_processing']['do_pre_processing'] == 'Yes': 88 if params['pre_processing']['do_pre_processing'] == 'Yes':
81 preprocessor = params["pre_processing"]["pre_processors"]["selected_pre_processor"] 89 preprocessor = params['pre_processing']['pre_processors']['selected_pre_processor']
82 pre_processor_options = params["pre_processing"]["pre_processors"]["options"] 90 pre_processor_options = params['pre_processing']['pre_processors']['options']
83 my_class = getattr(preprocessing, preprocessor) 91 my_class = getattr(preprocessing, preprocessor)
84 pipeline_steps.append( ('pre_processor', my_class(**pre_processor_options)) ) 92 pipeline_steps.append( ('pre_processor', my_class(**pre_processor_options)) )
85 93
86 ## Set up feature selector and add to pipeline steps. 94 ## Set up feature selector and add to pipeline steps.
87 if params['feature_selection']['do_feature_selection'] == 'Yes': 95 if params['feature_selection']['do_feature_selection'] == 'Yes':
88 feature_selector = feature_selector(params['feature_selection']['fs_algorithm_selector']) 96 feature_selector = feature_selector(params['feature_selection']['fs_algorithm_selector'])
89 pipeline_steps.append( ('feature_selector', feature_selector) ) 97 pipeline_steps.append( ('feature_selector', feature_selector) )
90 98
91 ## Set up estimator and add to pipeline. 99 ## Set up estimator and add to pipeline.
92 estimator_json = params["model_validation_functions"]['estimator_selector'] 100 estimator_json = params['model_validation_functions']['estimator_selector']
93 estimator = get_estimator(estimator_json) 101 estimator = get_estimator(estimator_json)
94 102
95 pipeline_steps.append( ('estimator', estimator) ) 103 pipeline_steps.append( ('estimator', estimator) )
96 104
97 pipeline = Pipeline(pipeline_steps) 105 pipeline = Pipeline(pipeline_steps)
98 106
99 ## Set up validator, run pipeline through validator and return results. 107 ## Set up validator, run pipeline through validator and return results.
100 108
101 validator = params["model_validation_functions"]["selected_function"] 109 validator = params['model_validation_functions']['selected_function']
102 validator = getattr(model_selection, validator) 110 validator = getattr(model_selection, validator)
103 111
104 selected_function = params["model_validation_functions"]["selected_function"] 112 selected_function = params['model_validation_functions']['selected_function']
105 rval_type = params["model_validation_functions"].get("return_type", None) 113 rval_type = params['model_validation_functions'].get('return_type', None)
106 114
107 if selected_function == 'cross_validate': 115 if selected_function == 'cross_validate':
108 res = validator(pipeline, X, y, **options) 116 res = validator(pipeline, X, y, **options)
109 rval = res[rval_type] 117 rval = res[rval_type]
110 elif selected_function == 'learning_curve': 118 elif selected_function == 'learning_curve':
112 train_sizes_abs, train_scores, test_scores = validator(pipeline, X, y, **options) 120 train_sizes_abs, train_scores, test_scores = validator(pipeline, X, y, **options)
113 rval = eval(rval_type) 121 rval = eval(rval_type)
114 elif selected_function == 'permutation_test_score': 122 elif selected_function == 'permutation_test_score':
115 score, permutation_scores, pvalue = validator(pipeline, X, y, **options) 123 score, permutation_scores, pvalue = validator(pipeline, X, y, **options)
116 rval = eval(rval_type) 124 rval = eval(rval_type)
117 if rval_type in ["score", "pvalue"]: 125 if rval_type in ['score', 'pvalue']:
118 rval = [rval] 126 rval = [rval]
119 elif selected_function == 'validation_curve': 127 elif selected_function == 'validation_curve':
120 options['param_name'] = 'estimator__' + options['param_name'] 128 options['param_name'] = 'estimator__' + options['param_name']
121 options['param_range'] = eval(options['param_range']) 129 options['param_range'] = eval(options['param_range'])
122 train_scores, test_scores = validator(pipeline, X, y, **options) 130 train_scores, test_scores = validator(pipeline, X, y, **options)
123 rval = eval(rval_type) 131 rval = eval(rval_type)
124 else: 132 else:
125 rval = validator(pipeline, X, y, **options) 133 rval = validator(pipeline, X, y, **options)
126 134
127 rval = pandas.DataFrame(rval) 135 rval = pandas.DataFrame(rval)
128 rval.to_csv(path_or_buf="$outfile", sep='\t', header=False, index=False) 136 rval.to_csv(path_or_buf='$outfile', sep='\t', header=False, index=False)
129 137
130 ]]> 138 ]]>
131 </configfile> 139 </configfile>
132 </configfiles> 140 </configfiles>
133 <inputs> 141 <inputs>
149 <option value="No" selected="true"/> 157 <option value="No" selected="true"/>
150 <option value="Yes"/> 158 <option value="Yes"/>
151 </param> 159 </param>
152 <when value="No"/> 160 <when value="No"/>
153 <when value="Yes"> 161 <when value="Yes">
154 <expand macro="feature_selection_all"> 162 <expand macro="feature_selection_pipeline"/>
155 <expand macro="fs_selectfrommodel_no_prefitted"/>
156 </expand>
157 </when> 163 </when>
158 </conditional> 164 </conditional>
159 <conditional name="model_validation_functions"> 165 <conditional name="model_validation_functions">
160 <param name="selected_function" type="select" label="Select a model validation function"> 166 <param name="selected_function" type="select" label="Select a model validation function">
161 <option value="cross_validate">cross_validate - Evaluate metric(s) by cross-validation and also record fit/score times</option> 167 <option value="cross_validate">cross_validate - Evaluate metric(s) by cross-validation and also record fit/score times</option>