comparison pipeline.xml @ 10:775b004b7920 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author bgruening
date Fri, 09 Aug 2019 07:18:27 -0400
parents 913ee94945f3
children 3f3c6dc38f3e
comparison
equal deleted inserted replaced
9:019bd8289224 10:775b004b7920
15 <inputs name="inputs" /> 15 <inputs name="inputs" />
16 <configfile name="sklearn_pipeline_script"> 16 <configfile name="sklearn_pipeline_script">
17 <![CDATA[ 17 <![CDATA[
18 import imblearn 18 import imblearn
19 import json 19 import json
20 import pandas as pd
20 import pickle 21 import pickle
21 import pprint 22 import pprint
22 import skrebate 23 import skrebate
23 import sys 24 import sys
24 import warnings 25 import warnings
25 from mlxtend import classifier, regressor
26 from sklearn import ( 26 from sklearn import (
27 cluster, compose, decomposition, ensemble, feature_extraction, 27 cluster, compose, decomposition, ensemble, feature_extraction,
28 feature_selection, gaussian_process, kernel_approximation, metrics, 28 feature_selection, gaussian_process, kernel_approximation, metrics,
29 model_selection, naive_bayes, neighbors, pipeline, preprocessing, 29 model_selection, naive_bayes, neighbors, pipeline, preprocessing,
30 svm, linear_model, tree, discriminant_analysis) 30 svm, linear_model, tree, discriminant_analysis)
31 from sklearn.pipeline import make_pipeline 31 from sklearn.pipeline import make_pipeline
32 from imblearn.pipeline import make_pipeline as imb_make_pipeline 32 from imblearn.pipeline import make_pipeline as imb_make_pipeline
33 33 from galaxy_ml.utils import (SafeEval, feature_selector, get_estimator,
34 sys.path.insert(0, '$__tool_directory__') 34 try_get_attr, get_search_params)
35 35
36 from utils import SafeEval, feature_selector, get_estimator, try_get_attr
37 from preprocessors import Z_RandomOverSampler
38 36
39 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) 37 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
40 38
41 warnings.filterwarnings('ignore') 39 warnings.filterwarnings('ignore')
42 40
67 sys.exit("The pre-processing component type can't be None " 65 sys.exit("The pre-processing component type can't be None "
68 "when the number of components is greater than 1.") 66 "when the number of components is greater than 1.")
69 if input_json['component_type'] == 'pre_processor': 67 if input_json['component_type'] == 'pre_processor':
70 preprocessor = input_json['pre_processors']['selected_pre_processor'] 68 preprocessor = input_json['pre_processors']['selected_pre_processor']
71 pre_processor_options = input_json['pre_processors']['options'] 69 pre_processor_options = input_json['pre_processors']['options']
70 if 'feature_range' in pre_processor_options:
71 feature_range = safe_eval(pre_processor_options['feature_range'].strip())
72 if not feature_range:
73 feature_range = (0, 1)
74 pre_processor_options['feature_range'] = feature_range
72 my_class = getattr(preprocessing, preprocessor) 75 my_class = getattr(preprocessing, preprocessor)
73 obj = my_class(**pre_processor_options) 76 obj = my_class(**pre_processor_options)
74 elif input_json['component_type'] == 'feature_selection': 77 elif input_json['component_type'] == 'feature_selection':
75 obj = feature_selector(input_json['fs_algorithm_selector']) 78 obj = feature_selector(input_json['fs_algorithm_selector'])
76 elif input_json['component_type'] == 'decomposition': 79 elif input_json['component_type'] == 'decomposition':
108 is_imblearn = True 111 is_imblearn = True
109 algorithm = input_json['imblearn_selector']['select_algorithm'] 112 algorithm = input_json['imblearn_selector']['select_algorithm']
110 if algorithm == 'over_sampling.SMOTENC': 113 if algorithm == 'over_sampling.SMOTENC':
111 obj = over_sampling.SMOTENC(categorical_features=[]) 114 obj = over_sampling.SMOTENC(categorical_features=[])
112 elif algorithm == 'Z_RandomOverSampler': 115 elif algorithm == 'Z_RandomOverSampler':
116 Z_RandomOverSampler = try_get_attr('galaxy_ml.preprocessors',
117 'Z_RandomOverSampler')
113 obj = Z_RandomOverSampler() 118 obj = Z_RandomOverSampler()
114 else: 119 else:
115 globals = algorithm.split('.') 120 globals = algorithm.split('.')
116 mod, klass = globals[0], globals[1] 121 mod, klass = globals[0], globals[1]
117 obj = getattr(getattr(imblearn, mod), klass)() 122 obj = getattr(getattr(imblearn, mod), klass)()
118 options = input_json['imblearn_selector']['text_params'].strip() 123 options = input_json['imblearn_selector']['text_params'].strip()
119 if options != '': 124 if options != '':
120 options = safe_eval( 'dict(' + options + ')' ) 125 options = safe_eval( 'dict(' + options + ')' )
121 obj.set_params(**options) 126 obj.set_params(**options)
122 elif input_json['component_type'] == 'IRAPS': 127 elif input_json['component_type'] == 'IRAPS':
123 iraps_core = try_get_attr('iraps_classifier','IRAPSCore')() 128 iraps_core = try_get_attr('galaxy_ml.iraps_classifier','IRAPSCore')()
124 core_params = input_json['text_params'].strip() 129 core_params = input_json['text_params'].strip()
125 if core_params != '': 130 if core_params != '':
126 try: 131 try:
127 params = safe_eval('dict(' + core_params + ')') 132 params = safe_eval('dict(' + core_params + ')')
128 except ValueError: 133 except ValueError:
135 options['fc_thres'] = input_json['fc_thres'] 140 options['fc_thres'] = input_json['fc_thres']
136 if input_json['occurrence'] is not None: 141 if input_json['occurrence'] is not None:
137 options['occurrence'] = input_json['occurrence'] 142 options['occurrence'] = input_json['occurrence']
138 if input_json['discretize'] is not None: 143 if input_json['discretize'] is not None:
139 options['discretize'] = input_json['discretize'] 144 options['discretize'] = input_json['discretize']
140 IRAPSClassifier = try_get_attr('iraps_classifier','IRAPSClassifier') 145 IRAPSClassifier = try_get_attr('galaxy_ml.iraps_classifier','IRAPSClassifier')
141 obj = IRAPSClassifier(iraps_core, **options) 146 obj = IRAPSClassifier(iraps_core, **options)
147 elif input_json['component_type'] == 'preprocessors':
148 encoder_selection = input_json['encoder_selection']
149 encoder_type = encoder_selection.pop('encoder_type')
150 klass = try_get_attr('galaxy_ml.preprocessors', encoder_type)
151 obj = klass(**encoder_selection)
152
142 if 'n_jobs' in obj.get_params(): 153 if 'n_jobs' in obj.get_params():
143 obj.set_params( n_jobs=N_JOBS ) 154 obj.set_params( n_jobs=N_JOBS )
144 return obj, is_imblearn 155 return obj, is_imblearn
145 156
146 has_imblearn = False 157 has_imblearn = False
170 #if $output_type == 'Final_Estimator_Builder': 181 #if $output_type == 'Final_Estimator_Builder':
171 with open('$outfile', 'wb') as out_handler: 182 with open('$outfile', 'wb') as out_handler:
172 final_est = pipeline_steps[-1] 183 final_est = pipeline_steps[-1]
173 print(final_est) 184 print(final_est)
174 pickle.dump(final_est, out_handler, pickle.HIGHEST_PROTOCOL) 185 pickle.dump(final_est, out_handler, pickle.HIGHEST_PROTOCOL)
186 out_obj = final_est
175 #else: 187 #else:
176 if has_imblearn: 188 if has_imblearn:
177 pipeline = imb_make_pipeline(*pipeline_steps) 189 pipeline = imb_make_pipeline(*pipeline_steps)
178 else: 190 else:
179 pipeline = make_pipeline(*pipeline_steps) 191 pipeline = make_pipeline(*pipeline_steps)
180 pprint.pprint(pipeline.named_steps) 192 pprint.pprint(pipeline.named_steps)
181 193
182 with open('$outfile', 'wb') as out_handler: 194 with open('$outfile', 'wb') as out_handler:
183 pickle.dump(pipeline, out_handler, pickle.HIGHEST_PROTOCOL) 195 pickle.dump(pipeline, out_handler, pickle.HIGHEST_PROTOCOL)
196 out_obj = pipeline
197 #end if
198
199 #if $get_params
200 results = get_search_params(out_obj)
201 df = pd.DataFrame(results, columns=['', 'Parameter', 'Value'])
202 df.to_csv('$outfile_params', sep='\t', index=False)
184 #end if 203 #end if
185 ]]> 204 ]]>
186 </configfile> 205 </configfile>
187 </configfiles> 206 </configfiles>
188 <inputs> 207 <inputs>
193 <option value="pre_processor">Sklearn Preprocessor</option> 212 <option value="pre_processor">Sklearn Preprocessor</option>
194 <option value="feature_selection">Feature Selection</option> 213 <option value="feature_selection">Feature Selection</option>
195 <option value="decomposition">Matrix Decomposition</option> 214 <option value="decomposition">Matrix Decomposition</option>
196 <option value="kernel_approximation">Kernel Approximation</option> 215 <option value="kernel_approximation">Kernel Approximation</option>
197 <option value="FeatureAgglomeration">Agglomerate Features</option> 216 <option value="FeatureAgglomeration">Agglomerate Features</option>
198 <option value="skrebate">SK-rebate feature selection</option> 217 <option value="skrebate">SK-rebate Feature Selection</option>
199 <option value="imblearn">imbalanced-learn sampling</option> 218 <option value="imblearn">Imbalanced-learn Sampling</option>
200 <option value="IRAPS">IRAPS -- feature selector and classifier</option> 219 <option value="IRAPS">IRAPS -- feature selector and classifier</option>
220 <option value="preprocessors">Bio-sequence Encoders</option>
201 </param> 221 </param>
202 <when value="None"/> 222 <when value="None"/>
203 <when value="pre_processor"> 223 <when value="pre_processor">
204 <conditional name="pre_processors"> 224 <conditional name="pre_processors">
205 <expand macro="sparse_preprocessors_ext" /> 225 <expand macro="sparse_preprocessors_ext" />
230 help="Default(=blank): n_iter=1000, responsive_thres=-1, resistant_thres=0, random_state=None. No double quotes"/> 250 help="Default(=blank): n_iter=1000, responsive_thres=-1, resistant_thres=0, random_state=None. No double quotes"/>
231 <param argument="p_thres" type="float" value="0.001" label="P value threshold" help="Float. default=0.001"/> 251 <param argument="p_thres" type="float" value="0.001" label="P value threshold" help="Float. default=0.001"/>
232 <param argument="fc_thres" type="float" value="0.1" label="fold change threshold" help="Float. default=0.1"/> 252 <param argument="fc_thres" type="float" value="0.1" label="fold change threshold" help="Float. default=0.1"/>
233 <param argument="occurrence" type="float" value="0.7" label="reservation factor" help="Float. default=0.7"/> 253 <param argument="occurrence" type="float" value="0.7" label="reservation factor" help="Float. default=0.7"/>
234 <param argument="discretize" type="float" value="-1" label="The z_score threshold to discretize target value" help="Float. default=-1"/> 254 <param argument="discretize" type="float" value="-1" label="The z_score threshold to discretize target value" help="Float. default=-1"/>
255 </when>
256 <when value="preprocessors">
257 <expand macro="preprocessors_sequence_encoders"/>
235 </when> 258 </when>
236 </conditional> 259 </conditional>
237 </repeat> 260 </repeat>
238 <section name="final_estimator" title="Final Estimator" expanded="true"> 261 <section name="final_estimator" title="Final Estimator" expanded="true">
239 <conditional name="estimator_selector"> 262 <conditional name="estimator_selector">
264 </section> 287 </section>
265 <param name="output_type" type="select" label="Output the final estimator instead?"> 288 <param name="output_type" type="select" label="Output the final estimator instead?">
266 <option value="Pipeline_Builder" selected="true">Pipeline</option> 289 <option value="Pipeline_Builder" selected="true">Pipeline</option>
267 <option value="Final_Estimator_Builder">Final Estimator</option> 290 <option value="Final_Estimator_Builder">Final Estimator</option>
268 </param> 291 </param>
292 <param name="get_params" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Output parameters for searchCV?"
293 help="Optional. Tunable parameters could be obtained through `estimator_attributes` tool."/>
269 </inputs> 294 </inputs>
270 <outputs> 295 <outputs>
271 <data format="zip" name="outfile" label="${output_type}"/> 296 <data format="zip" name="outfile" label="${output_type}"/>
297 <data format="tabular" name="outfile_params" label="get_params for ${output_type}">
298 <filter>get_params</filter>
299 </data>
272 </outputs> 300 </outputs>
273 <tests> 301 <tests>
274 <test> 302 <test>
275 <repeat name="pipeline_component"> 303 <repeat name="pipeline_component">
276 <conditional name="component_selector"> 304 <conditional name="component_selector">
470 </conditional> 498 </conditional>
471 </section> 499 </section>
472 <param name="output_type" value="Final_Estimator_Builder"/> 500 <param name="output_type" value="Final_Estimator_Builder"/>
473 <output name="outfile" file="pipeline15" compare="sim_size" delta="5"/> 501 <output name="outfile" file="pipeline15" compare="sim_size" delta="5"/>
474 </test> 502 </test>
503 <test>
504 <conditional name="component_selector">
505 <param name="component_type" value="preprocessors"/>
506 <conditional name="encoder_selection">
507 <param name="encoder_type" value="GenomeOneHotEncoder"/>
508 <param name="seq_length" value="1000"/>
509 <param name="padding" value="True"/>
510 </conditional>
511 </conditional>
512 <section name="final_estimator">
513 <conditional name="estimator_selector">
514 <param name="selected_module" value="custom_estimator"/>
515 <param name="c_estimator" value="keras_model02" ftype="zip"/>
516 </conditional>
517 </section>
518 <output name="outfile" file="pipeline16" compare="sim_size" delta="5"/>
519 </test>
475 </tests> 520 </tests>
476 <help> 521 <help>
477 <![CDATA[ 522 <![CDATA[
478 **What it does** 523 **What it does**
479 Constructs a pipeline that contains a list of transforms and a final estimator. Pipeline assembles several steps 524 Constructs a pipeline that contains a list of transforms and a final estimator. Pipeline assembles several steps