comparison qiime2/qiime_sample-classifier_classify-samples.xml @ 29:3ba9833030c1 draft

Uploaded
author florianbegusch
date Fri, 04 Sep 2020 13:12:49 +0000
parents
children
comparison
equal deleted inserted replaced
28:c28331a63dfd 29:3ba9833030c1
1 <?xml version="1.0" ?>
2 <tool id="qiime_sample-classifier_classify-samples" name="qiime sample-classifier classify-samples"
3 version="2020.8">
4 <description>Train and test a cross-validated supervised learning classifier.</description>
5 <requirements> <!-- software the job environment must provide -->
6 <requirement version="2020.8" type="package">qiime2</requirement>
7 </requirements>
8 <command><![CDATA[
9 qiime sample-classifier classify-samples
10 ## Job parallelism comes from GALAXY_SLOTS. QIIME writes every result into the job directory; the steps at the end hand each one to its Galaxy dataset.
11 --i-table=$itable
12 #if $input_files_mmetadatafile:
13 #def list_dict_to_string(list_dict):
14 #set $file_list = list_dict[0]['additional_input'].__getattr__('file_name')
15 #for d in list_dict[1:]:
16 #set $file_list = $file_list + ' --m-metadata-file=' + d['additional_input'].__getattr__('file_name')
17 #end for
18 #return $file_list
19 #end def
20 --m-metadata-file=$list_dict_to_string($input_files_mmetadatafile)
21 #end if
22 ## Undo the placeholder tokens found in the column name. NOTE(review): the 'X' rule also turns every literal X into a backslash - confirm this is intended.
23 #if '__ob__' in str($mmetadatacolumn):
24 #set $mmetadatacolumn_temp = $mmetadatacolumn.replace('__ob__', '[')
25 #set $mmetadatacolumn = $mmetadatacolumn_temp
26 #end if
27 #if '__cb__' in str($mmetadatacolumn):
28 #set $mmetadatacolumn_temp = $mmetadatacolumn.replace('__cb__', ']')
29 #set $mmetadatacolumn = $mmetadatacolumn_temp
30 #end if
31 #if 'X' in str($mmetadatacolumn):
32 #set $mmetadatacolumn_temp = $mmetadatacolumn.replace('X', '\\')
33 #set $mmetadatacolumn = $mmetadatacolumn_temp
34 #end if
35 #if '__sq__' in str($mmetadatacolumn):
36 #set $mmetadatacolumn_temp = $mmetadatacolumn.replace('__sq__', "'")
37 #set $mmetadatacolumn = $mmetadatacolumn_temp
38 #end if
39 #if '__db__' in str($mmetadatacolumn):
40 #set $mmetadatacolumn_temp = $mmetadatacolumn.replace('__db__', '"')
41 #set $mmetadatacolumn = $mmetadatacolumn_temp
42 #end if
43 --m-metadata-column=$mmetadatacolumn
44 #if str($ptestsize):
45 --p-test-size=$ptestsize
46 #end if
47 #if str($pstep):
48 --p-step=$pstep
49 #end if
50 #if str($pcv):
51 --p-cv=$pcv
52 #end if
53 #if str($prandomstate):
54 --p-random-state=$prandomstate
55 #end if
56 --p-n-jobs=\${GALAXY_SLOTS:-1}
57 #if str($pnestimators):
58 --p-n-estimators=$pnestimators
59 #end if
60 #if str($pestimator) != 'None':
61 --p-estimator=$pestimator
62 #end if
63
64 #if $poptimizefeatureselection:
65 --p-optimize-feature-selection
66 #end if
67
68 #if $pparametertuning:
69 --p-parameter-tuning
70 #end if
71
72 #if str($ppalette) != 'None':
73 --p-palette=$ppalette
74 #end if
75
76 #if str($pmissingsamples) != 'None':
77 --p-missing-samples=$pmissingsamples
78 #end if
79
80 --o-sample-estimator=osampleestimator
81 --o-feature-importance=ofeatureimportance
82 --o-predictions=opredictions
83 --o-model-summary=omodelsummary
84 --o-accuracy-results=oaccuracyresults
85 --o-probabilities=oprobabilities
86 --o-heatmap=oheatmap
87
88 ## Artifacts: copy each .qza to its dataset. Steps are chained with a logical AND so a failed qiime run fails the job. The examples flag is deliberately not passed: it only prints usage and exits.
89 && cp osampleestimator.qza '$osampleestimator'
90 && cp ofeatureimportance.qza '$ofeatureimportance'
91 && cp opredictions.qza '$opredictions'
92 && cp oprobabilities.qza '$oprobabilities'
93
94 ## Visualizations: export each .qzv into its own directory, publish the files next to the dataset and use index.html as the dataset itself.
95 && qiime tools export omodelsummary.qzv --output-path omodelsummary_export
96 && mkdir -p '$omodelsummary.files_path'
97 && cp -r omodelsummary_export/* '$omodelsummary.files_path'
98 && mv '$omodelsummary.files_path/index.html' '$omodelsummary'
99 && qiime tools export oaccuracyresults.qzv --output-path oaccuracyresults_export
100 && mkdir -p '$oaccuracyresults.files_path'
101 && cp -r oaccuracyresults_export/* '$oaccuracyresults.files_path'
102 && mv '$oaccuracyresults.files_path/index.html' '$oaccuracyresults'
103 && qiime tools export oheatmap.qzv --output-path oheatmap_export
104 && mkdir -p '$oheatmap.files_path'
105 && cp -r oheatmap_export/* '$oheatmap.files_path'
106 && mv '$oheatmap.files_path/index.html' '$oheatmap'
107 ]]></command>
108 <inputs>
109 <param format="qza,no_unzip.zip" label="--i-table: ARTIFACT FeatureTable[Frequency] Feature table containing all features that should be used for target prediction. [required]" name="itable" optional="False" type="data" />
110 <repeat name="input_files_mmetadatafile" optional="True" title="--m-metadata-file">
111 <param format="tabular,qza,no_unzip.zip" label="--m-metadata-file: METADATA" name="additional_input" optional="True" type="data" />
112 </repeat>
113 <param label="--m-metadata-column: COLUMN MetadataColumn[Categorical] Categorical metadata column to use as prediction target. [required]" name="mmetadatacolumn" optional="False" type="text" />
114 <param exclude_min="True" label="--p-test-size: PROPORTION Range(0.0, 1.0, inclusive_start=False) Fraction of input samples to exclude from training set and use for classifier testing. [default: 0.2]" max="1.0" min="0.0" name="ptestsize" optional="True" type="float" value="0.2" />
115 <param exclude_min="True" label="--p-step: PROPORTION Range(0.0, 1.0, inclusive_start=False) If optimize-feature-selection is True, step is the percentage of features to remove at each iteration. [default: 0.05]" max="1.0" min="0.0" name="pstep" optional="True" type="float" value="0.05" />
116 <param label="--p-cv: INTEGER Number of k-fold cross-validations to perform. Range(1, None) [default: 5]" min="1" name="pcv" optional="True" type="integer" value="5" />
117 <param label="--p-random-state: INTEGER Seed used by random number generator. [optional]" name="prandomstate" optional="True" type="integer" /> <!-- optional integer, as the label states; the command omits the flag when it is left empty -->
118 <param label="--p-n-estimators: INTEGER Range(1, None) Number of trees to grow for estimation. More trees will improve predictive accuracy up to a threshold level, but will also increase time and memory requirements. This parameter only affects ensemble estimators, such as Random Forest, AdaBoost, ExtraTrees, and GradientBoosting. [default: 100]" min="1" name="pnestimators" optional="True" type="integer" value="100" />
119 <param label="--p-estimator: Estimator method to use for sample prediction. [optional]" name="pestimator" optional="True" type="select">
120 <option selected="True" value="None">Selection is Optional</option>
121 <option value="RandomForestClassifier">RandomForestClassifier</option>
122 <option value="ExtraTreesClassifier">ExtraTreesClassifier</option>
123 <option value="GradientBoostingClassifier">GradientBoostingClassifier</option>
124 <option value="AdaBoostClassifier">AdaBoostClassifier</option>
125 <option value="KNeighborsClassifier">KNeighborsClassifier</option>
126 <option value="LinearSVC">LinearSVC</option>
127 <option value="SVC">SVC</option>
128 </param>
129 <param checked="False" label="--p-optimize-feature-selection / --p-no-optimize-feature-selection: Automatically optimize input feature selection using recursive feature elimination. [default: False]" name="poptimizefeatureselection" type="boolean" />
130 <param checked="False" label="--p-parameter-tuning / --p-no-parameter-tuning: Automatically tune hyperparameters using random grid search. [default: False]" name="pparametertuning" type="boolean" />
131 <param label="--p-palette: The color palette to use for plotting. [optional]" name="ppalette" optional="True" type="select">
132 <option selected="True" value="None">Selection is Optional</option>
133 <option value="YellowOrangeBrown">YellowOrangeBrown</option>
134 <option value="YellowOrangeRed">YellowOrangeRed</option>
135 <option value="OrangeRed">OrangeRed</option>
136 <option value="PurpleRed">PurpleRed</option>
137 <option value="RedPurple">RedPurple</option>
138 <option value="BluePurple">BluePurple</option>
139 <option value="GreenBlue">GreenBlue</option>
140 <option value="PurpleBlue">PurpleBlue</option>
141 <option value="YellowGreen">YellowGreen</option>
142 <option value="summer">summer</option>
143 <option value="copper">copper</option>
144 <option value="viridis">viridis</option>
145 <option value="cividis">cividis</option>
146 <option value="plasma">plasma</option>
147 <option value="inferno">inferno</option>
148 <option value="magma">magma</option>
149 <option value="sirocco">sirocco</option>
150 <option value="drifting">drifting</option>
151 <option value="melancholy">melancholy</option>
152 <option value="enigma">enigma</option>
153 <option value="eros">eros</option>
154 <option value="spectre">spectre</option>
155 <option value="ambition">ambition</option>
156 <option value="mysteriousstains">mysteriousstains</option>
157 <option value="daydream">daydream</option>
158 <option value="solano">solano</option>
159 <option value="navarro">navarro</option>
160 <option value="dandelions">dandelions</option>
161 <option value="deepblue">deepblue</option>
162 <option value="verve">verve</option>
163 <option value="greyscale">greyscale</option>
164 </param>
165 <param label="--p-missing-samples: How to handle missing samples in metadata. [optional]" name="pmissingsamples" optional="True" type="select">
166 <option selected="True" value="None">Selection is Optional</option>
167 <option value="error">error</option>
168 <option value="ignore">ignore</option>
169 </param>
170 <!-- Must stay optional: as a required dataset this would be supplied on every run, and the option only shows usage examples and exits. -->
171 <param label="--examples: Show usage examples and exit." name="examples" optional="True" type="data" />
172 </inputs>
173
174 <outputs>
175 <!-- One dataset per QIIME result: .qza artifacts stay qza, visualizations are declared as html. -->
176 <data name="osampleestimator" format="qza" label="${tool.name} on ${on_string}: sampleestimator.qza" />
177 <data name="ofeatureimportance" format="qza" label="${tool.name} on ${on_string}: featureimportance.qza" />
178 <data name="opredictions" format="qza" label="${tool.name} on ${on_string}: predictions.qza" />
179 <data name="omodelsummary" format="html" label="${tool.name} on ${on_string}: modelsummary.html" />
180 <data name="oaccuracyresults" format="html" label="${tool.name} on ${on_string}: accuracyresults.html" />
181 <data name="oprobabilities" format="qza" label="${tool.name} on ${on_string}: probabilities.qza" />
182 <data name="oheatmap" format="html" label="${tool.name} on ${on_string}: heatmap.html" />
183 </outputs>
184
185 <help><![CDATA[
186 Train and test a cross-validated supervised learning classifier.
187 ###############################################################
188
189 Predicts a categorical sample metadata column using a supervised learning
190 classifier. Splits input data into training and test sets. The training set
191 is used to train and test the estimator using a stratified k-fold cross-
192 validation scheme. This includes optional steps for automated feature
193 extraction and hyperparameter optimization. The test set validates
194 classification accuracy of the optimized estimator. Outputs classification
195 results for the test set. For more details on the learning algorithm, see
196 https://scikit-learn.org/stable/supervised_learning.html
197
198 Parameters
199 ----------
200 table : FeatureTable[Frequency]
201 Feature table containing all features that should be used for target
202 prediction.
203 metadata : MetadataColumn[Categorical]
204 Categorical metadata column to use as prediction target.
205 test_size : Float % Range(0.0, 1.0, inclusive_start=False), optional
206 Fraction of input samples to exclude from training set and use for
207 classifier testing.
208 step : Float % Range(0.0, 1.0, inclusive_start=False), optional
209 If optimize_feature_selection is True, step is the percentage of
210 features to remove at each iteration.
211 cv : Int % Range(1, None), optional
212 Number of k-fold cross-validations to perform.
213 random_state : Int, optional
214 Seed used by random number generator.
215 n_jobs : Int, optional
216 Number of jobs to run in parallel.
217 n_estimators : Int % Range(1, None), optional
218 Number of trees to grow for estimation. More trees will improve
219 predictive accuracy up to a threshold level, but will also increase
220 time and memory requirements. This parameter only affects ensemble
221 estimators, such as Random Forest, AdaBoost, ExtraTrees, and
222 GradientBoosting.
223 estimator : Str % Choices('RandomForestClassifier', 'ExtraTreesClassifier', 'GradientBoostingClassifier', 'AdaBoostClassifier', 'KNeighborsClassifier', 'LinearSVC', 'SVC'), optional
224 Estimator method to use for sample prediction.
225 optimize_feature_selection : Bool, optional
226 Automatically optimize input feature selection using recursive feature
227 elimination.
228 parameter_tuning : Bool, optional
229 Automatically tune hyperparameters using random grid search.
230 palette : Str % Choices('YellowOrangeBrown', 'YellowOrangeRed', 'OrangeRed', 'PurpleRed', 'RedPurple', 'BluePurple', 'GreenBlue', 'PurpleBlue', 'YellowGreen', 'summer', 'copper', 'viridis', 'cividis', 'plasma', 'inferno', 'magma', 'sirocco', 'drifting', 'melancholy', 'enigma', 'eros', 'spectre', 'ambition', 'mysteriousstains', 'daydream', 'solano', 'navarro', 'dandelions', 'deepblue', 'verve', 'greyscale'), optional
231 The color palette to use for plotting.
232 missing_samples : Str % Choices('error', 'ignore'), optional
233 How to handle missing samples in metadata. "error" will fail if missing
234 samples are detected. "ignore" will cause the feature table and
235 metadata to be filtered, so that only samples found in both files are
236 retained.
237
238 Returns
239 -------
240 sample_estimator : SampleEstimator[Classifier]
241 Trained sample estimator.
242 feature_importance : FeatureData[Importance]
243 Importance of each input feature to model accuracy.
244 predictions : SampleData[ClassifierPredictions]
245 Predicted target values for each input sample.
246 model_summary : Visualization
247 Summarized parameter and (if enabled) feature selection information for
248 the trained estimator.
249 accuracy_results : Visualization
250 Accuracy results visualization.
251 probabilities : SampleData[Probabilities]
252 Predicted class probabilities for each input sample.
253 heatmap : Visualization
254 A heatmap of the top 50 most important features from the table.
255 ]]></help>
256 <macros> <!-- imports the macro file that the expand element below refers to -->
257 <import>qiime_citation.xml</import>
258 </macros>
259 <expand macro="qiime_citation" />
260 </tool>