comparison qiime2/qiime_sample-classifier_regress-samples.xml @ 29:3ba9833030c1 draft

Uploaded
author florianbegusch
date Fri, 04 Sep 2020 13:12:49 +0000
parents
children
comparison
equal deleted inserted replaced
28:c28331a63dfd 29:3ba9833030c1
1 <?xml version="1.0" ?>
2 <tool id="qiime_sample-classifier_regress-samples" name="qiime sample-classifier regress-samples"
3 version="2020.8">
4 <description>Train and test a cross-validated supervised learning regressor.</description>
5 <requirements>
6 <requirement type="package" version="2020.8">qiime2</requirement>
7 </requirements>
8 <command><![CDATA[
9 qiime sample-classifier regress-samples
10
11 --i-table=$itable
12 ## Emit one --m-metadata-file option per dataset supplied in the repeat.
13 ## Cheetah only treats a line as a directive when the directive name follows
14 ## the '#' immediately; with a space in between ("# if", "# for", "# def") the
15 ## line is ordinary template text, so no control flow runs and no helper is defined.
16 #for $metadata_entry in $input_files_mmetadatafile:
17 #if $metadata_entry.additional_input:
18 --m-metadata-file='$metadata_entry.additional_input'
19 #end if
20 #end for
21
22
23 ## Galaxy's input sanitizer rewrites some characters as __xx__ tokens; map the
24 ## ones that may occur in a column name back before calling qiime.
25 ## The double-quote token is __dq__ (the former '__db__' test could never match).
26 ## 'X' is no longer turned into a backslash: 'X' is an ordinary letter, so that
27 ## rewrite corrupted every column name that legitimately contains one.
28 #set $metadata_column = str($mmetadatacolumn)
29 #set $metadata_column = $metadata_column.replace('__ob__', '[')
30 #set $metadata_column = $metadata_column.replace('__cb__', ']')
31 #set $metadata_column = $metadata_column.replace('__sq__', "'")
32 #set $metadata_column = $metadata_column.replace('__dq__', '"')
33 ## The value is user text that ends up on a shell command line: wrap it in
34 ## single quotes and escape embedded single quotes as '\'' so it stays one word.
35 #set $quoted_metadata_column = "'" + $metadata_column.replace("'", "'\\''") + "'"
36
37
38
39
40
41
42
43
44 --m-metadata-column=$quoted_metadata_column
45
46
47 ## Optional numeric inputs are passed only when set, so an empty field never yields "--opt=".
48 #if str($ptestsize):
49 --p-test-size=$ptestsize
50 #end if
51 #if str($pstep):
52 --p-step=$pstep
53 #end if
54 #if str($pcv):
55 --p-cv=$pcv
56 #end if
57 #if str($prandomstate):
58 --p-random-state=$prandomstate
59 #end if
60 ## No input named "pnjobs" is declared for this tool; use the slot count Galaxy grants the job.
61 --p-n-jobs=\${GALAXY_SLOTS:-1}
62 #if str($pnestimators):
63 --p-n-estimators=$pnestimators
64 #end if
65 #if str($pestimator) != 'None':
66 --p-estimator=$pestimator
67 #end if
68 #if $poptimizefeatureselection:
69 --p-optimize-feature-selection
70 #end if
71 #if $pstratify:
72 --p-stratify
73 #end if
74 #if $pparametertuning:
75 --p-parameter-tuning
76 #end if
77 #if str($pmissingsamples) != 'None':
78 --p-missing-samples=$pmissingsamples
79 #end if
80 --o-sample-estimator=osampleestimator
81 --o-feature-importance=ofeatureimportance
82 --o-predictions=opredictions
83 --o-model-summary=omodelsummary
84 --o-accuracy-results=oaccuracyresults
85 ## --examples is deliberately not forwarded: per its own label it shows usage
86 ## examples and exits, so none of the outputs handled below would be produced.
87 ## Steps are chained with && (not ;) so a failed step stops the job instead of
88 ## being masked by the exit status of a later command.
89 ## Every artifact is copied to its Galaxy dataset; previously only the
90 ## predictions were copied, leaving the other two qza outputs empty.
91 && cp osampleestimator.qza '$osampleestimator'
92 && cp ofeatureimportance.qza '$ofeatureimportance'
93 && cp opredictions.qza '$opredictions'
94 ## Visualizations: export each .qzv into its own directory (a shared one would
95 ## leak the first export into the second dataset), publish the contents as the
96 ## dataset's extra files and use index.html as the dataset itself.
97 && qiime tools export --input-path omodelsummary.qzv --output-path out_modelsummary
98 && mkdir -p '$omodelsummary.files_path'
99 && cp -r out_modelsummary/* '$omodelsummary.files_path'
100 && mv '$omodelsummary.files_path/index.html' '$omodelsummary'
101
102 && qiime tools export --input-path oaccuracyresults.qzv --output-path out_accuracyresults
103 && mkdir -p '$oaccuracyresults.files_path'
104 && cp -r out_accuracyresults/* '$oaccuracyresults.files_path'
105 && mv '$oaccuracyresults.files_path/index.html' '$oaccuracyresults'
106
107
108
109 ]]></command>
110 <inputs>
111 <param format="qza,no_unzip.zip" label="--i-table: ARTIFACT FeatureTable[Frequency] Feature table containing all features that should be used for target prediction. [required]" name="itable" optional="False" type="data" />
112 <repeat name="input_files_mmetadatafile" optional="True" title="--m-metadata-file">
113 <param format="tabular,qza,no_unzip.zip" label="--m-metadata-file: METADATA" name="additional_input" optional="True" type="data" />
114 </repeat>
115 <param label="--m-metadata-column: COLUMN MetadataColumn[Numeric] Numeric metadata column to use as prediction target. [required]" name="mmetadatacolumn" optional="False" type="text" />
116 <param exclude_min="True" label="--p-test-size: PROPORTION Range(0.0, 1.0, inclusive_start=False) Fraction of input samples to exclude from training set and use for classifier testing. [default: 0.2]" max="1.0" min="0.0" name="ptestsize" optional="True" type="float" value="0.2" />
117 <param exclude_min="True" label="--p-step: PROPORTION Range(0.0, 1.0, inclusive_start=False) If optimize-feature-selection is True, step is the percentage of features to remove at each iteration. [default: 0.05]" max="1.0" min="0.0" name="pstep" optional="True" type="float" value="0.05" />
118 <param label="--p-cv: INTEGER Number of k-fold cross-validations to perform. Range(1, None) [default: 5]" min="1" name="pcv" optional="True" type="integer" value="5" />
119 <param label="--p-random-state: INTEGER Seed used by random number generator. [optional]" name="prandomstate" optional="True" type="integer" />
120 <param label="--p-n-estimators: INTEGER Range(1, None) Number of trees to grow for estimation. More trees will improve predictive accuracy up to a threshold level, but will also increase time and memory requirements. This parameter only affects ensemble estimators, such as Random Forest, AdaBoost, ExtraTrees, and GradientBoosting. [default: 100]" min="1" name="pnestimators" optional="True" type="integer" value="100" />
121 <param label="--p-estimator: Estimator method to use for sample prediction." name="pestimator" optional="True" type="select">
122 <option selected="True" value="None">Selection is Optional</option>
123 <option value="RandomForestRegressor">RandomForestRegressor</option>
124 <option value="ExtraTreesRegressor">ExtraTreesRegressor</option>
125 <option value="GradientBoostingRegressor">GradientBoostingRegressor</option>
126 <option value="AdaBoostRegressor">AdaBoostRegressor</option>
127 <option value="ElasticNet">ElasticNet</option>
128 <option value="Ridge">Ridge</option>
129 <option value="Lasso">Lasso</option>
130 <option value="KNeighborsRegressor">KNeighborsRegressor</option>
131 <option value="LinearSVR">LinearSVR</option>
132 <option value="SVR">SVR</option>
133 </param>
134 <param label="--p-optimize-feature-selection: --p-optimize-feature-selection: / --p-no-optimize-feature-selection Automatically optimize input feature selection using recursive feature elimination. [default: False]" name="poptimizefeatureselection" checked="False" type="boolean" />
135 <param label="--p-stratify: --p-stratify: / --p-no-stratify Evenly stratify training and test data among metadata categories. If True, all values in column must match at least two samples. [default: False]" name="pstratify" checked="False" type="boolean" />
136 <param label="--p-parameter-tuning: --p-parameter-tuning: / --p-no-parameter-tuning Automatically tune hyperparameters using random grid search. [default: False]" name="pparametertuning" checked="False" type="boolean" />
137 <param label="--p-missing-samples: How to handle missing samples in metadata." name="pmissingsamples" optional="True" type="select">
138 <option selected="True" value="None">Selection is Optional</option>
139 <option value="error">error</option>
140 <option value="ignore">ignore</option>
141 </param>
142 <param label="--examples: Show usage examples and exit." name="examples" optional="True" type="data" />
143 <!-- prandomstate is an optional integer seed, as its label states. examples must stay optional: per its label it makes qiime exit without producing outputs. -->
144 </inputs>
145 <!-- Datasets produced by the tool: three QIIME 2 artifacts (qza) and two HTML pages, one per "o-" option of the command. -->
146 <outputs>
147 <data name="osampleestimator" format="qza" label="${tool.name} on ${on_string}: sampleestimator.qza" />
148 <data name="ofeatureimportance" format="qza" label="${tool.name} on ${on_string}: featureimportance.qza" />
149 <data name="opredictions" format="qza" label="${tool.name} on ${on_string}: predictions.qza" />
150 <data name="omodelsummary" format="html" label="${tool.name} on ${on_string}: modelsummary.html" />
151 <data name="oaccuracyresults" format="html" label="${tool.name} on ${on_string}: accuracyresults.html" />
152
153 </outputs>
154
155 <help><![CDATA[
156 Train and test a cross-validated supervised learning regressor.
157 ###############################################################
158
159 Predicts a continuous sample metadata column using a supervised learning
160 regressor. Splits input data into training and test sets. The training set
161 is used to train and test the estimator using a stratified k-fold cross-
162 validation scheme. This includes optional steps for automated feature
163 extraction and hyperparameter optimization. The test set validates the
164 predictive accuracy of the optimized estimator. Outputs regression
165 results for the test set. For more details on the learning algorithm, see
166 http://scikit-learn.org/stable/supervised_learning.html
167
168 Parameters
169 ----------
170 table : FeatureTable[Frequency]
171 Feature table containing all features that should be used for target
172 prediction.
173 metadata : MetadataColumn[Numeric]
174 Numeric metadata column to use as prediction target.
175 test_size : Float % Range(0.0, 1.0, inclusive_start=False), optional
176 Fraction of input samples to exclude from training set and use for
177 classifier testing.
178 step : Float % Range(0.0, 1.0, inclusive_start=False), optional
179 If optimize_feature_selection is True, step is the percentage of
180 features to remove at each iteration.
181 cv : Int % Range(1, None), optional
182 Number of k-fold cross-validations to perform.
183 random_state : Int, optional
184 Seed used by random number generator.
185 n_jobs : Int, optional
186 Number of jobs to run in parallel.
187 n_estimators : Int % Range(1, None), optional
188 Number of trees to grow for estimation. More trees will improve
189 predictive accuracy up to a threshold level, but will also increase
190 time and memory requirements. This parameter only affects ensemble
191 estimators, such as Random Forest, AdaBoost, ExtraTrees, and
192 GradientBoosting.
193 estimator : Str % Choices('RandomForestRegressor', 'ExtraTreesRegressor', 'GradientBoostingRegressor', 'AdaBoostRegressor', 'ElasticNet', 'Ridge', 'Lasso', 'KNeighborsRegressor', 'LinearSVR', 'SVR'), optional
194 Estimator method to use for sample prediction.
195 optimize_feature_selection : Bool, optional
196 Automatically optimize input feature selection using recursive feature
197 elimination.
198 stratify : Bool, optional
199 Evenly stratify training and test data among metadata categories. If
200 True, all values in column must match at least two samples.
201 parameter_tuning : Bool, optional
202 Automatically tune hyperparameters using random grid search.
203 missing_samples : Str % Choices('error', 'ignore'), optional
204 How to handle missing samples in metadata. "error" will fail if missing
205 samples are detected. "ignore" will cause the feature table and
206 metadata to be filtered, so that only samples found in both files are
207 retained.
208
209 Returns
210 -------
211 sample_estimator : SampleEstimator[Regressor]
212 Trained sample estimator.
213 feature_importance : FeatureData[Importance]
214 Importance of each input feature to model accuracy.
215 predictions : SampleData[RegressorPredictions]
216 Predicted target values for each input sample.
217 model_summary : Visualization
218 Summarized parameter and (if enabled) feature selection information for
219 the trained estimator.
220 accuracy_results : Visualization
221 Accuracy results visualization.
222 ]]></help>
223 <macros>
224 <import>qiime_citation.xml</import>
225 </macros>
226 <expand macro="qiime_citation"/>
227 </tool>