comparison qiime2/qiime_sample-classifier_regress-samples.xml @ 0:370e0b6e9826 draft

Uploaded
author florianbegusch
date Wed, 17 Jul 2019 03:05:17 -0400
parents
children a025a4a89e07
comparison
equal deleted inserted replaced
-1:000000000000 0:370e0b6e9826
1 <?xml version="1.0" ?>
2 <tool id="qiime_sample-classifier_regress-samples" name="qiime sample-classifier regress-samples" version="2019.4">
3 <description> - Train and test a cross-validated supervised learning regressor.</description>
4 <requirements>
5 <requirement type="package" version="2019.4">qiime2</requirement>
6 </requirements>
7 <command><![CDATA[
8 qiime sample-classifier regress-samples
9 ## Optional flags below are emitted only when the corresponding Galaxy parameter is set.
10 --i-table=$itable
11 --m-metadata-column="$mmetadatacolumn"
12
13 #if $ptestsize:
14 --p-test-size=$ptestsize
15 #end if
16
17 #if $pstep:
18 --p-step=$pstep
19 #end if
20
21 #if $pcv:
22 --p-cv=$pcv
23 #end if
24
25 #if str($prandomstate):
26 --p-random-state="$prandomstate"
27 #end if
28 ## The job count is taken from the shell variable GALAXY_SLOTS at run time, falling back to 4.
29 #set $pnjobs = '${GALAXY_SLOTS:-4}'
30
31 #if str($pnjobs):
32 --p-n-jobs="$pnjobs"
33 #end if
34
35
36 #if $pnestimators:
37 --p-n-estimators=$pnestimators
38 #end if
39
40 #if str($pestimator) != 'None':
41 --p-estimator=$pestimator
42 #end if
43
44 #if $poptimizefeatureselection:
45 --p-optimize-feature-selection
46 #end if
47
48 #if $pstratify:
49 --p-stratify
50 #end if
51
52 #if $pparametertuning:
53 --p-parameter-tuning
54 #end if
55
56 #if str($pmissingsamples) != 'None':
57 --p-missing-samples=$pmissingsamples
58 #end if
59
60 ## Each entry of the repeat block contributes one --m-metadata-file argument.
61 #if $input_files_mmetadatafile:
62 #def list_dict_to_string(list_dict):
63 #set $file_list = list_dict[0]['additional_input'].__getattr__('file_name')
64 #for d in list_dict[1:]:
65 #set $file_list = $file_list + ' --m-metadata-file=' + d['additional_input'].__getattr__('file_name')
66 #end for
67 #return $file_list
68 #end def
69 --m-metadata-file=$list_dict_to_string($input_files_mmetadatafile)
70 #end if
71
72 ## Results are written under fixed names in the working directory, then copied/exported to the Galaxy dataset paths. Each .qzv is exported into its own directory so files of one visualization are not copied into the other.
73 --o-sample-estimator=osampleestimator
74 --o-feature-importance=ofeatureimportance
75 --o-predictions=opredictions
76 --o-model-summary=omodelsummary
77 --o-accuracy-results=oaccuracyresults
78 ;
79 cp osampleestimator.qza $osampleestimator;
80 cp ofeatureimportance.qza $ofeatureimportance;
81 cp opredictions.qza $opredictions;
82 qiime tools export --input-path omodelsummary.qzv --output-path out_modelsummary && mkdir -p '$omodelsummary.files_path'
83 && cp -r out_modelsummary/* '$omodelsummary.files_path'
84 && mv '$omodelsummary.files_path/index.html' '$omodelsummary';
85 qiime tools export --input-path oaccuracyresults.qzv --output-path out_accuracyresults && mkdir -p '$oaccuracyresults.files_path'
86 && cp -r out_accuracyresults/* '$oaccuracyresults.files_path'
87 && mv '$oaccuracyresults.files_path/index.html' '$oaccuracyresults'
88 ]]></command>
89 <inputs> <!-- Parameter names match the $variables read by the command template; boolean defaults use Galaxy's "checked" attribute. -->
90 <param format="qza,no_unzip.zip" label="--i-table: ARTIFACT FeatureTable[Frequency] Feature table containing all features that should be used for target prediction. [required]" name="itable" optional="False" type="data"/>
91 <param label="--m-metadata-column: COLUMN MetadataColumn[Numeric] Numeric metadata column to use as prediction target. [required]" name="mmetadatacolumn" optional="False" type="text"/>
92 <param label="--p-test-size: PROPORTION Range(0.0, 1.0, inclusive_start=False) Fraction of input samples to exclude from training set and use for classifier testing. [default: 0.2]" name="ptestsize" optional="True" type="float" value="0.2"/>
93 <param label="--p-step: PROPORTION Range(0.0, 1.0, inclusive_start=False) If optimize-feature-selection is True, step is the percentage of features to remove at each iteration. [default: 0.05]" name="pstep" optional="True" type="float" value="0.05"/>
94 <param label="--p-cv: INTEGER Number of k-fold cross-validations to perform. Range(1, None) [default: 5]" name="pcv" optional="True" type="integer" value="5" min="1"/>
95 <param label="--p-random-state: INTEGER Seed used by random number generator. [optional]" name="prandomstate" optional="True" type="integer"/>
96 <param label="--p-n-estimators: INTEGER Range(1, None) Number of trees to grow for estimation. More trees will improve predictive accuracy up to a threshold level, but will also increase time and memory requirements. This parameter only affects ensemble estimators, such as Random Forest, AdaBoost, ExtraTrees, and GradientBoosting. [default: 100]" name="pnestimators" optional="True" type="integer" value="100" min="1"/>
97 <param label="--p-estimator: Estimator method to use for sample prediction." name="pestimator" optional="True" type="select">
98 <option selected="True" value="None">Selection is Optional</option>
99 <option value="RandomForestRegressor">RandomForestRegressor</option>
100 <option value="ExtraTreesRegressor">ExtraTreesRegressor</option>
101 <option value="GradientBoostingRegressor">GradientBoostingRegressor</option>
102 <option value="AdaBoostRegressor">AdaBoostRegressor</option>
103 <option value="ElasticNet">ElasticNet</option>
104 <option value="Ridge">Ridge</option>
105 <option value="Lasso">Lasso</option>
106 <option value="KNeighborsRegressor">KNeighborsRegressor</option>
107 <option value="LinearSVR">LinearSVR</option>
108 <option value="SVR">SVR</option>
109 </param>
110 <param label="--p-optimize-feature-selection: --p-no-optimize-feature-selection Automatically optimize input feature selection using recursive feature elimination. [default: False]" name="poptimizefeatureselection" checked="False" type="boolean"/>
111 <param label="--p-stratify: --p-no-stratify Evenly stratify training and test data among metadata categories. If True, all values in column must match at least two samples. [default: False]" name="pstratify" checked="False" type="boolean"/>
112 <param label="--p-parameter-tuning: --p-no-parameter-tuning Automatically tune hyperparameters using random grid search. [default: False]" name="pparametertuning" checked="False" type="boolean"/>
113 <param label="--p-missing-samples: How to handle missing samples in metadata." name="pmissingsamples" optional="True" type="select">
114 <option selected="True" value="None">Selection is Optional</option>
115 <option value="error">error</option>
116 <option value="ignore">ignore</option>
117 </param>
118
119 <repeat name="input_files_mmetadatafile" optional="True" title="--m-metadata-file">
120 <param label="--m-metadata-file: Metadata file or artifact viewable as metadata. This option may be supplied multiple times to merge metadata. [optional]" name="additional_input" type="data" format="tabular,qza,no_unzip.zip" />
121 </repeat>
122
123 </inputs>
124 <outputs> <!-- The three .qza artifacts are copied into place by the command; the two visualizations are declared as html because the command exports each .qzv and moves its index.html onto the dataset path. -->
125 <data format="qza" label="${tool.name} on ${on_string}: sampleestimator.qza" name="osampleestimator"/>
126 <data format="qza" label="${tool.name} on ${on_string}: featureimportance.qza" name="ofeatureimportance"/>
127 <data format="qza" label="${tool.name} on ${on_string}: predictions.qza" name="opredictions"/>
128 <data format="html" label="${tool.name} on ${on_string}: modelsummary.qzv" name="omodelsummary"/>
129 <data format="html" label="${tool.name} on ${on_string}: accuracyresults.qzv" name="oaccuracyresults"/>
130 </outputs>
131 <help><![CDATA[
132 Train and test a cross-validated supervised learning regressor.
133 ###############################################################
134
135 Predicts a continuous sample metadata column using a supervised learning
136 regressor. Splits input data into training and test sets. The training set
137 is used to train and test the estimator using a stratified k-fold cross-
138 validation scheme. This includes optional steps for automated feature
139 extraction and hyperparameter optimization. The test set validates
140 prediction accuracy of the optimized estimator. Outputs prediction
141 results for test set. For more details on the learning algorithm, see
142 http://scikit-learn.org/stable/supervised_learning.html
143
144 Parameters
145 ----------
146 table : FeatureTable[Frequency]
147 Feature table containing all features that should be used for target
148 prediction.
149 metadata : MetadataColumn[Numeric]
150 Numeric metadata column to use as prediction target.
151 test_size : Float % Range(0.0, 1.0, inclusive_start=False), optional
152 Fraction of input samples to exclude from training set and use for
153 classifier testing.
154 step : Float % Range(0.0, 1.0, inclusive_start=False), optional
155 If optimize_feature_selection is True, step is the percentage of
156 features to remove at each iteration.
157 cv : Int % Range(1, None), optional
158 Number of k-fold cross-validations to perform.
159 random_state : Int, optional
160 Seed used by random number generator.
161 n_estimators : Int % Range(1, None), optional
162 Number of trees to grow for estimation. More trees will improve
163 predictive accuracy up to a threshold level, but will also increase
164 time and memory requirements. This parameter only affects ensemble
165 estimators, such as Random Forest, AdaBoost, ExtraTrees, and
166 GradientBoosting.
167 estimator : Str % Choices('RandomForestRegressor', 'ExtraTreesRegressor', 'GradientBoostingRegressor', 'AdaBoostRegressor', 'ElasticNet', 'Ridge', 'Lasso', 'KNeighborsRegressor', 'LinearSVR', 'SVR'), optional
168 Estimator method to use for sample prediction.
169 optimize_feature_selection : Bool, optional
170 Automatically optimize input feature selection using recursive feature
171 elimination.
172 stratify : Bool, optional
173 Evenly stratify training and test data among metadata categories. If
174 True, all values in column must match at least two samples.
175 parameter_tuning : Bool, optional
176 Automatically tune hyperparameters using random grid search.
177 missing_samples : Str % Choices('error', 'ignore'), optional
178 How to handle missing samples in metadata. "error" will fail if missing
179 samples are detected. "ignore" will cause the feature table and
180 metadata to be filtered, so that only samples found in both files are
181 retained.
182
183 Returns
184 -------
185 sample_estimator : SampleEstimator[Regressor]
186 Trained sample estimator.
187 feature_importance : FeatureData[Importance]
188 Importance of each input feature to model accuracy.
189 predictions : SampleData[RegressorPredictions]
190 Predicted target values for each input sample.
191 model_summary : Visualization
192 Summarized parameter and (if enabled) feature selection information for
193 the trained estimator.
194 accuracy_results : Visualization
195 Accuracy results visualization.
196 ]]></help>
197 <macros> <!-- Imports macro definitions from qiime_citation.xml; the qiime_citation macro is expanded right after this element. -->
198 <import>qiime_citation.xml</import>
199 </macros>
200 <expand macro="qiime_citation"/>
201 </tool>