comparison qiime2-2020.8/qiime_longitudinal_maturity-index.xml @ 0:5c352d975ef7 draft

Uploaded
author florianbegusch
date Thu, 03 Sep 2020 09:33:04 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5c352d975ef7
1 <?xml version="1.0" ?>
2 <tool id="qiime_longitudinal_maturity-index" name="qiime longitudinal maturity-index"
3 version="2020.8">
4 <description>Microbial maturity index prediction.</description>
5 <requirements> <!-- Software the tool needs at run time. -->
6 <requirement type="package" version="2020.8">qiime2</requirement> <!-- The qiime2 package, pinned to the same 2020.8 release as the tool's own version attribute. -->
7 </requirements>
8 <command><![CDATA[
9 qiime longitudinal maturity-index
10
11 ## Input feature table; the path is quoted so it is always one shell word.
12 --i-table='$itable'
13
14 ## Cheetah only recognises a directive when its name follows '#' directly
15 ## (no space in between); otherwise the line is emitted verbatim.
16 ## One --m-metadata-file option is written per entry of the repeat; QIIME 2
17 ## merges multiple metadata files itself.
18 #for $metadata_entry in $input_files_mmetadatafile:
19 --m-metadata-file='$metadata_entry.additional_input'
20 #end for
21
22
23 ## Galaxy's default text sanitizer encodes special characters as tokens.
24 ## Decode them so QIIME 2 receives the real metadata column name;
25 ## str.replace() is a no-op when a token is absent, so no guards are needed.
26 #set $state_column = str($pstatecolumn)
27
28 ## '__ob__' / '__cb__' stand for the opening / closing square bracket.
29 #set $state_column = $state_column.replace('__ob__', '[')
30 #set $state_column = $state_column.replace('__cb__', ']')
31
32 ## '__dq__' is Galaxy's token for a double quote.
33 #set $state_column = $state_column.replace('__dq__', '"')
34
35 ## '__sq__' stands for a single quote; it is written as '"'"' (close quote,
36 ## quoted quote, reopen quote) so it stays inside the quoted shell word below.
37 #set $state_column = $state_column.replace('__sq__', "'\"'\"'")
38
39 ## A literal 'X' is deliberately left untouched: Galaxy substitutes 'X' for
40 ## every character it rejects, so it cannot be mapped back to a backslash
41 ## without corrupting column names that really contain a capital X.
42
43 ## Values are single-quoted so spaces or shell metacharacters stay literal.
44 --p-state-column='$state_column'
45
46
47 --p-group-by='$pgroupby'
48
49 --p-control='$pcontrol'
50
51 ## The individual-id column is optional and may be left empty.
52 ## Galaxy's default text sanitizer encodes special characters as tokens;
53 ## decode them so QIIME 2 receives the real metadata column name.
54 #set $individual_id_column = str($pindividualidcolumn)
55
56 ## '__ob__' / '__cb__' stand for the opening / closing square bracket.
57 #set $individual_id_column = $individual_id_column.replace('__ob__', '[')
58 #set $individual_id_column = $individual_id_column.replace('__cb__', ']')
59
60 ## '__dq__' is Galaxy's token for a double quote.
61 #set $individual_id_column = $individual_id_column.replace('__dq__', '"')
62
63 ## '__sq__' stands for a single quote; it is written as '"'"' so that it
64 ## stays inside the single-quoted shell word built below.
65 #set $individual_id_column = $individual_id_column.replace('__sq__', "'\"'\"'")
66
67 ## A literal 'X' is deliberately left untouched: Galaxy uses it for every
68 ## character it rejects, so it cannot be mapped back to a backslash without
69 ## corrupting column names that really contain a capital X.
70
71 ## Only pass the option when a column name was actually supplied.
72 #if $individual_id_column:
73 --p-individual-id-column='$individual_id_column'
74 #end if
75
76 ## 'None' is the value of the "Selection is Optional" placeholder entry.
77 #if str($pestimator) != 'None':
78 --p-estimator=$pestimator
79 #end if
80 --p-n-estimators=$pnestimators
81
82 --p-test-size=$ptestsize
83
84 --p-step=$pstep
85
86 --p-cv=$pcv
87 #if str($prandomstate):
88 --p-random-state=$prandomstate
89 #end if
90 ## The tool has no "pnjobs" input; use the core count Galaxy allocated to the job (1 when unset).
91 --p-n-jobs=\${GALAXY_SLOTS:-1}
92 ## Boolean switches are only written when ticked; their labels give False as the QIIME 2 default.
93 #if $pparametertuning:
94 --p-parameter-tuning
95 #end if
96
97 #if $poptimizefeatureselection:
98 --p-optimize-feature-selection
99 #end if
100
101 #if $pstratify:
102 --p-stratify
103 #end if
104 ## 'None' is again the placeholder entry of the select list.
105 #if str($pmissingsamples) != 'None':
106 --p-missing-samples=$pmissingsamples
107 #end if
108
109 --p-feature-count=$pfeaturecount
110
111 ## Results are written to fixed base names in the working directory; the
112 ## follow-up copy/export steps expect QIIME 2 to add the .qza / .qzv extension.
113
114 ## Artifacts (.qza)
115 --o-sample-estimator=osampleestimator
116 --o-feature-importance=ofeatureimportance
117 --o-predictions=opredictions
118 --o-maz-scores=omazscores
119
120 ## Visualizations (.qzv)
121 --o-model-summary=omodelsummary
122 --o-accuracy-results=oaccuracyresults
123 --o-clustermap=oclustermap
124 --o-volatility-plots=ovolatilityplots
125
126 ## '--examples' is intentionally never passed: per its own input label it
127 ## only shows usage examples and exits, so the analysis would not run.
128 ## The 'examples' input is therefore ignored by this command.
129
130
131 ## Every step is joined with '&&' so the job stops at the first failure.
132 && cp osampleestimator.qza '$osampleestimator'
133 && cp ofeatureimportance.qza '$ofeatureimportance'
134 && cp opredictions.qza '$opredictions'
135 && cp omazscores.qza '$omazscores'
136
137 ## Export each visualization into its own directory (a shared one would leak
138 ## files between reports) and publish its index.html as the Galaxy HTML dataset.
139 #for $qzv_name, $html_output in [('omodelsummary', $omodelsummary), ('oaccuracyresults', $oaccuracyresults), ('oclustermap', $oclustermap), ('ovolatilityplots', $ovolatilityplots)]:
140 && qiime tools export --input-path ${qzv_name}.qzv --output-path out_${qzv_name}
141 && mkdir -p '$html_output.files_path'
142 && cp -r out_${qzv_name}/* '$html_output.files_path'
143 && mv '$html_output.files_path/index.html' '$html_output'
144 #end for
145
146 ]]></command>
147 <inputs> <!-- Form fields; each "name" is the variable the command template reads. Labels repeat the QIIME 2 option text. -->
148 <param format="qza,no_unzip.zip" label="--i-table: ARTIFACT FeatureTable[Frequency] Feature table containing all features that should be used for target prediction. [required]" name="itable" optional="False" type="data" />
149 <repeat min="1" name="input_files_mmetadatafile" title="--m-metadata-file">
150 <param format="tabular,qza,no_unzip.zip" label="--m-metadata-file: METADATA... (multiple arguments will be merged) [required]" name="additional_input" optional="False" type="data" />
151 </repeat>
152 <param label="--p-state-column: TEXT Numeric metadata column containing sampling time (state) data to use as prediction target. [required]" name="pstatecolumn" optional="False" type="text" />
153 <param label="--p-group-by: TEXT Categorical metadata column to use for plotting and significance testing between main treatment groups. [required]" name="pgroupby" optional="False" type="text" />
154 <param label="--p-control: TEXT Value of group-by to use as control group. The regression model will be trained using only control group data, and the maturity scores of other groups consequently will be assessed relative to this group. [required]" name="pcontrol" optional="False" type="text" />
155 <param label="--p-individual-id-column: TEXT Optional metadata column containing IDs for individual subjects. Adds individual subject (spaghetti) vectors to volatility charts if a column name is provided. [optional]" name="pindividualidcolumn" optional="True" type="text" />
156 <param label="--p-estimator: Regression model to use for prediction. [optional]" name="pestimator" optional="True" type="select">
157 <option selected="True" value="None">Selection is Optional</option>
158 <option value="RandomForestRegressor">RandomForestRegressor</option>
159 <option value="ExtraTreesRegressor">ExtraTreesRegressor</option>
160 <option value="GradientBoostingRegressor">GradientBoostingRegressor</option>
161 <option value="AdaBoostRegressor">AdaBoostRegressor</option>
162 <option value="ElasticNet">ElasticNet</option>
163 <option value="Ridge">Ridge</option>
164 <option value="Lasso">Lasso</option>
165 <option value="KNeighborsRegressor">KNeighborsRegressor</option>
166 <option value="LinearSVR">LinearSVR</option>
167 <option value="SVR">SVR</option>
168 </param>
169 <param label="--p-n-estimators: INTEGER Range(1, None) Number of trees to grow for estimation. More trees will improve predictive accuracy up to a threshold level, but will also increase time and memory requirements. This parameter only affects ensemble estimators, such as Random Forest, AdaBoost, ExtraTrees, and GradientBoosting. [default: 100]" min="1" name="pnestimators" optional="True" type="integer" value="100" />
170 <param exclude_min="True" label="--p-test-size: PROPORTION Range(0.0, 1.0, inclusive_start=False) Fraction of input samples to exclude from training set and use for classifier testing. [default: 0.5]" max="1.0" min="0.0" name="ptestsize" optional="True" type="float" value="0.5" />
171 <param exclude_min="True" label="--p-step: PROPORTION Range(0.0, 1.0, inclusive_start=False) If optimize-feature-selection is True, step is the percentage of features to remove at each iteration. [default: 0.05]" max="1.0" min="0.0" name="pstep" optional="True" type="float" value="0.05" />
172 <param label="--p-cv: INTEGER Number of k-fold cross-validations to perform. Range(1, None) [default: 5]" min="1" name="pcv" optional="True" type="integer" value="5" />
173 <param label="--p-random-state: INTEGER Seed used by random number generator. [optional]" name="prandomstate" optional="True" type="integer" />
174 <param checked="False" label="--p-parameter-tuning / --p-no-parameter-tuning: Automatically tune hyperparameters using random grid search. [default: False]" name="pparametertuning" type="boolean" />
175 <param checked="False" label="--p-optimize-feature-selection / --p-no-optimize-feature-selection: Automatically optimize input feature selection using recursive feature elimination. [default: False]" name="poptimizefeatureselection" type="boolean" />
176 <param checked="False" label="--p-stratify / --p-no-stratify: Evenly stratify training and test data among metadata categories. If True, all values in column must match at least two samples. [default: False]" name="pstratify" type="boolean" />
177 <param label="--p-missing-samples: How to handle missing samples in metadata. 'error' will fail if missing samples are detected. 'ignore' will cause the feature table and metadata to be filtered, so that only samples found in both files are retained. [optional]" name="pmissingsamples" optional="True" type="select">
178 <option selected="True" value="None">Selection is Optional</option>
179 <option value="error">error</option>
180 <option value="ignore">ignore</option>
181 </param>
182 <param label="--p-feature-count: INTEGER Range(0, None) Filter feature table to include top N most important features. Set to zero to include all features. [default: 50]" min="0" name="pfeaturecount" optional="True" type="integer" value="50" />
183 <param label="--examples: Show usage examples and exit. (Not needed to run the analysis; leave empty.)" name="examples" optional="True" type="data" />
184 <!-- 'examples' stays declared so its name remains available to the command template, but it is optional: a dataset is not required to run the tool. -->
185 </inputs>
186
187 <outputs> <!-- One dataset per QIIME 2 result: artifacts are stored as qza, visualizations as html. -->
188 <data name="osampleestimator" format="qza" label="${tool.name} on ${on_string}: sampleestimator.qza" />
189 <data name="ofeatureimportance" format="qza" label="${tool.name} on ${on_string}: featureimportance.qza" />
190 <data name="opredictions" format="qza" label="${tool.name} on ${on_string}: predictions.qza" />
191 <data name="omodelsummary" format="html" label="${tool.name} on ${on_string}: modelsummary.html" />
192 <data name="oaccuracyresults" format="html" label="${tool.name} on ${on_string}: accuracyresults.html" />
193 <data name="omazscores" format="qza" label="${tool.name} on ${on_string}: mazscores.qza" />
194 <data name="oclustermap" format="html" label="${tool.name} on ${on_string}: clustermap.html" />
195 <data name="ovolatilityplots" format="html" label="${tool.name} on ${on_string}: volatilityplots.html" />
196
197 </outputs>
198
199 <help><![CDATA[
200 Microbial maturity index prediction.
201 ###############################################################
202
203 Calculates a "microbial maturity" index from a regression model trained on
204 feature data to predict a given continuous metadata column, e.g., to
205 predict age as a function of microbiota composition. The model is trained
206 on a subset of control group samples, then predicts the column value for
207 all samples. This visualization computes maturity index z-scores to compare
208 relative "maturity" between each group, as described in
209 doi:10.1038/nature13421. This method can be used to predict between-group
210 differences in relative trajectory across any type of continuous metadata
211 gradient, e.g., intestinal microbiome development by age, microbial
212 succession during wine fermentation, or microbial community differences
213 along environmental gradients, as a function of two or more different
214 "treatment" groups.
215
216 Parameters
217 ----------
218 table : FeatureTable[Frequency]
219 Feature table containing all features that should be used for target
220 prediction.
221 metadata : Metadata
222 state_column : Str
223 Numeric metadata column containing sampling time (state) data to use as
224 prediction target.
225 group_by : Str
226 Categorical metadata column to use for plotting and significance
227 testing between main treatment groups.
228 control : Str
229 Value of group_by to use as control group. The regression model will be
230 trained using only control group data, and the maturity scores of other
231 groups consequently will be assessed relative to this group.
232 individual_id_column : Str, optional
233 Optional metadata column containing IDs for individual subjects. Adds
234 individual subject (spaghetti) vectors to volatility charts if a column
235 name is provided.
236 estimator : Str % Choices('RandomForestRegressor', 'ExtraTreesRegressor', 'GradientBoostingRegressor', 'AdaBoostRegressor', 'ElasticNet', 'Ridge', 'Lasso', 'KNeighborsRegressor', 'LinearSVR', 'SVR'), optional
237 Regression model to use for prediction.
238 n_estimators : Int % Range(1, None), optional
239 Number of trees to grow for estimation. More trees will improve
240 predictive accuracy up to a threshold level, but will also increase
241 time and memory requirements. This parameter only affects ensemble
242 estimators, such as Random Forest, AdaBoost, ExtraTrees, and
243 GradientBoosting.
244 test_size : Float % Range(0.0, 1.0, inclusive_start=False), optional
245 Fraction of input samples to exclude from training set and use for
246 classifier testing.
247 step : Float % Range(0.0, 1.0, inclusive_start=False), optional
248 If optimize_feature_selection is True, step is the percentage of
249 features to remove at each iteration.
250 cv : Int % Range(1, None), optional
251 Number of k-fold cross-validations to perform.
252 random_state : Int, optional
253 Seed used by random number generator.
254 n_jobs : Int, optional
255 Number of jobs to run in parallel.
256 parameter_tuning : Bool, optional
257 Automatically tune hyperparameters using random grid search.
258 optimize_feature_selection : Bool, optional
259 Automatically optimize input feature selection using recursive feature
260 elimination.
261 stratify : Bool, optional
262 Evenly stratify training and test data among metadata categories. If
263 True, all values in column must match at least two samples.
264 missing_samples : Str % Choices('error', 'ignore'), optional
265 How to handle missing samples in metadata. "error" will fail if missing
266 samples are detected. "ignore" will cause the feature table and
267 metadata to be filtered, so that only samples found in both files are
268 retained.
269 feature_count : Int % Range(0, None), optional
270 Filter feature table to include top N most important features. Set to
271 zero to include all features.
272
273 Returns
274 -------
275 sample_estimator : SampleEstimator[Regressor]
276 Trained sample estimator.
277 feature_importance : FeatureData[Importance]
278 Importance of each input feature to model accuracy.
279 predictions : SampleData[RegressorPredictions]
280 Predicted target values for each input sample.
281 model_summary : Visualization
282 Summarized parameter and (if enabled) feature selection information for
283 the trained estimator.
284 accuracy_results : Visualization
285 Accuracy results visualization.
286 maz_scores : SampleData[RegressorPredictions]
287 Microbiota-for-age z-score predictions.
288 clustermap : Visualization
289 Heatmap of important feature abundance at each time point in each
290 group.
291 volatility_plots : Visualization
292 Interactive volatility plots of MAZ and maturity scores, target
293 (column) predictions, and the sample metadata.
294 ]]></help>
295 <macros> <!-- Macro definitions are loaded from the external file named in the import below. -->
296 <import>qiime_citation.xml</import>
297 </macros>
298 <expand macro="qiime_citation"/> <!-- Inserts the imported "qiime_citation" macro here; presumably it supplies the citation block (confirm in qiime_citation.xml). -->
299 </tool>