comparison qiime2-2020.8/qiime_longitudinal_feature-volatility.xml @ 20:d93d8888f0b0 draft

Uploaded
author florianbegusch
date Fri, 04 Sep 2020 12:44:24 +0000
parents
children
comparison
equal deleted inserted replaced
19:6c48f8d82424 20:d93d8888f0b0
<?xml version="1.0" ?>
<!--
  Galaxy wrapper for `qiime longitudinal feature-volatility` (QIIME 2 2020.8).

  The <command> template builds the CLI call from the <inputs> below, writes all
  results into the job working directory, and then copies/exports each result to
  the matching Galaxy dataset declared in <outputs>.
-->
<tool id="qiime_longitudinal_feature-volatility" name="qiime longitudinal feature-volatility"
      version="2020.8">
    <description>Feature volatility analysis</description>
    <requirements>
        <requirement type="package" version="2020.8">qiime2</requirement>
    </requirements>
    <command><![CDATA[
## Cheetah directives must start with '#' immediately followed by the keyword
## ('#if', not '# if'); lines starting with '##' are template comments and are
## not emitted into the shell command.

qiime longitudinal feature-volatility

--i-table='$itable'

## One --m-metadata-file option per entry of the repeat block.
#for $metadata_entry in $input_files_mmetadatafile
--m-metadata-file='$metadata_entry.additional_input'
#end for

## Text parameters arrive with Galaxy's sanitizer tokens in place of special
## characters. Restore brackets and quotes, then single-quote the value for the
## shell. A literal single quote is emitted as '"'"' so it survives the quoting.
## Characters that Galaxy replaced with a plain 'X' cannot be recovered, so 'X'
## is deliberately left untouched (column names may legitimately contain it).
#set $state_column = str($pstatecolumn).replace('__ob__', '[').replace('__cb__', ']').replace('__dq__', '"').replace('__sq__', "'\"'\"'")
--p-state-column='$state_column'

#if str($pindividualidcolumn) not in ('', 'None')
#set $individual_id_column = str($pindividualidcolumn).replace('__ob__', '[').replace('__cb__', ']').replace('__dq__', '"').replace('__sq__', "'\"'\"'")
--p-individual-id-column='$individual_id_column'
#end if

## Numeric options are only passed when a value is present, so that a cleared
## form field falls back to the QIIME 2 default instead of producing '--opt='.
#if str($pcv) not in ('', 'None')
--p-cv=$pcv
#end if

#if str($prandomstate) not in ('', 'None')
--p-random-state=$prandomstate
#end if

#if str($pnjobs) not in ('', 'None')
--p-n-jobs=$pnjobs
#end if

#if str($pnestimators) not in ('', 'None')
--p-n-estimators=$pnestimators
#end if

#if str($pestimator) != 'None'
--p-estimator=$pestimator
#end if

#if $pparametertuning
--p-parameter-tuning
#end if

#if str($pmissingsamples) != 'None'
--p-missing-samples=$pmissingsamples
#end if

#if str($pimportancethreshold) not in ('', 'None')
--p-importance-threshold='$pimportancethreshold'
#end if

#if str($pfeaturecount) not in ('', 'None')
--p-feature-count='$pfeaturecount'
#end if

## QIIME 2 appends .qza / .qzv to these base names in the working directory.
--o-filtered-table=ofilteredtable
--o-feature-importance=ofeatureimportance
--o-volatility-plot=ovolatilityplot
--o-accuracy-results=oaccuracyresults
--o-sample-estimator=osampleestimator

## Chain with '&&' so that nothing is copied when the QIIME 2 call fails.
&& cp ofilteredtable.qza '$ofilteredtable'
&& cp ofeatureimportance.qza '$ofeatureimportance'
&& cp osampleestimator.qza '$osampleestimator'

## Visualizations are exported to a directory; index.html becomes the primary
## HTML dataset and the remaining assets go to the dataset's extra files path.
&& qiime tools export --input-path ovolatilityplot.qzv --output-path volatility_plot_export
&& mkdir -p '${ovolatilityplot.files_path}'
&& cp -r volatility_plot_export/* '${ovolatilityplot.files_path}'
&& mv '${ovolatilityplot.files_path}/index.html' '$ovolatilityplot'

&& qiime tools export --input-path oaccuracyresults.qzv --output-path accuracy_results_export
&& mkdir -p '${oaccuracyresults.files_path}'
&& cp -r accuracy_results_export/* '${oaccuracyresults.files_path}'
&& mv '${oaccuracyresults.files_path}/index.html' '$oaccuracyresults'
    ]]></command>
    <inputs>
        <param format="qza,no_unzip.zip" label="--i-table: ARTIFACT FeatureTable[Frequency] Feature table containing all features that should be used for target prediction. [required]" name="itable" optional="False" type="data" />
        <!-- min="1": at least one metadata file is required. -->
        <repeat name="input_files_mmetadatafile" min="1" title="--m-metadata-file">
            <param format="tabular,qza,no_unzip.zip" label="--m-metadata-file: METADATA... (multiple arguments will be merged) Sample metadata file containing individual-id-column. [required]" name="additional_input" optional="False" type="data" />
        </repeat>
        <param label="--p-state-column: TEXT Metadata containing collection time (state) values for each sample. Must contain exclusively numeric values. [required]" name="pstatecolumn" optional="False" type="text" />
        <param label="--p-individual-id-column: TEXT Metadata column containing IDs for individual subjects. [optional]" name="pindividualidcolumn" optional="True" type="text" />
        <param label="--p-cv: INTEGER Number of k-fold cross-validations to perform. Range(1, None) [default: 5]" min="1" name="pcv" optional="True" type="integer" value="5" />
        <param label="--p-random-state: INTEGER Seed used by random number generator. [optional]" name="prandomstate" optional="True" type="integer" />
        <!-- Referenced by the command template as $pnjobs. -->
        <param label="--p-n-jobs: INTEGER Number of jobs to run in parallel. [default: 1]" name="pnjobs" optional="True" type="integer" value="1" />
        <param label="--p-n-estimators: INTEGER Range(1, None) Number of trees to grow for estimation. More trees will improve predictive accuracy up to a threshold level, but will also increase time and memory requirements. This parameter only affects ensemble estimators, such as Random Forest, AdaBoost, ExtraTrees, and GradientBoosting. [default: 100]" min="1" name="pnestimators" optional="True" type="integer" value="100" />
        <param label="--p-estimator: Estimator method to use for sample prediction." name="pestimator" optional="True" type="select">
            <option selected="True" value="None">Selection is Optional</option>
            <option value="RandomForestRegressor">RandomForestRegressor</option>
            <option value="ExtraTreesRegressor">ExtraTreesRegressor</option>
            <option value="GradientBoostingRegressor">GradientBoostingRegressor</option>
            <option value="AdaBoostRegressor">AdaBoostRegressor</option>
            <option value="ElasticNet">ElasticNet</option>
            <option value="Ridge">Ridge</option>
            <option value="Lasso">Lasso</option>
            <option value="KNeighborsRegressor">KNeighborsRegressor</option>
            <option value="LinearSVR">LinearSVR</option>
            <option value="SVR">SVR</option>
        </param>
        <param label="--p-parameter-tuning: --p-parameter-tuning: / --p-no-parameter-tuning Automatically tune hyperparameters using random grid search. [default: False]" name="pparametertuning" checked="False" type="boolean" />
        <param label="--p-missing-samples: How to handle missing samples in metadata." name="pmissingsamples" optional="True" type="select">
            <option selected="True" value="None">Selection is Optional</option>
            <option value="error">error</option>
            <option value="ignore">ignore</option>
        </param>
        <!-- Free text because the option accepts either a number or a quartile name; leave empty to omit the option. -->
        <param label="--p-importance-threshold: Float % Range(0, None, inclusive_start=False) | Str % Choices('q1', 'q2', 'q3') Filter feature table to exclude any features with an importance score less than this threshold. Set to q1, q2, or q3 to select the first, second, or third quartile of values. Leave empty to disable this filter. [optional]" name="pimportancethreshold" optional="True" type="text" />
        <!-- Free text because the option accepts either an integer or the word 'all'; leave empty to omit the option. -->
        <param label="--p-feature-count: Int % Range(1, None) | Str % Choices('all') Filter feature table to include top N most important features. Set to all to include all features. [optional]" name="pfeaturecount" optional="True" type="text" />
    </inputs>

    <outputs>
        <data format="qza" label="${tool.name} on ${on_string}: filteredtable.qza" name="ofilteredtable" />
        <data format="qza" label="${tool.name} on ${on_string}: featureimportance.qza" name="ofeatureimportance" />
        <data format="html" label="${tool.name} on ${on_string}: volatilityplot.html" name="ovolatilityplot" />
        <data format="html" label="${tool.name} on ${on_string}: accuracyresults.html" name="oaccuracyresults" />
        <data format="qza" label="${tool.name} on ${on_string}: sampleestimator.qza" name="osampleestimator" />
    </outputs>

    <help><![CDATA[
Feature volatility analysis
###############################################################

Identify features that are predictive of a numeric metadata column,
state_column (e.g., time), and plot their relative frequencies across
states using interactive feature volatility plots. A supervised learning
regressor is used to identify important features and assess their ability
to predict sample states. state_column will typically be a measure of time,
but any numeric metadata column can be used.

Parameters
----------
table : FeatureTable[Frequency]
    Feature table containing all features that should be used for target
    prediction.
metadata : Metadata
    Sample metadata file containing individual_id_column.
state_column : Str
    Metadata containing collection time (state) values for each sample.
    Must contain exclusively numeric values.
individual_id_column : Str, optional
    Metadata column containing IDs for individual subjects.
cv : Int % Range(1, None), optional
    Number of k-fold cross-validations to perform.
random_state : Int, optional
    Seed used by random number generator.
n_jobs : Int, optional
    Number of jobs to run in parallel.
n_estimators : Int % Range(1, None), optional
    Number of trees to grow for estimation. More trees will improve
    predictive accuracy up to a threshold level, but will also increase
    time and memory requirements. This parameter only affects ensemble
    estimators, such as Random Forest, AdaBoost, ExtraTrees, and
    GradientBoosting.
estimator : Str % Choices('RandomForestRegressor', 'ExtraTreesRegressor', 'GradientBoostingRegressor', 'AdaBoostRegressor', 'ElasticNet', 'Ridge', 'Lasso', 'KNeighborsRegressor', 'LinearSVR', 'SVR'), optional
    Estimator method to use for sample prediction.
parameter_tuning : Bool, optional
    Automatically tune hyperparameters using random grid search.
missing_samples : Str % Choices('error', 'ignore'), optional
    How to handle missing samples in metadata. "error" will fail if missing
    samples are detected. "ignore" will cause the feature table and
    metadata to be filtered, so that only samples found in both files are
    retained.
importance_threshold : Float % Range(0, None, inclusive_start=False) | Str % Choices('q1', 'q2', 'q3'), optional
    Filter feature table to exclude any features with an importance score
    less than this threshold. Set to "q1", "q2", or "q3" to select the
    first, second, or third quartile of values. Set to "None" to disable
    this filter.
feature_count : Int % Range(1, None) | Str % Choices('all'), optional
    Filter feature table to include top N most important features. Set to
    "all" to include all features.

Returns
-------
filtered_table : FeatureTable[RelativeFrequency]
    Feature table containing only important features.
feature_importance : FeatureData[Importance]
    Importance of each input feature to model accuracy.
volatility_plot : Visualization
    Interactive volatility plot visualization.
accuracy_results : Visualization
    Accuracy results visualization.
sample_estimator : SampleEstimator[Regressor]
    Trained sample regressor.
    ]]></help>
    <macros>
        <import>qiime_citation.xml</import>
    </macros>
    <expand macro="qiime_citation"/>
</tool>