comparison qiime2/qiime_sample-classifier_regress-samples-ncv.xml @ 29:3ba9833030c1 draft

Uploaded
author florianbegusch
date Fri, 04 Sep 2020 13:12:49 +0000
parents
children
comparison
equal deleted inserted replaced
28:c28331a63dfd 29:3ba9833030c1
1 <?xml version="1.0" ?>
2 <tool id="qiime_sample-classifier_regress-samples-ncv" name="qiime sample-classifier regress-samples-ncv"
3 version="2020.8"> <!-- Wrapper around the "qiime sample-classifier regress-samples-ncv" command line built in the command section. -->
4 <description>Nested cross-validated supervised learning regressor.</description>
5 <requirements>
6 <requirement type="package" version="2020.8">qiime2</requirement> <!-- Package version matches the tool version declared above. -->
7 </requirements>
8 <command><![CDATA[
9 qiime sample-classifier regress-samples-ncv
10 --i-table='$itable'
11 ## Join every dataset of the metadata repeat into repeated metadata-file options. Directives need the keyword directly after '#'.
12 #if $input_files_mmetadatafile:
13 #def list_dict_to_string(list_dict):
14 #set $file_list = list_dict[0]['additional_input'].__getattr__('file_name')
15 #for d in list_dict[1:]:
16 #set $file_list = $file_list + ' --m-metadata-file=' + d['additional_input'].__getattr__('file_name')
17 #end for
18 #return $file_list
19 #end def
20 --m-metadata-file=$list_dict_to_string($input_files_mmetadatafile)
21 #end if
22
23 ## Undo Galaxy's text-parameter encoding so the column name reaches qiime
24 ## unchanged: __ob__ is [, __cb__ is ], __dq__ is a double quote, __sq__ is a single quote.
25 ## A literal X is left alone because it is a legitimate character in column names.
26 #set $column_name = str($mmetadatacolumn)
27 #if '__ob__' in $column_name:
28 #set $column_name = $column_name.replace('__ob__', '[')
29 #end if
30 #if '__cb__' in $column_name:
31 #set $column_name = $column_name.replace('__cb__', ']')
32 #end if
33 #if '__dq__' in $column_name:
34 #set $column_name = $column_name.replace('__dq__', '"')
35 #end if
36 ## The value is emitted inside single quotes below, so a restored single quote
37 ## is written as: close quote, double-quoted single quote, reopen quote.
38 #if '__sq__' in $column_name:
39 #set $column_name = $column_name.replace('__sq__', "'\"'\"'")
40 #end if
41
42
43
44 --m-metadata-column='$column_name'
45
46
47 --p-cv=$pcv
48 #if str($prandomstate):
49 --p-random-state=$prandomstate
50 #end if
51 ## No pnjobs input is declared by this tool, so take the core count from the job environment (1 when unset).
52 --p-n-jobs=\${GALAXY_SLOTS:-1}
53
54 --p-n-estimators=$pnestimators
55
56 #if str($pestimator) != 'None':
57 --p-estimator=$pestimator
58 #end if
59
60 #if $pstratify:
61 --p-stratify
62 #end if
63
64 #if $pparametertuning:
65 --p-parameter-tuning
66 #end if
67
68 #if str($pmissingsamples) != 'None':
69 --p-missing-samples=$pmissingsamples
70 #end if
71
72 --o-predictions=opredictions
73
74 --o-feature-importance=ofeatureimportance
75
76 #if str($examples) != 'None':
77 --examples=$examples
78 #end if
79 ## Copy both result artifacts to their output datasets, and only when qiime itself succeeded.
80 && cp opredictions.qza '$opredictions'
81 && cp ofeatureimportance.qza '$ofeatureimportance'
82
83 ]]></command>
84 <inputs> <!-- Form fields; each name below is referenced as $name inside the command template. -->
85 <param format="qza,no_unzip.zip" label="--i-table: ARTIFACT FeatureTable[Frequency] Feature table containing all features that should be used for target prediction. [required]" name="itable" optional="False" type="data" />
86 <repeat name="input_files_mmetadatafile" optional="True" title="--m-metadata-file">
87 <param format="tabular,qza,no_unzip.zip" label="--m-metadata-file: METADATA" name="additional_input" optional="True" type="data" />
88 </repeat>
89 <param label="--m-metadata-column: COLUMN MetadataColumn[Numeric] Numeric metadata column to use as prediction target. [required]" name="mmetadatacolumn" optional="False" type="text" />
90 <param label="--p-cv: INTEGER Number of k-fold cross-validations to perform. Range(1, None) [default: 5]" min="1" name="pcv" optional="True" type="integer" value="5" />
91 <param label="--p-random-state: INTEGER Seed used by random number generator. [optional]" name="prandomstate" optional="True" type="integer" />
92 <param label="--p-n-estimators: INTEGER Range(1, None) Number of trees to grow for estimation. More trees will improve predictive accuracy up to a threshold level, but will also increase time and memory requirements. This parameter only affects ensemble estimators, such as Random Forest, AdaBoost, ExtraTrees, and GradientBoosting. [default: 100]" min="1" name="pnestimators" optional="True" type="integer" value="100" />
93 <param label="--p-estimator: " name="pestimator" optional="True" type="select">
94 <option selected="True" value="None">Selection is Optional</option>
95 <option value="RandomForestRegressor">RandomForestRegressor</option>
96 <option value="ExtraTreesRegressor">ExtraTreesRegressor</option>
97 <option value="GradientBoostingRegressor">GradientBoostingRegressor</option>
98 <option value="AdaBoostRegressor">AdaBoostRegressor</option>
99 <option value="ElasticNet">ElasticNet</option>
100 <option value="Ridge">Ridge</option>
101 <option value="Lasso">Lasso</option>
102 <option value="KNeighborsRegressor">KNeighborsRegressor</option>
103 <option value="LinearSVR">LinearSVR</option>
104 <option value="SVR">SVR</option>
105 </param>
106 <param label="--p-stratify: --p-stratify: / --p-no-stratify Evenly stratify training and test data among metadata categories. If True, all values in column must match at least two samples. [default: False]" name="pstratify" checked="False" type="boolean" />
107 <param label="--p-parameter-tuning: --p-parameter-tuning: / --p-no-parameter-tuning Automatically tune hyperparameters using random grid search. [default: False]" name="pparametertuning" checked="False" type="boolean" />
108 <param label="--p-missing-samples: " name="pmissingsamples" optional="True" type="select">
109 <option selected="True" value="None">Selection is Optional</option>
110 <option value="error">error</option>
111 <option value="ignore">ignore</option>
112 </param>
113 <param label="--examples: Show usage examples and exit." name="examples" optional="True" type="data" />
114 <!-- "prandomstate" and "examples" are optional so the command template can leave their options out when no value is given. -->
115 </inputs>
116
117 <outputs>
118 <data name="opredictions" format="qza" label="${tool.name} on ${on_string}: predictions.qza" />
119 <data name="ofeatureimportance" format="qza" label="${tool.name} on ${on_string}: featureimportance.qza" />
120 <!-- Two qza output datasets; each label combines the tool name, the input description and the artifact file name. -->
121 </outputs>
122
123 <help><![CDATA[
124 Nested cross-validated supervised learning regressor.
125 ###############################################################
126
127 Predicts a continuous sample metadata column using a supervised learning
128 regressor. Uses nested stratified k-fold cross validation for automated
129 hyperparameter optimization and sample prediction. Outputs predicted values
130 for each input sample, and relative importance of each feature for model
131 accuracy.
132
133 Parameters
134 ----------
135 table : FeatureTable[Frequency]
136 Feature table containing all features that should be used for target
137 prediction.
138 metadata : MetadataColumn[Numeric]
139 Numeric metadata column to use as prediction target.
140 cv : Int % Range(1, None), optional
141 Number of k-fold cross-validations to perform.
142 random_state : Int, optional
143 Seed used by random number generator.
144 n_jobs : Int, optional
145 Number of jobs to run in parallel.
146 n_estimators : Int % Range(1, None), optional
147 Number of trees to grow for estimation. More trees will improve
148 predictive accuracy up to a threshold level, but will also increase
149 time and memory requirements. This parameter only affects ensemble
150 estimators, such as Random Forest, AdaBoost, ExtraTrees, and
151 GradientBoosting.
152 estimator : Str % Choices('RandomForestRegressor', 'ExtraTreesRegressor', 'GradientBoostingRegressor', 'AdaBoostRegressor', 'ElasticNet', 'Ridge', 'Lasso', 'KNeighborsRegressor', 'LinearSVR', 'SVR'), optional
153 Estimator method to use for sample prediction.
154 stratify : Bool, optional
155 Evenly stratify training and test data among metadata categories. If
156 True, all values in column must match at least two samples.
157 parameter_tuning : Bool, optional
158 Automatically tune hyperparameters using random grid search.
159 missing_samples : Str % Choices('error', 'ignore'), optional
160 How to handle missing samples in metadata. "error" will fail if missing
161 samples are detected. "ignore" will cause the feature table and
162 metadata to be filtered, so that only samples found in both files are
163 retained.
164
165 Returns
166 -------
167 predictions : SampleData[RegressorPredictions]
168 Predicted target values for each input sample.
169 feature_importance : FeatureData[Importance]
170 Importance of each input feature to model accuracy.
171 ]]></help>
172 <macros>
173 <import>qiime_citation.xml</import>
174 </macros>
175 <expand macro="qiime_citation"/> <!-- Expands the "qiime_citation" macro; presumably defined in the imported qiime_citation.xml, which is not visible here. -->
176 </tool>