comparison qiime2/qiime_sample-classifier_classify-samples-ncv.xml @ 29:3ba9833030c1 draft

Uploaded
author florianbegusch
date Fri, 04 Sep 2020 13:12:49 +0000
parents
children
comparison
equal deleted inserted replaced
28:c28331a63dfd 29:3ba9833030c1
1 <?xml version="1.0" ?>
2 <tool id="qiime_sample-classifier_classify-samples-ncv" name="qiime sample-classifier classify-samples-ncv"
3 version="2020.8">
4 <description>Nested cross-validated supervised learning classifier.</description>
5 <requirements> <!-- Software the job environment has to provide; the package version equals the version attribute of the tool element. -->
6 <requirement type="package" version="2020.8">qiime2</requirement>
7 </requirements>
8 <command><![CDATA[
9 qiime sample-classifier classify-samples-ncv
10 ## Feature table artifact that supplies the features used for prediction.
11 --i-table=$itable
12 ## Emit one --m-metadata-file option per entry of the repeat block;
13 ## entries whose optional dataset was left unset are skipped.
14 #for $metadata_entry in $input_files_mmetadatafile:
15 #if str($metadata_entry.additional_input) != 'None':
16 --m-metadata-file='$metadata_entry.additional_input'
17 #end if
18 #end for
19
20
21
22
23 ## Galaxy's default text sanitizer rewrites brackets and quotes as
24 ## __ob__ / __cb__ / __dq__ / __sq__ tokens; map them back before use.
25 ## A single quote is emitted as '"'"' so that the value stays intact
26 ## inside the single-quoted shell word built at the end of this section.
27 ## Literal X characters are left untouched: they cannot be told apart
28 ## from an X that really belongs to the column name.
29 #set $column_name = str($mmetadatacolumn)
30 #set $column_name = $column_name.replace('__ob__', '[').replace('__cb__', ']')
31 #set $column_name = $column_name.replace('__dq__', '"')
32 #set $shell_quote = "'" + '"' + "'" + '"' + "'"
33 #set $column_name = $column_name.replace('__sq__', $shell_quote)
34
35
36
37
38
39
40
41
42
43
44 --m-metadata-column='$column_name'
45
46
47 --p-cv=$pcv
48
49 #if str($prandomstate):
50 --p-random-state=$prandomstate
51 #end if
52 --p-n-jobs=\${GALAXY_SLOTS:-1}
53 ## The n-jobs value above is read from the GALAXY_SLOTS shell variable (1 when it is unset).
54 --p-n-estimators=$pnestimators
55
56 #if str($pestimator) != 'None':
57 --p-estimator=$pestimator
58 #end if
59
60 #if $pparametertuning:
61 --p-parameter-tuning
62 #end if
63
64 #if str($pmissingsamples) != 'None':
65 --p-missing-samples=$pmissingsamples
66 #end if
67 ## Result artifacts are written to the job working directory under fixed names.
68 --o-predictions=opredictions
69
70 --o-feature-importance=ofeatureimportance
71
72 --o-probabilities=oprobabilities
73
74 #if str($examples) != 'None':
75 --examples=$examples
76 #end if
77 ## Copy every result artifact to its Galaxy dataset path; && stops at the first failing step.
78 &&
79 cp opredictions.qza '$opredictions' && cp ofeatureimportance.qza '$ofeatureimportance' && cp oprobabilities.qza '$oprobabilities'
80
81 ]]></command>
82 <inputs>
83 <param format="qza,no_unzip.zip" label="--i-table: ARTIFACT FeatureTable[Frequency] Feature table containing all features that should be used for target prediction. [required]" name="itable" optional="False" type="data" />
84 <repeat name="input_files_mmetadatafile" optional="True" title="--m-metadata-file">
85 <param format="tabular,qza,no_unzip.zip" label="--m-metadata-file: METADATA" name="additional_input" optional="True" type="data" />
86 </repeat>
87 <param label="--m-metadata-column: COLUMN MetadataColumn[Categorical] Categorical metadata column to use as prediction target. [required]" name="mmetadatacolumn" optional="False" type="text" />
88 <param label="--p-cv: INTEGER Number of k-fold cross-validations to perform. Range(1, None) [default: 5]" min="1" name="pcv" optional="True" type="integer" value="5" />
89 <param label="--p-random-state: INTEGER Seed used by random number generator. [optional]" name="prandomstate" optional="True" type="text" />
90 <param label="--p-n-estimators: INTEGER Range(1, None) Number of trees to grow for estimation. More trees will improve predictive accuracy up to a threshold level, but will also increase time and memory requirements. This parameter only affects ensemble estimators, such as Random Forest, AdaBoost, ExtraTrees, and GradientBoosting. [default: 100]" min="1" name="pnestimators" optional="True" type="integer" value="100" />
91 <param label="--p-estimator: Estimator method to use for sample prediction. [optional]" name="pestimator" optional="True" type="select">
92 <option selected="True" value="None">Selection is Optional</option>
93 <option value="RandomForestClassifier">RandomForestClassifier</option>
94 <option value="ExtraTreesClassifier">ExtraTreesClassifier</option>
95 <option value="GradientBoostingClassifier">GradientBoostingClassifier</option>
96 <option value="AdaBoostClassifier">AdaBoostClassifier</option>
97 <option value="KNeighborsClassifier">KNeighborsClassifier</option>
98 <option value="LinearSVC">LinearSVC</option>
99 <option value="SVC">SVC</option>
100 </param>
101 <param checked="False" label="--p-parameter-tuning / --p-no-parameter-tuning: Automatically tune hyperparameters using random grid search. [default: False]" name="pparametertuning" type="boolean" />
102 <param label="--p-missing-samples: How to handle missing samples in metadata. 'error' will fail if missing samples are detected. 'ignore' will cause the feature table and metadata to be filtered, so that only samples found in both files are retained. [optional]" name="pmissingsamples" optional="True" type="select">
103 <option selected="True" value="None">Selection is Optional</option>
104 <option value="error">error</option>
105 <option value="ignore">ignore</option>
106 </param>
107 <param label="--examples: Show usage examples and exit." name="examples" optional="True" type="data" />
108 <!-- "prandomstate" and "examples" may stay empty: the command template only adds their options when a value is present. -->
109 </inputs>
110
111 <outputs>
112 <data name="opredictions" format="qza" label="${tool.name} on ${on_string}: predictions.qza" />
113 <data name="ofeatureimportance" format="qza" label="${tool.name} on ${on_string}: featureimportance.qza" />
114 <data name="oprobabilities" format="qza" label="${tool.name} on ${on_string}: probabilities.qza" />
115 <!-- One qza dataset per result artifact; each name is the variable the command template uses for that dataset's path. -->
116 </outputs>
117
118 <help><![CDATA[
119 Nested cross-validated supervised learning classifier.
120 ###############################################################
121
122 Predicts a categorical sample metadata column using a supervised learning
123 classifier. Uses nested stratified k-fold cross validation for automated
124 hyperparameter optimization and sample prediction. Outputs predicted values
125 for each input sample, and relative importance of each feature for model
126 accuracy.
127
128 Parameters
129 ----------
130 table : FeatureTable[Frequency]
131 Feature table containing all features that should be used for target
132 prediction.
133 metadata : MetadataColumn[Categorical]
134 Categorical metadata column to use as prediction target.
135 cv : Int % Range(1, None), optional
136 Number of k-fold cross-validations to perform.
137 random_state : Int, optional
138 Seed used by random number generator.
139 n_jobs : Int, optional
140 Number of jobs to run in parallel.
141 n_estimators : Int % Range(1, None), optional
142 Number of trees to grow for estimation. More trees will improve
143 predictive accuracy up to a threshold level, but will also increase
144 time and memory requirements. This parameter only affects ensemble
145 estimators, such as Random Forest, AdaBoost, ExtraTrees, and
146 GradientBoosting.
147 estimator : Str % Choices('RandomForestClassifier', 'ExtraTreesClassifier', 'GradientBoostingClassifier', 'AdaBoostClassifier', 'KNeighborsClassifier', 'LinearSVC', 'SVC'), optional
148 Estimator method to use for sample prediction.
149 parameter_tuning : Bool, optional
150 Automatically tune hyperparameters using random grid search.
151 missing_samples : Str % Choices('error', 'ignore'), optional
152 How to handle missing samples in metadata. "error" will fail if missing
153 samples are detected. "ignore" will cause the feature table and
154 metadata to be filtered, so that only samples found in both files are
155 retained.
156
157 Returns
158 -------
159 predictions : SampleData[ClassifierPredictions]
160 Predicted target values for each input sample.
161 feature_importance : FeatureData[Importance]
162 Importance of each input feature to model accuracy.
163 probabilities : SampleData[Probabilities]
164 Predicted class probabilities for each input sample.
165 ]]></help>
166 <macros> <!-- Pulls macro definitions from qiime_citation.xml; the expand element after this block inserts the macro named qiime_citation. -->
167 <import>qiime_citation.xml</import>
168 </macros>
169 <expand macro="qiime_citation"/>
170 </tool>