comparison qiime2/qiime_sample-classifier_fit-classifier.xml @ 29:3ba9833030c1 draft

Uploaded
author florianbegusch
date Fri, 04 Sep 2020 13:12:49 +0000
parents
children
comparison
equal deleted inserted replaced
28:c28331a63dfd 29:3ba9833030c1
1 <?xml version="1.0" ?>
2 <tool id="qiime_sample-classifier_fit-classifier" name="qiime sample-classifier fit-classifier"
3 version="2020.8">
4 <description>Fit a supervised learning classifier.</description>
5 <requirements> <!-- Runtime dependency: the qiime2 package, pinned to the same release as the tool version. -->
6 <requirement version="2020.8" type="package">qiime2</requirement>
7 </requirements>
8 <command><![CDATA[
9 ## Assemble the 'qiime sample-classifier fit-classifier' call from the job
10 ## parameters, then copy both result artifacts to the Galaxy output datasets.
11 qiime sample-classifier fit-classifier
12
13 --i-table=$itable
14
15 ## One --m-metadata-file option per populated repeat entry; entries whose
16 ## optional dataset was left unset are skipped.
17 #for $metadata_entry in $input_files_mmetadatafile:
18 #if str($metadata_entry.additional_input) != 'None':
19 --m-metadata-file=$metadata_entry.additional_input
20 #end if
21 #end for
22
23 ## Galaxy's text sanitizer encodes brackets and quotes as __xx__ tokens;
24 ## decode them so the value matches the real column header. Any other
25 ## disallowed character arrives as 'X' and cannot be told apart from a
26 ## literal 'X', so 'X' is deliberately left untouched.
27 #set $target_column = str($mmetadatacolumn)
28 #set $target_column = $target_column.replace('__ob__', '[')
29 #set $target_column = $target_column.replace('__cb__', ']')
30 #set $target_column = $target_column.replace('__dq__', '"')
31 ## The value is single-quoted for the shell below, so a decoded single quote
32 ## has to close the quoting, emit an escaped quote, and reopen it.
33 #set $target_column = $target_column.replace('__sq__', "'\"'\"'")
34
35 --m-metadata-column='$target_column'
36
37 ## Numeric options are optional in the form; omit them when left blank so
38 ## QIIME 2 falls back to its own defaults.
39 #if str($pstep):
40 --p-step=$pstep
41 #end if
42
43 #if str($pcv):
44 --p-cv=$pcv
45 #end if
46
47 #if str($prandomstate):
48 --p-random-state=$prandomstate
49 #end if
50
51 ## Use the number of cores Galaxy allotted to the job (1 when unset).
52 --p-n-jobs=\${GALAXY_SLOTS:-1}
53
54 #if str($pnestimators):
55 --p-n-estimators=$pnestimators
56 #end if
57
58 #if str($pestimator) != 'None':
59 --p-estimator=$pestimator
60 #end if
61
62 #if $poptimizefeatureselection:
63 --p-optimize-feature-selection
64 #end if
65
66 #if $pparametertuning:
67 --p-parameter-tuning
68 #end if
69
70 #if str($pmissingsamples) != 'None':
71 --p-missing-samples=$pmissingsamples
72 #end if
73
74 --o-sample-estimator=osampleestimator
75
76 --o-feature-importance=ofeatureimportance
77
78 ## --examples only prints usage examples and exits, so it is never passed.
79
80 ## QIIME 2 appends .qza to the output names; hand both artifacts to Galaxy.
81 &&
82 cp osampleestimator.qza $osampleestimator
83 &&
84 cp ofeatureimportance.qza $ofeatureimportance
85 ]]></command>
86 <inputs> <!-- Form fields; each name is referenced as $name by the command template. -->
87 <param format="qza,no_unzip.zip" label="--i-table: ARTIFACT FeatureTable[Frequency] Feature table containing all features that should be used for target prediction. [required]" name="itable" optional="False" type="data" />
88 <repeat name="input_files_mmetadatafile" optional="True" title="--m-metadata-file">
89 <param format="tabular,qza,no_unzip.zip" label="--m-metadata-file: METADATA" name="additional_input" optional="True" type="data" />
90 </repeat>
91 <param label="--m-metadata-column: COLUMN MetadataColumn[Categorical] Categorical metadata column to use as prediction target. [required]" name="mmetadatacolumn" optional="False" type="text" />
92 <param exclude_min="True" label="--p-step: PROPORTION Range(0.0, 1.0, inclusive_start=False) If optimize-feature-selection is True, step is the percentage of features to remove at each iteration. [default: 0.05]" max="1.0" min="0.0" name="pstep" optional="True" type="float" value="0.05" />
93 <param label="--p-cv: INTEGER Number of k-fold cross-validations to perform. Range(1, None) [default: 5]" min="1" name="pcv" optional="True" type="integer" value="5" />
94 <param label="--p-random-state: INTEGER Seed used by random number generator. [optional]" name="prandomstate" optional="True" type="integer" value="" />
95 <param label="--p-n-estimators: INTEGER Range(1, None) Number of trees to grow for estimation. More trees will improve predictive accuracy up to a threshold level, but will also increase time and memory requirements. This parameter only affects ensemble estimators, such as Random Forest, AdaBoost, ExtraTrees, and GradientBoosting. [default: 100]" min="1" name="pnestimators" optional="True" type="integer" value="100" />
96 <param label="--p-estimator: Estimator method to use for sample prediction." name="pestimator" optional="True" type="select">
97 <option selected="True" value="None">Selection is Optional</option>
98 <option value="RandomForestClassifier">RandomForestClassifier</option>
99 <option value="ExtraTreesClassifier">ExtraTreesClassifier</option>
100 <option value="GradientBoostingClassifier">GradientBoostingClassifier</option>
101 <option value="AdaBoostClassifier">AdaBoostClassifier</option>
102 <option value="KNeighborsClassifier">KNeighborsClassifier</option>
103 <option value="LinearSVC">LinearSVC</option>
104 <option value="SVC">SVC</option>
105 </param>
106 <param label="--p-optimize-feature-selection / --p-no-optimize-feature-selection: Automatically optimize input feature selection using recursive feature elimination. [default: False]" name="poptimizefeatureselection" checked="false" type="boolean" />
107 <param label="--p-parameter-tuning / --p-no-parameter-tuning: Automatically tune hyperparameters using random grid search. [default: False]" name="pparametertuning" checked="false" type="boolean" />
108 <param label="--p-missing-samples: How to handle missing samples in metadata. 'error' will fail if missing samples are detected; 'ignore' filters the feature table and metadata so that only samples found in both files are retained." name="pmissingsamples" optional="True" type="select">
109 <option selected="True" value="None">Selection is Optional</option>
110 <option value="error">error</option>
111 <option value="ignore">ignore</option>
112 </param>
113 <!-- The CLI 'examples' option is a flag that prints usage examples and exits, not a dataset input; it is optional so a job never has to supply it. -->
114 <param label="--examples: Show usage examples and exit." name="examples" optional="True" type="data" />
115 </inputs>
116
117 <outputs> <!-- Two QIIME 2 artifacts (qza): the fitted estimator and the per-feature importances. -->
118 <data name="osampleestimator" format="qza" label="${tool.name} on ${on_string}: sampleestimator.qza" />
119 <data name="ofeatureimportance" format="qza" label="${tool.name} on ${on_string}: featureimportance.qza" />
120
121 </outputs>
122
123 <help><![CDATA[
124 Fit a supervised learning classifier.
125 ###############################################################
126
127 Fit a supervised learning classifier. Outputs the fit estimator (for
128 prediction of test samples and/or unknown samples) and the relative
129 importance of each feature for model accuracy. Optionally use k-fold
130 cross-validation for automatic recursive feature elimination and
131 hyperparameter tuning.
132
133 Parameters
134 ----------
135 table : FeatureTable[Frequency]
136 Feature table containing all features that should be used for target
137 prediction.
138 metadata : MetadataColumn[Categorical]
139 Categorical metadata column to use as prediction target.
140 step : Float % Range(0.0, 1.0, inclusive_start=False), optional
141 If optimize_feature_selection is True, step is the percentage of
142 features to remove at each iteration.
143 cv : Int % Range(1, None), optional
144 Number of k-fold cross-validations to perform.
145 random_state : Int, optional
146 Seed used by random number generator.
147 n_jobs : Int, optional
148 Number of jobs to run in parallel.
149 n_estimators : Int % Range(1, None), optional
150 Number of trees to grow for estimation. More trees will improve
151 predictive accuracy up to a threshold level, but will also increase
152 time and memory requirements. This parameter only affects ensemble
153 estimators, such as Random Forest, AdaBoost, ExtraTrees, and
154 GradientBoosting.
155 estimator : Str % Choices('RandomForestClassifier', 'ExtraTreesClassifier', 'GradientBoostingClassifier', 'AdaBoostClassifier', 'KNeighborsClassifier', 'LinearSVC', 'SVC'), optional
156 Estimator method to use for sample prediction.
157 optimize_feature_selection : Bool, optional
158 Automatically optimize input feature selection using recursive feature
159 elimination.
160 parameter_tuning : Bool, optional
161 Automatically tune hyperparameters using random grid search.
162 missing_samples : Str % Choices('error', 'ignore'), optional
163 How to handle missing samples in metadata. "error" will fail if missing
164 samples are detected. "ignore" will cause the feature table and
165 metadata to be filtered, so that only samples found in both files are
166 retained.
167
168 Returns
169 -------
170 sample_estimator : SampleEstimator[Classifier]
171 Trained sample classifier.
172 feature_importance : FeatureData[Importance]
173 Importance of each input feature to model accuracy.
174 ]]></help>
175 <macros> <!-- Imports the macro definitions in qiime_citation.xml; the qiime_citation macro is expanded below. -->
176 <import>qiime_citation.xml</import>
177 </macros>
178 <expand macro="qiime_citation" />
179 </tool>