Mercurial > repos > bgruening > sklearn_pca
comparison pca.xml @ 1:132805688fa3 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
author | bgruening |
---|---|
date | Tue, 13 Apr 2021 18:49:35 +0000 |
parents | 2d7016b3ae92 |
children | c16818ce0424 |
comparison
equal
deleted
inserted
replaced
0:2d7016b3ae92 | 1:132805688fa3 |
---|---|
1 <tool id="sklearn_pca" name="Principal component analysis" version="@VERSION@+galaxy@GALAXY_VERSION@"> | 1 <tool id="sklearn_pca" name="Principal component analysis" version="@VERSION@+galaxy@GALAXY_VERSION@" profile="20.05"> |
2 <description>with scikit-learn</description> | 2 <description>with scikit-learn</description> |
3 <macros> | 3 <macros> |
4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
5 <token name="@GALAXY_VERSION@">0</token> | 5 <token name="@GALAXY_VERSION@">0</token> |
6 </macros> | 6 </macros> |
7 <expand macro="python_requirements"/> | 7 <expand macro="python_requirements" /> |
8 <expand macro="macro_stdio"/> | 8 <expand macro="macro_stdio" /> |
9 <version_command>echo "@VERSION@"</version_command> | 9 <version_command>echo "@VERSION@"</version_command> |
10 <command detect_errors="exit_code"> | 10 <command detect_errors="exit_code"> |
11 <![CDATA[ | 11 <![CDATA[ |
12 python '$__tool_directory__/pca.py' | 12 python '$__tool_directory__/pca.py' |
13 -i '$infile' | 13 -i '$infile' |
59 #end if | 59 #end if |
60 -o '$outfile' | 60 -o '$outfile' |
61 ]]> | 61 ]]> |
62 </command> | 62 </command> |
63 <inputs> | 63 <inputs> |
64 <param name="infile" type="data" format="tabular" label="Input file"/> | 64 <param name="infile" type="data" format="tabular" label="Input file" /> |
65 <param name="header" type="boolean" label="Exclude Header" truevalue="--header" falsevalue="" help="If present, exclude the header row from the dataset"/> | 65 <param name="header" type="boolean" label="Exclude Header" truevalue="--header" falsevalue="" help="If present, exclude the header row from the dataset" /> |
66 <conditional name="column_selector_options"> | 66 <conditional name="column_selector_options"> |
67 <expand macro="samples_column_selector_options" multiple="true" column_option="selected_column_selector_option" col_name="col1" infile="infile"/> | 67 <expand macro="samples_column_selector_options" multiple="true" column_option="selected_column_selector_option" col_name="col1" infile="infile" /> |
68 </conditional> | 68 </conditional> |
69 <conditional name="select_pca_type"> | 69 <conditional name="select_pca_type"> |
70 <param name="select_pca_opts" type="select" label="Select PCA Type" help="Choose which flavour of PCA to use"> | 70 <param name="select_pca_opts" type="select" label="Select PCA Type" help="Choose which flavour of PCA to use"> |
71 <option value="classical" selected="true">Classical PCA</option> | 71 <option value="classical" selected="true">Classical PCA</option> |
72 <option value="incremental">Incremental PCA</option> | 72 <option value="incremental">Incremental PCA</option> |
73 <option value="kernel">Kernel PCA</option> | 73 <option value="kernel">Kernel PCA</option> |
74 </param> | 74 </param> |
75 <when value="classical"> | 75 <when value="classical"> |
76 <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept"/> | 76 <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept" /> |
77 <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" help="Setting this option will reduce the redundancy and correlations between the features"/> | 77 <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" help="Setting this option will reduce the redundancy and correlations between the features" /> |
78 <conditional name="select_solver_type"> | 78 <conditional name="select_solver_type"> |
79 <param name="svd_solver_opts" type="select" label="SVD Solver" help="Method to perform the singular value decomposition"> | 79 <param name="svd_solver_opts" type="select" label="SVD Solver" help="Method to perform the singular value decomposition"> |
80 <option value="auto" selected="true">auto</option> | 80 <option value="auto" selected="true">auto</option> |
81 <option value="full">full</option> | 81 <option value="full">full</option> |
82 <option value="arpack">arpack</option> | 82 <option value="arpack">arpack</option> |
83 <option value="randomized">randomized</option> | 83 <option value="randomized">randomized</option> |
84 </param> | 84 </param> |
85 <when value="arpack"> | 85 <when value="arpack"> |
86 <param name="tolerance" type="float" value="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack"/> | 86 <param name="tolerance" type="float" value="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack" /> |
87 </when> | 87 </when> |
88 <when value="auto"/> | 88 <when value="auto" /> |
89 <when value="full"/> | 89 <when value="full" /> |
90 <when value="randomized"/> | 90 <when value="randomized" /> |
91 </conditional> | 91 </conditional> |
92 </when> | 92 </when> |
93 <when value="incremental"> | 93 <when value="incremental"> |
94 <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept"/> | 94 <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept" /> |
95 <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue=""/> | 95 <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" /> |
96 <param name="batch_size" type="integer" optional="true" label="Batch Size" help="The number of samples to use for each batch"/> | 96 <param name="batch_size" type="integer" optional="true" label="Batch Size" help="The number of samples to use for each batch" /> |
97 </when> | 97 </when> |
98 <when value="kernel"> | 98 <when value="kernel"> |
99 <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept"/> | 99 <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept" /> |
100 <conditional name="select_kernel_opts"> | 100 <conditional name="select_kernel_opts"> |
101 <param name="kernel_opts" type="select" label="Kernel Type"> | 101 <param name="kernel_opts" type="select" label="Kernel Type"> |
102 <option value="linear" selected="true">linear</option> | 102 <option value="linear" selected="true">linear</option> |
103 <option value="poly">poly</option> | 103 <option value="poly">poly</option> |
104 <option value="rbf">rbf</option> | 104 <option value="rbf">rbf</option> |
105 <option value="sigmoid">sigmoid</option> | 105 <option value="sigmoid">sigmoid</option> |
106 <option value="cosine">cosine</option> | 106 <option value="cosine">cosine</option> |
107 <option value="precomputed">precomputed</option> | 107 <option value="precomputed">precomputed</option> |
108 </param> | 108 </param> |
109 <when value="poly"> | 109 <when value="poly"> |
110 <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels"/> | 110 <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels" /> |
111 <param name="degree" type="integer" value="3" label="Degree of the polynomial" help="Degree for poly kernels. Ignored by other kernels"/> | 111 <param name="degree" type="integer" value="3" label="Degree of the polynomial" help="Degree for poly kernels. Ignored by other kernels" /> |
112 <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels"/> | 112 <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels" /> |
113 </when> | 113 </when> |
114 <when value="sigmoid"> | 114 <when value="sigmoid"> |
115 <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels"/> | 115 <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels" /> |
116 <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels"/> | 116 <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels" /> |
117 </when> | 117 </when> |
118 <when value="rbf"> | 118 <when value="rbf"> |
119 <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels"/> | 119 <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels" /> |
120 </when> | 120 </when> |
121 <when value="linear"/> | 121 <when value="linear" /> |
122 <when value="cosine"/> | 122 <when value="cosine" /> |
123 <when value="precomputed"/> | 123 <when value="precomputed" /> |
124 </conditional> | 124 </conditional> |
125 <conditional name="select_solver_type"> | 125 <conditional name="select_solver_type"> |
126 <param name="eigen_solver_opts" type="select" label="Eigen Solver"> | 126 <param name="eigen_solver_opts" type="select" label="Eigen Solver"> |
127 <option value="auto" selected="true">auto</option> | 127 <option value="auto" selected="true">auto</option> |
128 <option value="dense">dense</option> | 128 <option value="dense">dense</option> |
129 <option value="arpack">arpack</option> | 129 <option value="arpack">arpack</option> |
130 </param> | 130 </param> |
131 <when value="arpack"> | 131 <when value="arpack"> |
132 <param name="tolerance" type="float" value="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack"/> | 132 <param name="tolerance" type="float" value="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack" /> |
133 <param name="max_iter" type="integer" optional="true" label="Maximum Iterations" help="Maximum number of iterations for arpack"/> | 133 <param name="max_iter" type="integer" optional="true" label="Maximum Iterations" help="Maximum number of iterations for arpack" /> |
134 </when> | 134 </when> |
135 <when value="auto"/> | 135 <when value="auto" /> |
136 <when value="dense"/> | 136 <when value="dense" /> |
137 </conditional> | 137 </conditional> |
138 </when> | 138 </when> |
139 </conditional> | 139 </conditional> |
140 </inputs> | 140 </inputs> |
141 <outputs> | 141 <outputs> |
142 <data format="tabular" name="outfile"/> | 142 <data format="tabular" name="outfile" /> |
143 </outputs> | 143 </outputs> |
144 <tests> | 144 <tests> |
145 <test> | 145 <test> |
146 <param name="infile" value="pca_input.dat" ftype="tabular"/> | 146 <param name="infile" value="pca_input.dat" ftype="tabular" /> |
147 <param name="selected_column_selector_option" value="by_index_number" /> | 147 <param name="selected_column_selector_option" value="by_index_number" /> |
148 <param name="col1" value="1,2,4,6,8,5"/> | 148 <param name="col1" value="1,2,4,6,8,5" /> |
149 <param name="number" value="5"/> | 149 <param name="number" value="5" /> |
150 <param name="select_pca_opts" value="classical"/> | 150 <param name="select_pca_opts" value="classical" /> |
151 <param name="svd_solver_opts" value="arpack"/> | 151 <param name="svd_solver_opts" value="arpack" /> |
152 <param name="tolerance" value="0.4"/> | 152 <param name="tolerance" value="0.4" /> |
153 <output name="outfile" ftype='tabular' file="pca_classical_output.dat"/> | 153 <output name="outfile" ftype='tabular' file="pca_classical_output.dat" /> |
154 </test> | 154 </test> |
155 <test> | 155 <test> |
156 <param name="infile" value="pca_input_with_headers.dat" ftype="tabular"/> | 156 <param name="infile" value="pca_input_with_headers.dat" ftype="tabular" /> |
157 <param name="header" value="--header"/> | 157 <param name="header" value="--header" /> |
158 <param name="selected_column_selector_option" value="by_header_name" /> | 158 <param name="selected_column_selector_option" value="by_header_name" /> |
159 <param name="col1" value="col_1,col_2,col_4,col_6,col_8,col_5"/> | 159 <param name="col1" value="col_1,col_2,col_4,col_6,col_8,col_5" /> |
160 <param name="number" value="5"/> | 160 <param name="number" value="5" /> |
161 <param name="select_pca_opts" value="classical"/> | 161 <param name="select_pca_opts" value="classical" /> |
162 <param name="svd_solver_opts" value="arpack"/> | 162 <param name="svd_solver_opts" value="arpack" /> |
163 <param name="tolerance" value="0.4"/> | 163 <param name="tolerance" value="0.4" /> |
164 <output name="outfile" ftype='tabular' file="pca_classical_header_names_output.dat"/> | 164 <output name="outfile" ftype='tabular' file="pca_classical_header_names_output.dat" /> |
165 </test> | 165 </test> |
166 <test> | 166 <test> |
167 <param name="infile" value="pca_input.dat" ftype="tabular"/> | 167 <param name="infile" value="pca_input.dat" ftype="tabular" /> |
168 <param name="selected_column_selector_option" value="all_but_by_index_number"/> | 168 <param name="selected_column_selector_option" value="all_but_by_index_number" /> |
169 <param name="col1" value="8,5" /> | 169 <param name="col1" value="8,5" /> |
170 <param name="number" value="7"/> | 170 <param name="number" value="7" /> |
171 <param name="select_pca_opts" value="incremental"/> | 171 <param name="select_pca_opts" value="incremental" /> |
172 <param name="batch_size" value="64"/> | 172 <param name="batch_size" value="64" /> |
173 <output name="outfile" ftype='tabular' file="pca_incremental_output.dat"/> | 173 <output name="outfile" ftype='tabular' file="pca_incremental_output.dat" /> |
174 </test> | 174 </test> |
175 <test> | 175 <test> |
176 <param name="infile" value="pca_input_with_headers.dat" ftype="tabular"/> | 176 <param name="infile" value="pca_input_with_headers.dat" ftype="tabular" /> |
177 <param name="header" value="--header"/> | 177 <param name="header" value="--header" /> |
178 <param name="selected_column_selector_option" value="all_but_by_header_name" /> | 178 <param name="selected_column_selector_option" value="all_but_by_header_name" /> |
179 <param name="col1" value="col_8,col_5"/> | 179 <param name="col1" value="col_8,col_5" /> |
180 <param name="number" value="7"/> | 180 <param name="number" value="7" /> |
181 <param name="select_pca_opts" value="incremental"/> | 181 <param name="select_pca_opts" value="incremental" /> |
182 <param name="batch_size" value="64"/> | 182 <param name="batch_size" value="64" /> |
183 <output name="outfile" ftype='tabular' file="pca_incremental_header_names_output.dat"/> | 183 <output name="outfile" ftype='tabular' file="pca_incremental_header_names_output.dat" /> |
184 </test> | 184 </test> |
185 <test> | 185 <test> |
186 <param name="infile" value="pca_input.dat" ftype="tabular"/> | 186 <param name="infile" value="pca_input.dat" ftype="tabular" /> |
187 <param name="selected_column_selector_option" value="all_columns" /> | 187 <param name="selected_column_selector_option" value="all_columns" /> |
188 <param name="number" value="8"/> | 188 <param name="number" value="8" /> |
189 <param name="select_pca_opts" value="kernel"/> | 189 <param name="select_pca_opts" value="kernel" /> |
190 <param name="kernel_opts" value="linear"/> | 190 <param name="kernel_opts" value="linear" /> |
191 <param name="eigen_solver_opts" value="arpack"/> | 191 <param name="eigen_solver_opts" value="arpack" /> |
192 <param name="tolerance" value="4.3"/> | 192 <param name="tolerance" value="4.3" /> |
193 <param name="max_iter" value="8"/> | 193 <param name="max_iter" value="8" /> |
194 <output name="outfile" ftype="tabular"> | 194 <output name="outfile" ftype="tabular"> |
195 <assert_contents> | 195 <assert_contents> |
196 <has_n_lines n="300"/> | 196 <has_n_lines n="300" /> |
197 <has_n_columns n="8"/> | 197 <has_n_columns n="8" /> |
198 </assert_contents> | 198 </assert_contents> |
199 </output> | 199 </output> |
200 </test> | 200 </test> |
201 <test> | 201 <test> |
202 <param name="infile" value="pca_input.dat" ftype="tabular"/> | 202 <param name="infile" value="pca_input.dat" ftype="tabular" /> |
203 <param name="selected_column_selector_option" value="all_columns" /> | 203 <param name="selected_column_selector_option" value="all_columns" /> |
204 <param name="number" value="8"/> | 204 <param name="number" value="8" /> |
205 <param name="select_pca_opts" value="kernel"/> | 205 <param name="select_pca_opts" value="kernel" /> |
206 <param name="kernel_opts" value="poly"/> | 206 <param name="kernel_opts" value="poly" /> |
207 <param name="gamma" value="0.3"/> | 207 <param name="gamma" value="0.3" /> |
208 <param name="degree" value="4"/> | 208 <param name="degree" value="4" /> |
209 <param name="coef0" value="1.6"/> | 209 <param name="coef0" value="1.6" /> |
210 <param name="eigen_solver_opts" value="auto"/> | 210 <param name="eigen_solver_opts" value="auto" /> |
211 <output name="outfile" ftype="tabular"> | 211 <output name="outfile" ftype="tabular"> |
212 <assert_contents> | 212 <assert_contents> |
213 <has_n_lines n="300"/> | 213 <has_n_lines n="300" /> |
214 <has_n_columns n="8"/> | 214 <has_n_columns n="8" /> |
215 </assert_contents> | 215 </assert_contents> |
216 </output> | 216 </output> |
217 </test> | 217 </test> |
218 </tests> | 218 </tests> |
219 <help><![CDATA[ | 219 <help><![CDATA[ |
220 .. class:: infomark | 220 .. class:: infomark |
221 | 221 |
222 **What it does** | 222 **What it does** |
223 | 223 |
224 This tool takes a tabular input file (one data point per row, each column a variable) | 224 This tool takes a tabular input file (one data point per row, each column a variable) |
225 and performs PCA using Singular Value Decomposition, returning an equally sized tabular | 225 and performs principal component analysis (PCA) using Singular Value Decomposition, |
226 file with the first PC in the first column, second PC in the second column, etc. | 226 returning an equally sized tabular file with the first PC in the first column, second |
| | 227 PC in the second column, etc. |
227 ]]> | 228 ]]> |
228 </help> | 229 </help> |
229 <expand macro="sklearn_citation"/> | 230 <expand macro="sklearn_citation" /> |
230 </tool> | 231 </tool> |