Mercurial > repos > iuc > ampvis2_load
comparison load.xml @ 0:474bbc45ddd9 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ampvis2 commit 7c0ecbffdb5e993f5af7e3b52c424c2761fb91d3"
author | iuc |
---|---|
date | Mon, 04 Apr 2022 10:24:51 +0000 |
parents | |
children | 8d77d277996e |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:474bbc45ddd9 |
---|---|
1 <tool id="ampvis2_load" name="ampvis2 load" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT"> | |
2 <description></description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="header"/> | |
7 <command detect_errors="exit_code"><![CDATA[ | |
8 #if $otutable.ext.startswith("biom") | |
9 ln -s '$otutable' otutable.biom && | |
10 #else | |
11 ln -s '$otutable' otutable.tsv && | |
12 #end if | |
13 #if $taxonomy | |
14 ln -s '$taxonomy' taxonomy.tsv && | |
15 #end if | |
16 Rscript '$rscript' | |
17 ]]></command> | |
18 <configfiles> | |
19 <configfile name="rscript"><![CDATA[ | |
20 library(ampvis2, quietly = TRUE) | |
21 library(readr, quietly = TRUE) | |
22 ## load the metadata 'manually' so that all columns are read as character | |
23 ## (passing colClasses through amp_load does not seem to be possible) | |
24 #if $metadata | |
25 metadata <- read.table("$metadata", header = TRUE, sep = "\t", colClasses = "character") | |
26 #end if | |
27 data <- amp_load( | |
28 #if $otutable.ext.startswith("biom") | |
29 otutable = "otutable.biom", | |
30 #else | |
31 otutable = "otutable.tsv", | |
32 #end if | |
33 #if $metadata | |
34 metadata = metadata, | |
35 #end if | |
36 #if $taxonomy | |
37 taxonomy = "taxonomy.tsv", | |
38 #end if | |
39 #if $fasta | |
40 fasta = "$fasta", | |
41 #end if | |
42 #if $tree | |
43 tree = "$tree", | |
44 #end if | |
45 pruneSingletons = $pruneSingletons | |
46 ) | |
47 ## try to guess the metadata column types with readr::type_convert | |
48 #if $guess_column_types | |
49 data\$metadata <- readr::type_convert(data\$metadata, guess_integer=TRUE) | |
50 #end if | |
51 saveRDS(data, "$ampvis") | |
52 ## write metadata list for biom input or if metadata is given | |
53 #if "metadata" in $write_lists | |
54 @SAVE_METADATA_LIST@ | |
55 #end if | |
56 | |
57 #if "tax" in $write_lists | |
58 @SAVE_TAX_LIST@ | |
59 #end if | |
60 ## print overview of the data to stdout | |
61 data | |
62 ]]></configfile> | |
63 </configfiles> | |
64 <inputs> | |
65 <param argument="otutable" type="data" format="tabular,biom1,biom2" label="OTU table"/> | |
66 <param argument="metadata" type="data" format="tsv" optional="true" label="Sample metadata"> | |
67 <validator type="expression" message="Table must have at least 1 column"><![CDATA[value.metadata.columns > 0]]></validator> | |
68 <!-- TODO in future versions this might change https://github.com/MadsAlbertsen/ampvis2/pull/134 | |
69 if so, then also adapt help text and test data --> | |
70 <validator type="expression" message="First column must be named SampleID"><![CDATA[value.metadata.column_names[0] == "SampleID"]]></validator> | |
71 </param> | |
72 <param name="guess_column_types" type="boolean" checked="true" label="Guess metadata column types" help="See help"/> | |
73 <param argument="taxonomy" type="data" format="tabular" optional="true" label="Taxonomy table"/> | |
74 <param argument="fasta" type="data" format="fasta" optional="true" label="Fasta file"/> | |
75 <param argument="tree" type="data" format="newick" optional="true" label="Phylogenetic tree"/> | |
76 <param argument="pruneSingletons" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Remove singleton OTUs"/> | |
77 <param name="write_lists" type="select" optional="true" multiple="true" label="Output list data sets" help="Needed by most downstream tools. Select if the inputs contain taxonomic / metadata information."> | |
78 <option value="tax" selected="true">Taxonomy list</option> | |
79 <option value="metadata" selected="true">Metadata list</option> | |
80 </param> | |
81 </inputs> | |
82 <outputs> | |
83 <data name="ampvis" format="ampvis2"/> | |
84 <data name="metadata_list_out" format="tabular" label="${tool.name} on ${on_string}: metadata list"> | |
85 <filter>write_lists and "metadata" in write_lists</filter> | |
86 </data> | |
87 <data name="taxonomy_list_out" format="tabular" label="${tool.name} on ${on_string}: taxonomy list"> | |
88 <filter>write_lists and "tax" in write_lists</filter> | |
89 </data> | |
90 </outputs> | |
91 <tests> | |
92 <!-- load otu table + metadata + taxonomy --> | |
93 <test expect_num_outputs="3"> | |
94 <param name="otutable" value="AalborgWWTPs.otu.csv"/> | |
95 <param name="metadata" value="AalborgWWTPs.tsv" ftype="tsv"/> | |
96 <param name="taxonomy" value="AalborgWWTPs.tax"/> | |
97 <output name="ampvis" value="AalborgWWTPs.rds" ftype="ampvis2" compare="sim_size"/> | |
98 <output name="metadata_list_out" value="AalborgWWTPs-metadata.list"/> | |
99 <output name="taxonomy_list_out" value="AalborgWWTPs-taxonomy.list"/> | |
100 <assert_stdout> | |
101 <has_text text="575.79"/> | |
102 <has_text text="SampleID, Plant, Date, Year, Period"/> | |
103 <has_text text="200(100%) 194(97%) 177(88.5%) 170(85%) 152(76%) 113(56.5%) 2(1%)"/> | |
104 </assert_stdout> | |
105 </test> | |
106 <!-- load otu table + metadata + taxonomy + tree + fasta --> | |
107 <test expect_num_outputs="3"> | |
108 <param name="otutable" value="AalborgWWTPs.otu.csv"/> | |
109 <param name="metadata" value="AalborgWWTPs.tsv" ftype="tsv"/> | |
110 <param name="taxonomy" value="AalborgWWTPs.tax"/> | |
111 <param name="fasta" value="AalborgWWTPs.fa" ftype="fasta"/> | |
112 <param name="tree" value="AalborgWWTPs.nwk" ftype="newick"/> | |
113 <output name="ampvis" value="AalborgWWTPs-complete.rds" ftype="ampvis2" compare="sim_size"/> | |
114 <output name="metadata_list_out" value="AalborgWWTPs-metadata.list"/> | |
115 <output name="taxonomy_list_out" value="AalborgWWTPs-taxonomy.list"/> | |
116 <assert_stdout> | |
117 <has_text text="575.79"/> | |
118 <has_text text="SampleID, Plant, Date, Year, Period"/> | |
119 <has_text text="200(100%) 194(97%) 177(88.5%) 170(85%) 152(76%) 113(56.5%) 2(1%)"/> | |
120 </assert_stdout> | |
121 </test> | |
122 <!-- test biom 1/2 input (taken from https://github.com/biocore/biom-format/tree/master/examples) | |
123 metadata seems not to be loaded from a biom file https://github.com/MadsAlbertsen/ampvis2/issues/129 | |
124 taxonomy is loaded from all but 1 | |
125 --> | |
126 <test> | |
127 <param name="otutable" value="rich-dense.biom" ftype="biom1"/> | |
128 <output name="ampvis" ftype="ampvis2"> | |
129 <assert_contents> | |
130 <has_size value="748"/> | |
131 </assert_contents> | |
132 </output> | |
133 <assert_stdout> | |
134 <has_text text="4.5"/> | |
135 <has_text text="SampleID, BarcodeSequence, LinkerPrimerSequence, BODY_SITE, Description"/> | |
136 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/> | |
137 </assert_stdout> | |
138 </test> | |
139 <test> | |
140 <param name="otutable" value="rich-sparse.biom" ftype="biom1"/> | |
141 <output name="ampvis" ftype="ampvis2"> | |
142 <assert_contents> | |
143 <has_size value="751"/> | |
144 </assert_contents> | |
145 </output> | |
146 <assert_stdout> | |
147 <has_text text="4.5"/> | |
148 <has_text text="SampleID, BarcodeSequence, LinkerPrimerSequence, BODY_SITE, Description"/> | |
149 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/> | |
150 </assert_stdout> | |
151 </test> | |
152 <test> | |
153 <param name="otutable" value="min_sparse_otu_table_hdf5.biom" ftype="biom2"/> | |
154 <output name="ampvis" ftype="ampvis2"> | |
155 <assert_contents> | |
156 <has_size value="395"/> | |
157 </assert_contents> | |
158 </output> | |
159 <assert_stdout> | |
160 <has_text text="4.5"/> | |
161 <!-- input file seems to miss metadata check that no metadata & taxonomy is loaded (ampvis2 adds dummy metadata) --> | |
162 <has_text text="SampleID, DummyVariable"/> | |
163 <has_text text="0(0%) 0(0%) 0(0%) 0(0%) 0(0%) 0(0%) 0(0%)"/> | |
164 </assert_stdout> | |
165 </test> | |
166 <test> | |
167 <param name="otutable" value="rich_sparse_otu_table_hdf5.biom" ftype="biom2"/> | |
168 <output name="ampvis" ftype="ampvis2"> | |
169 <assert_contents> | |
170 <has_size value="753"/> | |
171 </assert_contents> | |
172 </output> | |
173 <assert_stdout> | |
174 <has_text text="4.5"/> | |
175 <has_text text="SampleID, BODY_SITE, BarcodeSequence, Description, LinkerPrimerSequence"/> | |
176 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/> | |
177 </assert_stdout> | |
178 </test> | |
179 </tests> | |
180 <help><![CDATA[ | |
181 | |
182 What it does | |
183 ============ | |
184 | |
185 This tool reads an OTU-table and corresponding sample metadata, and returns | |
186 an RDS data set for use in all ampvis2 tools. It is therefore required to load | |
187 data with this tool before any other ampvis2 tools can be used. | |
188 | |
189 The Galaxy tool calls the `amp_load <https://madsalbertsen.github.io/ampvis2/reference/amp_load.html>`_ | |
190 function of the ampvis2 package. This function validates and corrects the | |
191 provided data frames in different ways to make it suitable for the rest of the | |
192 ampvis2 tools. It is important that the provided data sets match the | |
193 requirements as described in the following to work properly. | |
194 | |
195 Input | |
196 ===== | |
197 | |
198 **The OTU-table** | |
199 | |
200 contains information about the OTUs, their read counts in each sample, and | |
201 optionally their assigned taxonomy. The OTU table can be given as | |
202 | |
203 - Tabular data set | |
204 - BIOM version (1 and 2) | |
205 | |
206 Metadata and taxonomy in the tabular or BIOM files that are given via the | |
207 ``OTU table`` parameter are overwritten by data provided via the | |
208 ``Sample metadata`` or ``Taxonomy table`` parameters. | |
209 | |
210 If given in tabular format the provided OTU-table must be a table with the | |
211 following requirements: | |
212 | |
213 - The rows are OTU IDs and the columns are samples. | |
214 - The OTU ID's are expected to be in a column called "OTU", "ASV", or "#OTU ID". | |
215 - The column names of the table are the sample IDs, exactly matching those in | |
216 the metadata | |
217 - The last 7 columns are optionally the corresponding taxonomy assigned to the | |
218 OTUs, named "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species". | |
219 | |
220 Generally avoid special characters and spaces in row- and column names. | |
221 | |
222 The OTU table can also contain the taxonomic information in additional columns: | |
223 Kingdom, Phylum, Class, Order, Family, Genus. | |
224 | |
225 Check `here <https://biom-format.org/>`_ for information on the BIOM formats. | |
226 | |
227 **The metadata** | |
228 | |
229 contains additional information about the samples, for example where each sample | |
230 was taken, date, pH, treatment etc, which is used to compare and group the | |
231 samples during analysis. The amount of information in the metadata is unlimited, | |
232 it can contain any number of columns (variables), however there are a few | |
233 requirements: | |
234 | |
235 - The sample IDs must be in the first column and the column must be named | |
236 ``SampleID``. These sample IDs must match exactly to those in the OTU-table. Any | |
237 unmatched samples between the otutable and metadata will be removed with a | |
238 warning. | |
239 - Generally avoid special characters and spaces in row- and column names. | |
240 | |
241 By default the data types of metadata columns are guessed with | |
242 ``readr::type_convert``. The guessed column types can be seen in the last (4th) | |
243 column of the ``metadata list`` output and also stdout of the tool. Guessing of | |
244 data types can be disabled using the parameter ``Guess metadata column types``. | |
245 If disabled, metadata from separate tabular input is treated as character data, | |
246 and if loaded from biom files that data is used as is. Metadata types can be set | |
247 manually using the tool ``ampvis2: set metadata`` | |
248 | |
249 Dates should be given in the format ``YYYY-MM-DD`` (Y: year, M: month, D: day). | |
250 | |
251 In addition to the RDS data set a metadata (resp. taxonomy) list data set is returned | |
252 if metadata (resp. taxonomic information) is given to this tool. It contains | |
253 restructured metadata (taxonomic information) that is used in downstream ampvis2 | |
254 Galaxy tools in order to select metadata / metadata values (resp. taxonomic levels). | |
255 | |
256 **Taxonomy** | |
257 | |
258 is a tabular data set with 7 columns and one row per ASV/OTU: | |
259 | |
260 - the 1st column is identical to the 1st column of the OTU table parameter | |
261 - the remaining columns contain data for Kingdom, Phylum, Class, Order, Family, Genus | |
262 | |
263 Note that the taxonomic information can also be embedded in the OTU table. | |
264 | |
265 **Tree** | |
266 | |
267 a tree with branch lengths in Newick format. | |
268 | |
269 This is needed / useful only if the data is used as input of: ``ampvis2: | |
270 ordination plot`` for ordination methods NMDS / MMDS with (un)weighted UniFrac | |
271 distances. Note that the loaded tree is also filtered by the ``ampvis2: subset | |
272 ...`` tools. | |
273 | |
274 **Fasta** | |
275 | |
276 a fasta file containing the sequences of the OTUs. Note that this information is | |
277 only used in ``ampvis2: export fasta``. If the OTU table is modified by | |
278 ``ampvis2: mergereplicates`` or the ``ampvis2: subset ...`` tools this might be | |
279 useful to obtain a filtered list of sequences. | |
280 | |
281 | |
282 Output | |
283 ====== | |
284 | |
285 **RDS** | |
286 | |
287 The main output of the tool is an RDS data set that contains the R representation of | |
288 the ampvis2 object containing the provided data (OTU table, metadata, taxonomy, | |
289 phylogenetic tree, and fasta). | |
290 | |
291 **List files** | |
292 | |
293 Summarize the metadata and taxonomy information: | |
294 | |
295 - the taxonomy list file lists all taxa in a 1 column tabular data set | |
296 - the metadata list file lists the Metadata variables (column 1), and the corresponding | |
297 available metadata values (column 2), if the variable is the SampleID (column 3), and | |
298 the data type of the corresponding metadata variable (column 4) | |
299 | |
300 These files are auxiliary files that are needed in downstream ``ampvis2`` Galaxy tools | |
301 to allow selecting metadata and taxonomy. They are not passed to the underlying R functions. | |
302 | |
303 Note that, if no taxonomy (or metadata) is given then the underlying ``ampvis2`` R | |
304 function adds dummy taxonomy (resp. metadata). In this case the output of the list datasets | |
305 can be disabled with the ``Output list data sets`` parameter. | |
306 ]]></help> | |
307 <expand macro="citations"/> | |
308 </tool> |