comparison load.xml @ 0:474bbc45ddd9 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ampvis2 commit 7c0ecbffdb5e993f5af7e3b52c424c2761fb91d3"
author iuc
date Mon, 04 Apr 2022 10:24:51 +0000
parents
children 8d77d277996e
comparison
equal deleted inserted replaced
-1:000000000000 0:474bbc45ddd9
1 <tool id="ampvis2_load" name="ampvis2 load" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
2 <description></description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="header"/>
7 <command detect_errors="exit_code"><![CDATA[
## link the OTU table under a fixed name; the extension depends on whether the input datatype starts with "biom"
8 #if $otutable.ext.startswith("biom")
9 ln -s '$otutable' otutable.biom &&
10 #else
11 ln -s '$otutable' otutable.tsv &&
12 #end if
## the taxonomy table is optional: link it only if a dataset was given
13 #if $taxonomy
14 ln -s '$taxonomy' taxonomy.tsv &&
15 #end if
## run the R script that is rendered from the "rscript" configfile
16 Rscript '$rscript'
17 ]]></command>
18 <configfiles>
19 <configfile name="rscript"><![CDATA[
20 library(ampvis2, quietly = TRUE)
21 library(readr, quietly = TRUE)
22 ## load the metadata 'manually', treating all columns as character,
23 ## since giving colClasses to amp_load seems not possible
24 #if $metadata
25 metadata <- read.table("$metadata", header = TRUE, sep = "\t", colClasses = "character")
26 #end if
27 data <- amp_load(
28 #if $otutable.ext.startswith("biom")
29 otutable = "otutable.biom",
30 #else
31 otutable = "otutable.tsv",
32 #end if
33 #if $metadata
34 metadata = metadata,
35 #end if
36 #if $taxonomy
37 taxonomy = "taxonomy.tsv",
38 #end if
39 #if $fasta
40 fasta = "$fasta",
41 #end if
42 #if $tree
43 tree = "$tree",
44 #end if
45 pruneSingletons = $pruneSingletons
46 )
47 ## try to guess column types with readr::type_convert (the backslash keeps Cheetah from treating the R dollar operator as a placeholder)
48 #if $guess_column_types
49 data\$metadata <- readr::type_convert(data\$metadata, guess_integer=TRUE)
50 #end if
51 saveRDS(data, "$ampvis")
52 ## write the metadata list if selected in write_lists (intended for biom input or if metadata is given)
53 #if "metadata" in $write_lists
54 @SAVE_METADATA_LIST@
55 #end if
56
57 #if "tax" in $write_lists
58 @SAVE_TAX_LIST@
59 #end if
60 ## print overview of the data to stdout
61 data
62 ]]></configfile>
63 </configfiles>
64 <inputs>
<!-- only the OTU table is mandatory; metadata, taxonomy, fasta and tree are optional -->
65 <param argument="otutable" type="data" format="tabular,biom1,biom2" label="OTU table"/>
66 <param argument="metadata" type="data" format="tsv" optional="true" label="Sample metadata">
67 <validator type="expression" message="Table must have at least 1 column"><![CDATA[value.metadata.columns > 0]]></validator>
68 <!-- TODO in future versions this requirement might change, see https://github.com/MadsAlbertsen/ampvis2/pull/134
69 if so, then also adapt the help text and the test data -->
70 <validator type="expression" message="First column must be named SampleID"><![CDATA[value.metadata.column_names[0] == "SampleID"]]></validator>
71 </param>
<!-- if checked, the R script runs readr::type_convert on the loaded metadata -->
72 <param name="guess_column_types" type="boolean" checked="true" label="Guess metadata column types" help="See help"/>
73 <param argument="taxonomy" type="data" format="tabular" optional="true" label="Taxonomy table"/>
74 <param argument="fasta" type="data" format="fasta" optional="true" label="Fasta file"/>
75 <param argument="tree" type="data" format="newick" optional="true" label="Phylogenetic tree"/>
76 <param argument="pruneSingletons" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Remove singleton OTUs"/>
<!-- controls which list data sets are written by the R script and created as outputs; both are selected by default -->
77 <param name="write_lists" type="select" optional="true" multiple="true" label="Output list data sets" help="Needed by most downstream tools. Select if the inputs contain taxonomic / metadata information.">
78 <option value="tax" selected="true">Taxonomy list</option>
79 <option value="metadata" selected="true">Metadata list</option>
80 </param>
81 </inputs>
82 <outputs>
<!-- the RDS data set is always created; each list output only exists if it is selected in the write_lists parameter -->
83 <data name="ampvis" format="ampvis2"/>
84 <data name="metadata_list_out" format="tabular" label="${tool.name} on ${on_string}: metadata list">
85 <filter>write_lists and "metadata" in write_lists</filter>
86 </data>
87 <data name="taxonomy_list_out" format="tabular" label="${tool.name} on ${on_string}: taxonomy list">
88 <filter>write_lists and "tax" in write_lists</filter>
89 </data>
90 </outputs>
91 <tests>
92 <!-- load otu table + metadata + taxonomy; the RDS output is only compared by size (sim_size), both list outputs are compared to expected files -->
93 <test expect_num_outputs="3">
94 <param name="otutable" value="AalborgWWTPs.otu.csv"/>
95 <param name="metadata" value="AalborgWWTPs.tsv" ftype="tsv"/>
96 <param name="taxonomy" value="AalborgWWTPs.tax"/>
97 <output name="ampvis" value="AalborgWWTPs.rds" ftype="ampvis2" compare="sim_size"/>
98 <output name="metadata_list_out" value="AalborgWWTPs-metadata.list"/>
99 <output name="taxonomy_list_out" value="AalborgWWTPs-taxonomy.list"/>
100 <assert_stdout>
101 <has_text text="575.79"/>
102 <has_text text="SampleID, Plant, Date, Year, Period"/>
103 <has_text text="200(100%) 194(97%) 177(88.5%) 170(85%) 152(76%) 113(56.5%) 2(1%)"/>
104 </assert_stdout>
105 </test>
106 <!-- load otu table + metadata + taxonomy + tree + fasta; same stdout and list expectations as without tree and fasta, only the RDS reference file differs -->
107 <test expect_num_outputs="3">
108 <param name="otutable" value="AalborgWWTPs.otu.csv"/>
109 <param name="metadata" value="AalborgWWTPs.tsv" ftype="tsv"/>
110 <param name="taxonomy" value="AalborgWWTPs.tax"/>
111 <param name="fasta" value="AalborgWWTPs.fa" ftype="fasta"/>
112 <param name="tree" value="AalborgWWTPs.nwk" ftype="newick"/>
113 <output name="ampvis" value="AalborgWWTPs-complete.rds" ftype="ampvis2" compare="sim_size"/>
114 <output name="metadata_list_out" value="AalborgWWTPs-metadata.list"/>
115 <output name="taxonomy_list_out" value="AalborgWWTPs-taxonomy.list"/>
116 <assert_stdout>
117 <has_text text="575.79"/>
118 <has_text text="SampleID, Plant, Date, Year, Period"/>
119 <has_text text="200(100%) 194(97%) 177(88.5%) 170(85%) 152(76%) 113(56.5%) 2(1%)"/>
120 </assert_stdout>
121 </test>
122 <!-- test biom 1/2 input (taken from https://github.com/biocore/biom-format/tree/master/examples)
123 metadata seems not to be loaded from a biom file https://github.com/MadsAlbertsen/ampvis2/issues/129
124 taxonomy is loaded from all but 1; both lists are selected by default in write_lists, hence 3 outputs are expected
125 -->
126 <test expect_num_outputs="3">
127 <param name="otutable" value="rich-dense.biom" ftype="biom1"/>
128 <output name="ampvis" ftype="ampvis2">
129 <assert_contents>
130 <has_size value="748"/>
131 </assert_contents>
132 </output>
133 <assert_stdout>
134 <has_text text="4.5"/>
135 <has_text text="SampleID, BarcodeSequence, LinkerPrimerSequence, BODY_SITE, Description"/>
136 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/>
137 </assert_stdout>
138 </test>
<!-- biom1 input given via the OTU table parameter only; both lists are selected by default in write_lists, hence 3 outputs are expected -->
139 <test expect_num_outputs="3">
140 <param name="otutable" value="rich-sparse.biom" ftype="biom1"/>
141 <output name="ampvis" ftype="ampvis2">
142 <assert_contents>
143 <has_size value="751"/>
144 </assert_contents>
145 </output>
146 <assert_stdout>
147 <has_text text="4.5"/>
148 <has_text text="SampleID, BarcodeSequence, LinkerPrimerSequence, BODY_SITE, Description"/>
149 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/>
150 </assert_stdout>
151 </test>
<!-- biom2 input given via the OTU table parameter only; both lists are selected by default in write_lists, hence 3 outputs are expected -->
152 <test expect_num_outputs="3">
153 <param name="otutable" value="min_sparse_otu_table_hdf5.biom" ftype="biom2"/>
154 <output name="ampvis" ftype="ampvis2">
155 <assert_contents>
156 <has_size value="395"/>
157 </assert_contents>
158 </output>
159 <assert_stdout>
160 <has_text text="4.5"/>
161 <!-- the input file seems to lack metadata: check that no metadata and no taxonomy is loaded (ampvis2 adds dummy metadata) -->
162 <has_text text="SampleID, DummyVariable"/>
163 <has_text text="0(0%) 0(0%) 0(0%) 0(0%) 0(0%) 0(0%) 0(0%)"/>
164 </assert_stdout>
165 </test>
<!-- biom2 input given via the OTU table parameter only; both lists are selected by default in write_lists, hence 3 outputs are expected -->
166 <test expect_num_outputs="3">
167 <param name="otutable" value="rich_sparse_otu_table_hdf5.biom" ftype="biom2"/>
168 <output name="ampvis" ftype="ampvis2">
169 <assert_contents>
170 <has_size value="753"/>
171 </assert_contents>
172 </output>
173 <assert_stdout>
174 <has_text text="4.5"/>
175 <has_text text="SampleID, BODY_SITE, BarcodeSequence, Description, LinkerPrimerSequence"/>
176 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/>
177 </assert_stdout>
178 </test>
179 </tests>
180 <help><![CDATA[
181
182 What it does
183 ============
184
185 This tool reads an OTU-table and corresponding sample metadata, and returns
186 a RDS data set for use in all ampvis2 tools. It is therefore required to load
187 data with this tool before any other ampvis2 tools can be used.
188
189 The Galaxy tool calls the `amp_load <https://madsalbertsen.github.io/ampvis2/reference/amp_load.html>`_
190 function of the ampvis2 package. This function validates and corrects the
191 provided data frames in different ways to make it suitable for the rest of the
192 ampvis2 tools. It is important that the provided data sets match the
193 requirements as described in the following to work properly.
194
195 Input
196 =====
197
198 **The OTU-table**
199
200 contains information about the OTUs, their read counts in each sample, and
201 optionally their assigned taxonomy. The OTU table can be given as
202
203 - Tabular data set
204 - BIOM version (1 and 2)
205
206 Metadata and taxonomy in the tabular or BIOM files that are given via the
207 ``OTU table`` parameter are overwritten by data presented via the
208 ``Sample metadata`` or ``Taxonomy table`` parameters.
209
210 If given in tabular format the provided OTU-table must be a table with the
211 following requirements:
212
213 - The rows are OTU IDs and the columns are samples.
214 - The OTU ID's are expected to be in a column called "OTU", "ASV", or "#OTU ID".
215 - The column names of the table are the sample IDs, exactly matching those in
216 the metadata
217 - The last 7 columns are optionally the corresponding taxonomy assigned to the
218 OTUs, named "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species".
219
220 Generally avoid special characters and spaces in row- and column names.
221
222 The OTU table can also contain the taxonomic information in additional columns:
223 Kingdom, Phylum, Class, Order, Family, Genus.
224
225 Check `here <https://biom-format.org/>`_ for information on the BIOM formats.
226
227 **The metadata**
228
229 contains additional information about the samples, for example where each sample
230 was taken, date, pH, treatment etc, which is used to compare and group the
231 samples during analysis. The amount of information in the metadata is unlimited,
232 it can contain any number of columns (variables), however there are a few
233 requirements:
234
235 - The sample IDs must be in the first column and the column must be named
236 ``SampleID``. These sample IDs must match exactly to those in the OTU-table. Any
237 unmatched samples between the otutable and metadata will be removed with a
238 warning.
239 - Generally avoid special characters and spaces in row- and column names.
240
241 By default the data types of metadata columns are guessed with
242 ``readr::type_convert``. The guessed column types can be seen in the last (4th)
243 column of the ``metadata list`` output and also stdout of the tool. Guessing of
244 data types can be disabled using the parameter ``Guess metadata column types``.
245 If disabled, metadata from separate tabular input is treated as character data,
246 and if loaded from biom files that data is used as is. Metadata types can be set
247 manually using the tool ``ampvis2: set metadata``
248
249 Dates should be given in the format ``YYYY-MM-DD`` (Y: year, M: month, D: day).
250
251 In addition to the RDS data set a metadata (resp. taxonomy) list data set is returned
252 if metadata (resp. taxonomic information) is given to this tool. It contains
253 restructured metadata (taxonomic information) that is used in downstream ampvis2
254 Galaxy tools in order to select metadata / metadata values (resp. taxonomic levels).
255
256 **Taxonomy**
257
258 is a tabular data set with 7 columns and one row per ASV/OTU:
259
260 - the 1st column is identical to the 1st column of the OTU table parameter
261 - the remaining columns contain data for Kingdom, Phylum, Class, Order, Family, Genus
262
263 Note that the taxonomic information can also be embedded in the OTU table.
264
265 **Tree**
266
267 a tree with branch lengths in Newick format.
268
269 This is needed / useful only if the data is used as input of: ``ampvis2:
270 ordination plot`` for ordination methods NMDS / MMDS with (un)weighted UniFrac
271 distances. Note that the loaded tree is also filtered by the ``ampvis2: subset
272 ...`` tools.
273
274 **Fasta**
275
276 a fasta file containing the sequences of the OTUs. Note that this information is
277 only used in ``ampvis2: export fasta``. If the OTU table is modified by
278 ``ampvis2: mergereplicates`` or the ``ampvis2: subset ...`` tools this might be
279 useful to obtain a filtered list of sequences.
280
281
282 Output
283 ======
284
285 **RDS**
286
287 The main output of the tool is an RDS data set that contains the R representation of
288 the ampvis2 object containing the provided data (OTU table, metadata, taxonomy,
289 phylogenetic tree, and fasta).
290
291 **List files**
292
293 Summarize the metadata and taxonomy information:
294
295 - the taxonomy list file lists all taxa in a 1 column tabular data set
296 - the metadata list file lists the Metadata variables (column 1), and the corresponding
297 available metadata values (column 2), if the variable is the SampleID (column 3), and
298 the data type of the corresponding metadata variable (column 4)
299
300 These files are auxiliary files that are needed in downstream ``ampvis2`` Galaxy tools
301 to allow selecting metadata and taxonomy. They are not passed to the underlying R functions.
302
303 Note that if no taxonomy (or metadata) is given, then the underlying ``ampvis2`` R
304 function adds dummy taxonomy (resp. metadata). In this case the output of the list datasets
305 can be disabled with the ``Output list data sets`` parameter.
306 ]]></help>
307 <expand macro="citations"/>
308 </tool>