comparison load.xml @ 3:932d7573a561 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ampvis2 commit 5b6fb9641a1320e13aba367c4e7bc52ae064acc6
author iuc
date Mon, 26 Feb 2024 07:53:42 +0000
parents 8d77d277996e
children 576dd33588bf
comparison
equal deleted inserted replaced
2:8d77d277996e 3:932d7573a561
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="header"/> 6 <expand macro="header"/>
7 <command detect_errors="exit_code"><![CDATA[ 7 <command detect_errors="exit_code"><![CDATA[
8 #if $otutable.ext.startswith("biom") 8 #if $otutable.is_of_type("biom1") or $otutable.is_of_type("biom2")
9 ln -s '$otutable' otutable.biom && 9 ln -s '$otutable' otutable.biom &&
10 #else 10 #else if not $otutable.is_of_type("phyloseq")
11 ln -s '$otutable' otutable.tsv && 11 ## asv/otu column can not be specified so set the needed name
12 ## if empty https://github.com/KasperSkytte/ampvis2/issues/166
13 ## also done in taxonomy.tsv
14 #if $asv_otu_col_empty
15 sed -e '1 s/^\t/ASV\t/' '$otutable' > otutable.tsv &&
16 #else
17 ln -s '$otutable' otutable.tsv &&
18 #end if
12 #end if 19 #end if
13 #if $taxonomy 20 #if $taxonomy
14 ln -s '$taxonomy' taxonomy.tsv && 21 #if $asv_otu_col_empty
22 sed -e '1 s/^\t/ASV\t/' '$taxonomy' > taxonomy.tsv &&
23 #else
24 ln -s '$taxonomy' taxonomy.tsv &&
25 #end if
15 #end if 26 #end if
16 Rscript '$rscript' 27 Rscript '$rscript'
17 ]]></command> 28 ]]></command>
18 <configfiles> 29 <configfiles>
19 <configfile name="rscript"><![CDATA[ 30 <configfile name="rscript"><![CDATA[
20 library(ampvis2, quietly = TRUE) 31 library(ampvis2, quietly = TRUE)
21 library(readr, quietly = TRUE) 32 library(readr, quietly = TRUE)
22 ## 'manually' load metadata treating all columns as character 33 ## 'manually' load metadata treating all columns as character
23 ## giving colClasses to amp_load seems not possible 34 ## giving colClasses to amp_load seems not possible
35 ## - check.names=F: leave empty column names empty .. fixed below
24 #if $metadata 36 #if $metadata
25 metadata <- read.table("$metadata", header = TRUE, sep = "\t", colClasses = "character") 37 metadata <- read.table("$metadata", header = TRUE, sep = "\t", colClasses = "character", check.names=F)
38 ## we do not require the metadata to have a 1st column named "SampleID",
39 ## but it should not be empty
40 if(colnames(metadata)[1] == ""){
41 colnames(metadata)[1] <- "SampleID"
42 }
43 if(exists("SampleID", where = metadata)){
44 rownames(metadata) <- metadata[["SampleID"]]
45 }else{
46 rownames(metadata) <- metadata[[1]]
47 }
48 #end if
49
50 #if $otutable.is_of_type("phyloseq")
51 otutable <- readRDS("$otutable")
52 print(class(otutable))
26 #end if 53 #end if
27 data <- amp_load( 54 data <- amp_load(
28 #if $otutable.ext.startswith("biom") 55 #if $otutable.is_of_type("phyloseq")
56 otutable = otutable,
57 #else if $otutable.is_of_type("biom1") or $otutable.is_of_type("biom2")
29 otutable = "otutable.biom", 58 otutable = "otutable.biom",
30 #else 59 #else
31 otutable = "otutable.tsv", 60 otutable = "otutable.tsv",
32 #end if 61 #end if
33 #if $metadata 62 #if $metadata
42 #if $tree 71 #if $tree
43 tree = "$tree", 72 tree = "$tree",
44 #end if 73 #end if
45 pruneSingletons = $pruneSingletons 74 pruneSingletons = $pruneSingletons
46 ) 75 )
76
77 #if $asv_sequences
78 library(ape, quietly = TRUE)
79
80 seq <- as.DNAbin(strsplit(rownames(data\$abund), ""))
81 names(seq) <- paste0("ASV", seq_along(seq))
82 data\$refseq <- seq
83 data <- matchOTUs(data, seq)
84 #end if
85
47 ## try to guess column types with readr::type_convert 86 ## try to guess column types with readr::type_convert
48 #if $guess_column_types 87 #if $guess_column_types
49 data\$metadata <- readr::type_convert(data\$metadata, guess_integer=TRUE) 88 data\$metadata <- readr::type_convert(data\$metadata, guess_integer=TRUE)
50 #end if 89 #end if
90
51 saveRDS(data, "$ampvis") 91 saveRDS(data, "$ampvis")
52 ## write metadata list for biom input or if metadata is given 92 ## write metadata list for biom input or if metadata is given
53 #if "metadata" in $write_lists 93 #if "metadata" in $write_lists
54 @SAVE_METADATA_LIST@ 94 @SAVE_METADATA_LIST@
55 #end if 95 #end if
60 ## print overview of the data to stdout 100 ## print overview of the data to stdout
61 data 101 data
62 ]]></configfile> 102 ]]></configfile>
63 </configfiles> 103 </configfiles>
64 <inputs> 104 <inputs>
65 <param argument="otutable" type="data" format="tabular,biom1,biom2" label="OTU table"/> 105 <param argument="otutable" type="data" format="phyloseq,dada2_sequencetable,tabular,biom1,biom2" label="OTU table"/>
66 <param argument="metadata" type="data" format="tsv" optional="true" label="Sample metadata"> 106 <param name="asv_otu_col_empty" type="boolean" checked="false" label="OTU/ASV column has empty header" help="By default ampvis2 expects a column named ASV or OTU containing the ASV or OTU identifiers. By checking this a column with an empty header will be used (as produced by dada2)."/>
107 <param name="asv_sequences" type="boolean" checked="false" label="ASV identifiers are the ASV sequences" help="By checking this the identifiers will be renamed to ASV1, ASV2, etc and the sequences will be stored in the ampvis2 object." />
108 <param argument="metadata" type="data" format="tabular,tsv" optional="true" label="Sample metadata">
67 <validator type="expression" message="Table must have at least 1 column"><![CDATA[value.metadata.columns > 0]]></validator> 109 <validator type="expression" message="Table must have at least 1 column"><![CDATA[value.metadata.columns > 0]]></validator>
68 <!-- TODO in future versions this might change https://github.com/MadsAlbertsen/ampvis2/pull/134
69 if so, then also adapt help text and test data -->
70 <validator type="expression" message="First column must be named SampleID"><![CDATA[value.metadata.column_names[0] == "SampleID"]]></validator>
71 </param> 110 </param>
72 <param name="guess_column_types" type="boolean" checked="true" label="Guess metadata column types" help="See help"/> 111 <param name="guess_column_types" type="boolean" checked="true" label="Guess metadata column types" help="See help"/>
73 <param argument="taxonomy" type="data" format="tabular" optional="true" label="Taxonomy table"/> 112 <param argument="taxonomy" type="data" format="tabular" optional="true" label="Taxonomy table"/>
74 <param argument="fasta" type="data" format="fasta" optional="true" label="Fasta file"/> 113 <param argument="fasta" type="data" format="fasta" optional="true" label="Fasta file"/>
75 <param argument="tree" type="data" format="newick" optional="true" label="Phylogenetic tree"/> 114 <param argument="tree" type="data" format="newick" optional="true" label="Phylogenetic tree"/>
96 <param name="taxonomy" value="AalborgWWTPs.tax"/> 135 <param name="taxonomy" value="AalborgWWTPs.tax"/>
97 <output name="ampvis" value="AalborgWWTPs.rds" ftype="ampvis2" compare="sim_size"/> 136 <output name="ampvis" value="AalborgWWTPs.rds" ftype="ampvis2" compare="sim_size"/>
98 <output name="metadata_list_out" value="AalborgWWTPs-metadata.list"/> 137 <output name="metadata_list_out" value="AalborgWWTPs-metadata.list"/>
99 <output name="taxonomy_list_out" value="AalborgWWTPs-taxonomy.list"/> 138 <output name="taxonomy_list_out" value="AalborgWWTPs-taxonomy.list"/>
100 <assert_stdout> 139 <assert_stdout>
140 <has_text text="ampvis2 object with 3 elements."/>
101 <has_text text="575.79"/> 141 <has_text text="575.79"/>
102 <has_text text="SampleID, Plant, Date, Year, Period"/> 142 <has_text text="SampleID, Plant, Date, Year, Period"/>
103 <has_text text="200(100%) 194(97%) 177(88.5%) 170(85%) 152(76%) 113(56.5%) 2(1%)"/> 143 <has_text text="200(100%) 194(97%) 177(88.5%) 170(85%) 152(76%) 113(56.5%) 2(1%)"/>
104 </assert_stdout> 144 </assert_stdout>
105 </test> 145 </test>
112 <param name="tree" value="AalborgWWTPs.nwk" ftype="newick"/> 152 <param name="tree" value="AalborgWWTPs.nwk" ftype="newick"/>
113 <output name="ampvis" value="AalborgWWTPs-complete.rds" ftype="ampvis2" compare="sim_size"/> 153 <output name="ampvis" value="AalborgWWTPs-complete.rds" ftype="ampvis2" compare="sim_size"/>
114 <output name="metadata_list_out" value="AalborgWWTPs-metadata.list"/> 154 <output name="metadata_list_out" value="AalborgWWTPs-metadata.list"/>
115 <output name="taxonomy_list_out" value="AalborgWWTPs-taxonomy.list"/> 155 <output name="taxonomy_list_out" value="AalborgWWTPs-taxonomy.list"/>
116 <assert_stdout> 156 <assert_stdout>
157 <has_text text="ampvis2 object with 5 elements."/>
117 <has_text text="575.79"/> 158 <has_text text="575.79"/>
118 <has_text text="SampleID, Plant, Date, Year, Period"/> 159 <has_text text="SampleID, Plant, Date, Year, Period"/>
119 <has_text text="200(100%) 194(97%) 177(88.5%) 170(85%) 152(76%) 113(56.5%) 2(1%)"/> 160 <has_text text="200(100%) 194(97%) 177(88.5%) 170(85%) 152(76%) 113(56.5%) 2(1%)"/>
120 </assert_stdout> 161 </assert_stdout>
121 </test> 162 </test>
122 <!-- test biom 1/2 input (taken from https://github.com/biocore/biom-format/tree/master/examples) 163 <!-- test biom 1/2 input (taken from https://github.com/biocore/biom-format/tree/master/examples)
123 metadata seems not to be loaded from a biom file https://github.com/MadsAlbertsen/ampvis2/issues/129 164 metadata seems not to be loaded from a biom file https://github.com/MadsAlbertsen/ampvis2/issues/129
124 taxonomy is loaded from all but 1 165 taxonomy is loaded from all but 1
125 --> 166 -->
126 <test> 167 <test expect_num_outputs="1">
127 <param name="otutable" value="rich-dense.biom" ftype="biom1"/> 168 <param name="otutable" value="rich-dense.biom" ftype="biom1"/>
169 <param name="write_lists" value=""/>
128 <output name="ampvis" ftype="ampvis2"> 170 <output name="ampvis" ftype="ampvis2">
129 <assert_contents> 171 <assert_contents>
130 <has_size value="748"/> 172 <has_size value="748"/>
131 </assert_contents> 173 </assert_contents>
132 </output> 174 </output>
133 <assert_stdout> 175 <assert_stdout>
176 <has_text text="ampvis2 object with 3 elements."/>
134 <has_text text="4.5"/> 177 <has_text text="4.5"/>
135 <has_text text="SampleID, BarcodeSequence, LinkerPrimerSequence, BODY_SITE, Description"/> 178 <has_text text="SampleID, BarcodeSequence, LinkerPrimerSequence, BODY_SITE, Description"/>
136 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/> 179 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/>
137 </assert_stdout> 180 </assert_stdout>
138 </test> 181 </test>
139 <test> 182 <test expect_num_outputs="1">
140 <param name="otutable" value="rich-sparse.biom" ftype="biom1"/> 183 <param name="otutable" value="rich-sparse.biom" ftype="biom1"/>
184 <param name="write_lists" value=""/>
141 <output name="ampvis" ftype="ampvis2"> 185 <output name="ampvis" ftype="ampvis2">
142 <assert_contents> 186 <assert_contents>
143 <has_size value="751"/> 187 <has_size value="751"/>
144 </assert_contents> 188 </assert_contents>
145 </output> 189 </output>
146 <assert_stdout> 190 <assert_stdout>
191 <has_text text="ampvis2 object with 3 elements."/>
147 <has_text text="4.5"/> 192 <has_text text="4.5"/>
148 <has_text text="SampleID, BarcodeSequence, LinkerPrimerSequence, BODY_SITE, Description"/> 193 <has_text text="SampleID, BarcodeSequence, LinkerPrimerSequence, BODY_SITE, Description"/>
149 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/> 194 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/>
150 </assert_stdout> 195 </assert_stdout>
151 </test> 196 </test>
152 <test> 197 <!-- input file seems to lack metadata: check that no metadata & taxonomy is loaded (ampvis2 adds dummy metadata) -->
198 <test expect_num_outputs="1">
153 <param name="otutable" value="min_sparse_otu_table_hdf5.biom" ftype="biom2"/> 199 <param name="otutable" value="min_sparse_otu_table_hdf5.biom" ftype="biom2"/>
154 <output name="ampvis" ftype="ampvis2"> 200 <output name="ampvis" ftype="ampvis2">
155 <assert_contents> 201 <assert_contents>
156 <has_size value="395"/> 202 <has_size value="395"/>
157 </assert_contents> 203 </assert_contents>
158 </output> 204 </output>
159 <assert_stdout> 205 <param name="write_lists" value=""/>
206 <assert_stdout>
207 <has_text text="ampvis2 object with 3 elements."/>
160 <has_text text="4.5"/> 208 <has_text text="4.5"/>
161 <!-- input file seems to miss metadata check that no metadata & taxonomy is loaded (ampvis2 adds dummy metadata) -->
162 <has_text text="SampleID, DummyVariable"/> 209 <has_text text="SampleID, DummyVariable"/>
163 <has_text text="0(0%) 0(0%) 0(0%) 0(0%) 0(0%) 0(0%) 0(0%)"/> 210 <has_text text="0(0%) 0(0%) 0(0%) 0(0%) 0(0%) 0(0%) 0(0%)"/>
164 </assert_stdout> 211 </assert_stdout>
165 </test> 212 </test>
166 <test> 213 <test expect_num_outputs="1">
167 <param name="otutable" value="rich_sparse_otu_table_hdf5.biom" ftype="biom2"/> 214 <param name="otutable" value="rich_sparse_otu_table_hdf5.biom" ftype="biom2"/>
168 <output name="ampvis" ftype="ampvis2"> 215 <output name="ampvis" ftype="ampvis2">
169 <assert_contents> 216 <assert_contents>
170 <has_size value="753"/> 217 <has_size value="753"/>
171 </assert_contents> 218 </assert_contents>
172 </output> 219 </output>
173 <assert_stdout> 220 <param name="write_lists" value=""/>
221 <assert_stdout>
222 <has_text text="ampvis2 object with 3 elements."/>
174 <has_text text="4.5"/> 223 <has_text text="4.5"/>
175 <has_text text="SampleID, BODY_SITE, BarcodeSequence, Description, LinkerPrimerSequence"/> 224 <has_text text="SampleID, BODY_SITE, BarcodeSequence, Description, LinkerPrimerSequence"/>
176 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/> 225 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/>
177 </assert_stdout> 226 </assert_stdout>
178 </test> 227 </test>
228 <!-- load dada2 ASV table + metadata + taxonomy -->
229 <test expect_num_outputs="3">
230 <param name="otutable" value="dada2-removeBimeraDenovo.tab" ftype="dada2_sequencetable"/>
231 <param name="metadata" value="dada2-metadata.tsv" ftype="tsv"/>
232 <param name="taxonomy" value="dada2-assignTaxonomy.tabular"/>
233 <param name="asv_otu_col_empty" value="true"/>
234 <param name="asv_sequences" value="true"/>
235 <output name="ampvis" ftype="ampvis2">
236 <assert_contents>
237 <has_size min="100"/>
238 </assert_contents>
239 </output>
240 <output name="metadata_list_out">
241 <assert_contents>
242 <has_n_lines n="23"/>
243 <has_n_columns n="4"/>
244 <has_text text="Sample"/>
245 </assert_contents>
246 </output>
247 <output name="taxonomy_list_out">
248 <assert_contents>
249 <has_n_lines n="370"/>
250 <has_n_columns n="2"/>
251 <has_line line="Bacteria&#009;Kingdom"/>
252 </assert_contents>
253 </output>
254 <assert_stdout>
255 <has_text text="ampvis2 object with 4 elements."/> <!-- this also has fasta, i.e. 4 -->
256 <has_text text="6212.45"/>
257 <has_text text="Sample, time"/>
258 <has_text text="232(100%) 232(100%) 232(100%) 231(99.57%) 209(90.09%) 127(54.74%)"/>
259 </assert_stdout>
260 </test>
261 <!-- load data from phyloseq -->
262 <test expect_num_outputs="3">
263 <param name="otutable" value="output.phyloseq" ftype="phyloseq"/>
264 <output name="ampvis" ftype="ampvis2">
265 <assert_contents>
266 <has_size min="100"/>
267 </assert_contents>
268 </output>
269 <output name="metadata_list_out">
270 <assert_contents>
271 <has_n_lines n="6"/>
272 <has_n_columns n="4"/>
273 <has_text text="SampleID"/>
274 </assert_contents>
275 </output>
276 <output name="taxonomy_list_out">
277 <assert_contents>
278 <has_n_lines n="147"/>
279 <has_n_columns n="2"/>
280 <has_line line="Bacteria&#009;Kingdom"/>
281 </assert_contents>
282 </output>
283 <assert_stdout>
284 <has_text text="ampvis2 object with 4 elements."/> <!-- this also has fasta, i.e. 4 -->
285 <has_text text="SampleID, Property, Number"/>
286 <has_text text="64(100%) 64(100%) 64(100%) 64(100%) 62(96.88%) 56(87.5%) 0(0%)"/>
287 </assert_stdout>
288 </test>
179 </tests> 289 </tests>
180 <help><![CDATA[ 290 <help><![CDATA[
181 291
182 What it does 292 What it does
183 ============ 293 ============
184 294
185 This tool reads an OTU-table and corresponding sample metadata, and returns 295 This tool reads an OTU or ASV table and corresponding sample metadata, and returns
186 a RDS data set for use in all ampvis2 tools. It is therefore required to load 296 a RDS data set for use in all ampvis2 tools. It is therefore required to load
187 data with this tool before any other ampvis2 tools can be used. 297 data with this tool before any other ampvis2 tools can be used.
188 298
189 The Galaxy tool calls the `amp_load <https://kasperskytte.github.io/ampvis2/reference/amp_load.html>`_ 299 The Galaxy tool calls the `amp_load <https://kasperskytte.github.io/ampvis2/reference/amp_load.html>`_
190 function of the ampvis2 package. This function validates and corrects the 300 function of the ampvis2 package. This function validates and corrects the
195 Input 305 Input
196 ===== 306 =====
197 307
198 **The OTU-table** 308 **The OTU-table**
199 309
200 contains information about the OTUs, their read counts in each sample, and 310 contains information about the OTU/ASVs, their read counts in each sample, and
201 optionally their assigned taxonomy. The OTU table can be given as 311 optionally their assigned taxonomy. The OTU table can be given as
202 312
203 - Tabular data set 313 - Tabular data set
204 - BIOM version (1 and 2) 314 - BIOM version (1 and 2)
205 315
209 319
210 If given in tabular format the provided OTU-table must be a table with the 320 If given in tabular format the provided OTU-table must be a table with the
211 following requirements: 321 following requirements:
212 322
213 - The rows are OTU IDs and the columns are samples. 323 - The rows are OTU IDs and the columns are samples.
214 - The OTU ID's are expected to be in a column called "OTU", "ASV", or "#OTU ID". 324 - The OTU IDs are by default expected to be in a column called "OTU", "ASV", or "#OTU ID".
325 For data using an empty header for the OTU/ASV column enable the option *OTU/ASV column has empty header*
326 (this allows processing data as produced e.g. by dada2).
215 - The column names of the table are the sample IDs, exactly matching those in 327 - The column names of the table are the sample IDs, exactly matching those in
216 the metadata 328 the metadata
217 - The last 7 columns are optionally the corresponding taxonomy assigned to the 329 - The last 7 columns are optionally the corresponding taxonomy assigned to the
218 OTUs, named "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species". 330 OTUs, named "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species".
331
332 If the ASV IDs are actually the ASV Sequences then enabling
333 *ASV identifiers are the ASV sequences* will rename the identifiers to ASV1, ASV2,...
334 (and save the sequences in the ampvis2 object).
219 335
220 Generally avoid special characters and spaces in row- and column names. 336 Generally avoid special characters and spaces in row- and column names.
221 337
222 The OTU table can also contain the taxonomic information in additional columns: 338 The OTU table can also contain the taxonomic information in additional columns:
223 Kingdom, Phylum, Class, Order, Family, Genus. 339 Kingdom, Phylum, Class, Order, Family, Genus.
230 was taken, date, pH, treatment etc, which is used to compare and group the 346 was taken, date, pH, treatment etc, which is used to compare and group the
231 samples during analysis. The amount of information in the metadata is unlimited, 347 samples during analysis. The amount of information in the metadata is unlimited,
232 it can contain any number of columns (variables), however there are a few 348 it can contain any number of columns (variables), however there are a few
233 requirements: 349 requirements:
234 350
235 - The sample IDs must be in the first column and the column must be named 351 - The sample IDs must be in the first column. The sample IDs must match exactly
236 ``SampleID``. These sample IDs must match exactly to those in the OTU-table. Any 352 to those in the OTU-table. Any unmatched samples between the otutable and
237 unmatched samples between the otutable and metadata will be removed with a 353 metadata will be removed with a warning.
238 warning.
239 - Generally avoid special characters and spaces in row- and column names. 354 - Generally avoid special characters and spaces in row- and column names.
240 355
241 By default the data types of metadata columns are guessed with 356 By default the data types of metadata columns are guessed with
242 ``readr::type_convert``. The guessed column types can be seen in the last (4th) 357 ``readr::type_convert``. The guessed column types can be seen in the last (4th)
243 column of the ``metadata list`` output and also stdout of the tool. Guessing of 358 column of the ``metadata list`` output and also stdout of the tool. Guessing of