Mercurial > repos > iuc > ampvis2_load

diff load.xml @ 3:932d7573a561 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ampvis2 commit 5b6fb9641a1320e13aba367c4e7bc52ae064acc6
author: iuc
date: Mon, 26 Feb 2024 07:53:42 +0000
parents: 8d77d277996e
children: 576dd33588bf
--- a/load.xml	Mon Dec 18 09:40:32 2023 +0000
+++ b/load.xml	Mon Feb 26 07:53:42 2024 +0000
@@ -5,13 +5,24 @@
     </macros>
     <expand macro="header"/>
     <command detect_errors="exit_code"><![CDATA[
-        #if $otutable.ext.startswith("biom")
+        #if $otutable.is_of_type("biom1") or $otutable.is_of_type("biom2")
             ln -s '$otutable' otutable.biom &&
-        #else
-            ln -s '$otutable' otutable.tsv &&
+        #else if not $otutable.is_of_type("phyloseq")
+            ## the ASV/OTU column cannot be specified, so set the needed name
+            ## if it is empty, see https://github.com/KasperSkytte/ampvis2/issues/166
+            ## (the same is done for taxonomy.tsv)
+            #if $asv_otu_col_empty
+                sed -e '1 s/^\t/ASV\t/' '$otutable' > otutable.tsv &&
+            #else
+                ln -s '$otutable' otutable.tsv &&
+            #end if
         #end if
         #if $taxonomy
-            ln -s '$taxonomy' taxonomy.tsv &&
+            #if $asv_otu_col_empty
+                sed -e '1 s/^\t/ASV\t/' '$taxonomy' > taxonomy.tsv &&
+            #else
+                ln -s '$taxonomy' taxonomy.tsv &&
+            #end if
         #end if
         Rscript '$rscript'
     ]]></command>
@@ -21,11 +32,29 @@
             library(readr, quietly = TRUE)
             ## 'manually' load metadata treating all columns as character
             ## giving colClasses to amp_load seems not possible
+            ## - check.names=F: leave empty column names empty .. fixed below
             #if $metadata
-                metadata <- read.table("$metadata", header = TRUE, sep = "\t", colClasses = "character")
+                metadata <- read.table("$metadata", header = TRUE, sep = "\t", colClasses = "character", check.names=F)
+                ## we do not require the metadata to have a 1st column named "SampleID",
+                ## but it should not be empty
+                if(colnames(metadata)[1] == ""){
+                    colnames(metadata)[1] <- "SampleID"
+                }
+                if(exists("SampleID", where = metadata)){
+                    rownames(metadata) <- metadata[["SampleID"]]
+                }else{
+                    rownames(metadata) <- metadata[[1]]
+                }
+            #end if
+
+            #if $otutable.is_of_type("phyloseq")
+                otutable <- readRDS("$otutable")
+                print(class(otutable))
             #end if
             data <- amp_load(
-                #if $otutable.ext.startswith("biom")
+                #if $otutable.is_of_type("phyloseq")
+                    otutable = otutable,
+                #else if $otutable.is_of_type("biom1") or $otutable.is_of_type("biom2")
                     otutable = "otutable.biom",
                 #else
                     otutable = "otutable.tsv",
@@ -44,10 +73,21 @@
                 #end if
                 pruneSingletons = $pruneSingletons
             )
+
+            #if $asv_sequences
+                library(ape, quietly = TRUE)
+
+                seq <- as.DNAbin(strsplit(rownames(data\$abund), ""))
+                names(seq) <- paste0("ASV", seq_along(seq))
+                data\$refseq <- seq
+                data <- matchOTUs(data, seq)
+            #end if
+
             ## try to guess column types with readr::type_convert
             #if $guess_column_types
                 data\$metadata <- readr::type_convert(data\$metadata, guess_integer=TRUE)
             #end if
+
             saveRDS(data, "$ampvis")
             ## write metadata list for biom input or if metadata is given 
             #if "metadata" in $write_lists
@@ -62,12 +102,11 @@
         ]]></configfile>
     </configfiles>
     <inputs>
-        <param argument="otutable" type="data" format="tabular,biom1,biom2" label="OTU table"/>
-        <param argument="metadata" type="data" format="tsv" optional="true" label="Sample metadata">
+        <param argument="otutable" type="data" format="phyloseq,dada2_sequencetable,tabular,biom1,biom2" label="OTU table"/>
+        <param name="asv_otu_col_empty" type="boolean" checked="false" label="OTU/ASV column has empty header" help="By default ampvis2 expects a column named ASV or OTU containing the ASV or OTU identifiers. By checking this a column with an empty header will be used (as produced by dada2)."/>
+        <param name="asv_sequences" type="boolean" checked="false" label="ASV identifiers are the ASV sequences" help="By checking this the identifiers will be renamed to ASV1, ASV2, etc and the sequences will be stored in the ampvis2 object." />
+        <param argument="metadata" type="data" format="tabular,tsv" optional="true" label="Sample metadata">
             <validator type="expression" message="Table must have at least 1 column"><![CDATA[value.metadata.columns > 0]]></validator>
-            <!-- TODO in future versions this might change https://github.com/MadsAlbertsen/ampvis2/pull/134
-                 if so, then also adapt help text and test data -->
-            <validator type="expression" message="First column must be named SampleID"><![CDATA[value.metadata.column_names[0] == "SampleID"]]></validator>
         </param>
         <param name="guess_column_types" type="boolean" checked="true" label="Guess metadata column types" help="See help"/>
         <param argument="taxonomy" type="data" format="tabular" optional="true" label="Taxonomy table"/>
@@ -98,6 +137,7 @@
             <output name="metadata_list_out" value="AalborgWWTPs-metadata.list"/>
             <output name="taxonomy_list_out" value="AalborgWWTPs-taxonomy.list"/>
             <assert_stdout>
+                <has_text text="ampvis2 object with 3 elements."/>
                 <has_text text="575.79"/>
                 <has_text text="SampleID, Plant, Date, Year, Period"/>
                 <has_text text="200(100%)   194(97%) 177(88.5%)   170(85%)   152(76%) 113(56.5%)      2(1%)"/>
@@ -114,6 +154,7 @@
             <output name="metadata_list_out" value="AalborgWWTPs-metadata.list"/>
             <output name="taxonomy_list_out" value="AalborgWWTPs-taxonomy.list"/>
             <assert_stdout>
+                <has_text text="ampvis2 object with 5 elements."/>
                 <has_text text="575.79"/>
                 <has_text text="SampleID, Plant, Date, Year, Period"/>
                 <has_text text="200(100%)   194(97%) 177(88.5%)   170(85%)   152(76%) 113(56.5%)      2(1%)"/>
@@ -123,66 +164,135 @@
              metadata seems not to be loaded from a biom file https://github.com/MadsAlbertsen/ampvis2/issues/129
              taxonomy is loaded from all but 1 
             -->
-        <test>
+        <test expect_num_outputs="1">
             <param name="otutable" value="rich-dense.biom" ftype="biom1"/>
+            <param name="write_lists" value=""/>
             <output name="ampvis" ftype="ampvis2">
                 <assert_contents>
                     <has_size value="748"/>
                 </assert_contents>
             </output>
             <assert_stdout>
+                <has_text text="ampvis2 object with 3 elements."/>
                 <has_text text="4.5"/>
                 <has_text text="SampleID, BarcodeSequence, LinkerPrimerSequence, BODY_SITE, Description"/>
                 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%)  1(20%)"/>
             </assert_stdout>
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="otutable" value="rich-sparse.biom" ftype="biom1"/>
+            <param name="write_lists" value=""/>
             <output name="ampvis" ftype="ampvis2">
                 <assert_contents>
                     <has_size value="751"/>
                 </assert_contents>
             </output>
             <assert_stdout>
+                <has_text text="ampvis2 object with 3 elements."/>
                 <has_text text="4.5"/>
                 <has_text text="SampleID, BarcodeSequence, LinkerPrimerSequence, BODY_SITE, Description"/>
                 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%)  1(20%)"/>
             </assert_stdout>
         </test>
-        <test>
+        <!-- input file seems to lack metadata: check that no metadata & taxonomy is loaded (ampvis2 adds dummy metadata) -->
+        <test expect_num_outputs="1">
             <param name="otutable" value="min_sparse_otu_table_hdf5.biom" ftype="biom2"/>
             <output name="ampvis" ftype="ampvis2">
                 <assert_contents>
                     <has_size value="395"/>
                 </assert_contents>
             </output>
+            <param name="write_lists" value=""/>
             <assert_stdout>
+                <has_text text="ampvis2 object with 3 elements."/>
                 <has_text text="4.5"/>
-                <!-- input file seems to miss metadata check that no metadata & taxonomy is loaded (ampvis2 adds dummy metadata) -->
                 <has_text text="SampleID, DummyVariable"/>
                 <has_text text="0(0%)   0(0%)   0(0%)   0(0%)   0(0%)   0(0%)   0(0%)"/>
             </assert_stdout>
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="otutable" value="rich_sparse_otu_table_hdf5.biom" ftype="biom2"/>
             <output name="ampvis" ftype="ampvis2">
                 <assert_contents>
                     <has_size value="753"/>
                 </assert_contents>
             </output>
+            <param name="write_lists" value=""/>
             <assert_stdout>
+                <has_text text="ampvis2 object with 3 elements."/>
                 <has_text text="4.5"/>
                 <has_text text="SampleID, BODY_SITE, BarcodeSequence, Description, LinkerPrimerSequence"/>
                 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%)  1(20%)"/>
             </assert_stdout>
         </test>
+        <!-- load dada2 ASV table + metadata + taxonomy -->
+        <test expect_num_outputs="3">
+            <param name="otutable" value="dada2-removeBimeraDenovo.tab" ftype="dada2_sequencetable"/>
+            <param name="metadata" value="dada2-metadata.tsv" ftype="tsv"/>
+            <param name="taxonomy" value="dada2-assignTaxonomy.tabular"/>
+            <param name="asv_otu_col_empty" value="true"/>
+            <param name="asv_sequences" value="true"/>
+            <output name="ampvis" ftype="ampvis2">
+                <assert_contents>
+                    <has_size min="100"/>
+                </assert_contents>
+            </output>
+            <output name="metadata_list_out">
+                <assert_contents>
+                    <has_n_lines n="23"/>
+                    <has_n_columns n="4"/>
+                    <has_text text="Sample"/>
+                </assert_contents>
+            </output>
+            <output name="taxonomy_list_out">
+                <assert_contents>
+                    <has_n_lines n="370"/>
+                    <has_n_columns n="2"/>
+                    <has_line line="Bacteria&#009;Kingdom"/>
+                </assert_contents>
+            </output>
+            <assert_stdout>
+                <has_text text="ampvis2 object with 4 elements."/> <!-- this also has fasta, i.e. 4 -->
+                <has_text text="6212.45"/>
+                <has_text text="Sample, time"/>
+                <has_text text="232(100%)   232(100%)   232(100%) 231(99.57%) 209(90.09%) 127(54.74%)"/>
+            </assert_stdout>
+        </test>
+        <!-- load data from phyloseq -->
+        <test expect_num_outputs="3">
+            <param name="otutable" value="output.phyloseq" ftype="phyloseq"/>
+            <output name="ampvis" ftype="ampvis2">
+                <assert_contents>
+                    <has_size min="100"/>
+                </assert_contents>
+            </output>
+            <output name="metadata_list_out">
+                <assert_contents>
+                    <has_n_lines n="6"/>
+                    <has_n_columns n="4"/>
+                    <has_text text="SampleID"/>
+                </assert_contents>
+            </output>
+            <output name="taxonomy_list_out">
+                <assert_contents>
+                    <has_n_lines n="147"/>
+                    <has_n_columns n="2"/>
+                    <has_line line="Bacteria&#009;Kingdom"/>
+                </assert_contents>
+            </output>
+            <assert_stdout>
+                <has_text text="ampvis2 object with 4 elements."/> <!-- this also has fasta, i.e. 4 -->
+                <has_text text="SampleID, Property, Number"/>
+                <has_text text="64(100%)   64(100%)   64(100%)   64(100%) 62(96.88%)  56(87.5%)      0(0%)"/>
+            </assert_stdout>
+        </test>
     </tests>
     <help><![CDATA[
 
 What it does
 ============
 
-This tool reads an OTU-table and corresponding sample metadata, and returns
+This tool reads an OTU or ASV table and corresponding sample metadata, and returns
 an RDS data set for use in all ampvis2 tools. It is therefore required to load
 data with this tool before any other ampvis2 tools can be used.
 
@@ -197,7 +307,7 @@
 
 **The OTU-table**
 
-contains information about the OTUs, their read counts in each sample, and
+contains information about the OTU/ASVs, their read counts in each sample, and
 optionally their assigned taxonomy. The OTU table can be given as
 
 - Tabular data set
@@ -211,12 +321,18 @@
 following requirements:
 
 - The rows are OTU IDs and the columns are samples.
-- The OTU ID's are expected to be in a column called "OTU", "ASV", or "#OTU ID".
+- The OTU IDs are by default expected to be in a column called "OTU", "ASV", or "#OTU ID".
+  For data using an empty header for the OTU/ASV column, enable the option *OTU/ASV column has empty header*
+  (this allows processing data as produced, e.g., by dada2).
 - The column names of the table are the sample IDs, exactly matching those in
   the metadata
 - The last 7 columns are optionally the corresponding taxonomy assigned to the
   OTUs, named "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species".
 
+If the ASV IDs are actually the ASV sequences, then enabling
+*ASV identifiers are the ASV sequences* will rename the identifiers to ASV1, ASV2, ...
+(and save the sequences in the ampvis2 object).
+
 Generally avoid special characters and spaces in row- and column names.
 
 The OTU table can also contain the taxonomic information in additional columns:
@@ -232,10 +348,9 @@
 it can contain any number of columns (variables), however there are a few
 requirements:
     
-- The sample IDs must be in the first column and the column must be named
-  ``SampleID``. These sample IDs must match exactly to those in the OTU-table. Any
-  unmatched samples between the otutable and metadata will be removed with a
-  warning.
+- The sample IDs must be in the first column. The sample IDs must match exactly
+  to those in the OTU-table. Any unmatched samples between the otutable and
+  metadata will be removed with a warning.
 - Generally avoid special characters and spaces in row- and column names.
 
 By default the data types of metadata columns are guessed with
author	iuc
date	Mon, 26 Feb 2024 07:53:42 +0000
parents	8d77d277996e
children	576dd33588bf