Mercurial > repos > rnateam > sortmerna

diff sortmerna.xml @ 10:10b84b577117 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/sortmerna commit 7dca92baf6a91f10bced28b883f14d3f4d7630c5
author: rnateam
date: Sat, 21 Oct 2023 09:07:21 +0000
parents: eb35257d2e29
--- a/sortmerna.xml	Wed Mar 20 03:03:08 2019 -0400
+++ b/sortmerna.xml	Sat Oct 21 09:07:21 2023 +0000
@@ -1,43 +1,9 @@
-<tool id="bg_sortmerna" name="Filter with SortMeRNA" version="@VERSION@.6">
+<tool id="bg_sortmerna" name="Filter with SortMeRNA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
     <description>of ribosomal RNAs in metatranscriptomic data</description>
     <macros>
-        <token name="@VERSION@">2.1b</token>
-        <xml name="db_prep">
-            <param name="seed_length" type="integer" min="0" max="100" value="18" label="Seed length for database indexing" help="(-L)"/>
-            <param name="max_pos" type="integer" min="0" max="100000" value="10000" label="Maximum number of positions to store for each k-mer for database indexing" help="With 0, all positions are stored (--max_pos)"/>
-        </xml>
-        <xml name="output_alignments">
-            <param name="print_all_reads" type="boolean" checked="false" truevalue="--print_all_reads" falsevalue="" label="Output null alignment strings for non-aligned reads"/>
-            <conditional name="blast">
-                <param name="blast_output" type="select" label="Output BLAST report?">
-                    <option value="True">Yes</option>
-                    <option value="False" selected="True">No</option>
-                </param>
-                <when value="True">
-                    <param name="blast_format" type="select" label="BLAST-like format?">
-                        <option value="0">pairwise (--blast '0')</option>
-                        <option value="1">tabular BLAST -m 8 format (--blast '1')</option>
-                        <option value="1 cigar">tabular + column for CIGAR (--blast '1 cigar')</option>
-                        <option value="1 cigar qcov">tabular + columns for CIGAR and query coverage (--blast '1 cigar qcov')</option>
-                        <option value="1 cigar qcov qstrand">tabular + columns for CIGAR, query coverage and strand (--blast '1 cigar qcov qstrand')</option>
-                    </param>
-                </when>
-                <when value="False"/>
-            </conditional>
-        </xml>
-        <token name="@ALIGNMENTS@">
-            $report.print_all_reads
-            --sam
-            --SQ
-            #if $report.blast.blast_output == 'True'
-                --blast '$report.blast.blast_format'
-            #end if
-        </token>
+        <import>macros.xml</import>
     </macros>
-    <requirements>
-        <requirement type="package" version="@VERSION@">sortmerna</requirement>
-        <requirement type="package" version="1.5">samtools</requirement>
-    </requirements>
+    <expand macro="requirements" />
     <stdio>
         <regex match="This program builds a Burst trie on an input rRNA database"
             source="both"
@@ -59,135 +25,100 @@
     </version_command>
     <command>
 <![CDATA[
-    #set $ref = ''
-    #set $sep=''
-    #if str( $databases_type.databases_selector ) == 'history'
-        #for $db in $databases_type.database_name
-            #set $ref += $sep + str($db) + ',' + $os.path.splitext($os.path.basename(str($db)))[0]
-            #set $sep = ':'
-        #end for
-    #else if str( $databases_type.databases_selector ) == 'cached_to_index'
-        #for $db in $databases_type.input_databases.fields.path.split(",")
-            #set $ref += $sep + $db + ',' + $os.path.splitext($db)[0] + '-reindexed'
-            #set $sep = ':'
-        #end for
-    #else
-        #for $db in $databases_type.input_databases.fields.path.split(",")
-            #set $ref += $sep + $db + ',' + $os.path.splitext($db)[0]
-            #set $sep = ':'
-        #end for
-    #end if
+
+    @PREPROCESSING@
+    
+    mkdir -p './aligned' &&
+    mkdir -p './kvdb_folder' &&
+    mkdir -p './readb_folder' &&
+    mkdir -p './idx_folder' &&
 
-    #if str( $databases_type.databases_selector ) != 'cached'
-        indexdb_rna 
-            --ref '$ref'
-            -L '$databases_type.seed_length'
-            --max_pos '$databases_type.max_pos'
-        &&
-    #end if
-
-    #if str( $sequencing_type.sequencing_type_selector ) == 'paired'
-        merge-paired-reads.sh
-            '$sequencing_type.forward_reads'
-            '$sequencing_type.reverse_reads'
-            merged-reads
-        &&
-    #end if
-
-    sortmerna 
-        --ref '$ref'
+    sortmerna
+        #for $reference in $ref
+            --ref '$reference'
+        #end for
+        -L '$databases_type.seed_length'
+        --max_pos '$databases_type.max_pos'
         --aligned 'aligned'
-        #if str( $sequencing_type.sequencing_type_selector ) == 'paired'
-            --reads 'merged-reads'
+        #if $sequencing_type.sequencing_type_selector == 'paired'
+            --reads '$sequencing_type.forward_reads'
+            --reads '$sequencing_type.reverse_reads'
+            $sequencing_type.paired_type
+        #elif $sequencing_type.sequencing_type_selector == 'interleaved'
+            --reads '$sequencing_type.reads'
+            --paired
+            $sequencing_type.paired_type
+        #else if $sequencing_type.sequencing_type_selector == 'paired_collection'
+            --reads '${$sequencing_type.reads.forward}'
+            --reads '${$sequencing_type.reads.reverse}'
             $sequencing_type.paired_type
         #else
             --reads '$sequencing_type.reads'
         #end if
         $strand_search
-        $log
         $aligned_fastx.aligned_fastx_selector
         #if $aligned_fastx.aligned_fastx_selector == '--fastx'
             #if $aligned_fastx.other
                 --other 'unaligned'
             #end if
         #end if
-        #if $report.report_type == 'best'
+        #if $report.report_type == 'number_alignments'
             @ALIGNMENTS@
-            #if $report.otu.otu_map == 'True'
-                --otu_map
-                --id '$report.otu.id'
-                --coverage '$report.otu.coverage'
-                $report.otu.de_novo_otu
+            @OTU_PICKING@
+
+            #if $report.report_num_alignments.output_alignments == 'all'
+                --num_alignments '0'
+            #else
+                --num_alignments $report.report_num_alignments.num_alignments
             #end if
-            #if $report.report_best.report_best_type == '1'
-                --best 1
-                --min_lis '$report.report_best.report_best_min_lis'
-            #else
-                --best '$report.report_best.report_best_value'
-                --min_lis '$report.report_best.report_best_min_lis'
-            #end if
-        #elif $report.report_type == 'num_alignments'
+            $report.no_best
+        #elif $report.report_type == 'min_lis'
             @ALIGNMENTS@
-            #if $report.report_num_alignments.report_num_alignments_type == 'other_value'
-                --num_alignments '$report.report_num_alignments.report_num_alignments_value'
-            #else
-                --num_alignments '$report.report_num_alignments.report_num_alignments_type'
-            #end if
+            @OTU_PICKING@
+            --min_lis $report.min_lis
         #end if
         -e '$e_value'
         --match '$match'
         --mismatch '$mismatch'
         --gap_open '$gap_open'
         --gap_ext '$gap_ext'
-        -N '$ambiguous_letter'
-        -a \${GALAXY_SLOTS:-1}
+        --threads \${GALAXY_SLOTS:-1}
+        -m \${GALAXY_MEMORY_MB:-8192}
+        --kvdb 'kvdb_folder'
+        --idx-dir 'idx_folder'
+        --readb 'readb_folder'
+        #if $aligned_fastx.aligned_fastx_selector == '--fastx' and str($sequencing_type.sequencing_type_selector) == 'paired'
+            --out2
+        #end if
     #if $report.report_type != 'None'
-        &&
-        samtools view -@ "\${GALAXY_SLOTS:-4}" -u aligned.sam | samtools sort -@ "\${GALAXY_SLOTS:-4}" -T tmp -O bam -o '$output_bam'
+        && samtools sort aligned.sam -@ "\${GALAXY_SLOTS:-4}" -T tmp -O bam -o '$output_bam'
     #end if
-
-    #if $aligned_fastx.aligned_fastx_selector == '--fastx' and str($sequencing_type.sequencing_type_selector) == 'paired'
-        #if str($sequencing_type.paired_type) != ''
-            &&
-            unmerge-paired-reads.sh
-                aligned.fast*
-                '$aligned_forward'
-                '$aligned_reverse'
-            #if $aligned_fastx.other
-                &&
-                unmerge-paired-reads.sh
-                    unaligned.fast*
-                    '$unaligned_forward'
-                    '$unaligned_reverse'
-            #end if
-        #else
-            &&
-            mv aligned.fast* '$aligned_paired'
-            #if $aligned_fastx.other
-                &&
-                mv unaligned.fast* '$unaligned_paired'
-            #end if
-        #end if
-    #end if
+    && ls -lah
 ]]>
     </command>
     <inputs>
         <conditional name="sequencing_type">
             <param name="sequencing_type_selector" type="select" label="Sequencing type">
-                <option value="not_paired">Reads are not paired</option>
-                <option value="paired">Reads are paired</option>
+                <option value="not_paired">Single-end reads</option>
+                <option value="paired">Paired-end reads</option>
+                <option value="interleaved">Interleaved paired-end reads</option>
+                <option value="paired_collection">Paired collection</option>
             </param>
             <when value="not_paired">
-                <param argument="--reads" type="data" format="fasta,fastq" label="Querying sequences"/>
+                <expand macro="reads_macro"/>
             </when>
             <when value="paired">
-                <param name="forward_reads" type="data" format="fasta,fastq" label="Forward reads"/>
-                <param name="reverse_reads" type="data" format="fasta,fastq" label="Reverse reads"/>
-                <param name="paired_type" type="select" display="radio" label="If one of the paired-end reads aligns and the other one does not">
-                    <option value="">Leave the reads split between aligned and rejected files</option>
-                    <option value="--paired_in">Output both reads to aligned file (--paired_in)</option>
-                    <option value="--paired_out">Output both reads to rejected file (--paired_out)</option>
-                </param>
+                <param name="forward_reads" type="data" format="fasta,fastq,fastq.gz,fasta.gz" label="Forward reads"/>
+                <param name="reverse_reads" type="data" format="fasta,fastq,fastq.gz,fasta.gz" label="Reverse reads"/>
+                <expand macro="paired_type_macro"/>
+            </when>
+            <when value="interleaved">
+                <expand macro="reads_macro"/>
+                <expand macro="paired_type_macro"/>
+            </when>
+            <when value="paired_collection">
+                <param name="reads" format="fasta,fastq,fasta.gz,fastq.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
+                <expand macro="paired_type_macro"/>
             </when>
         </conditional>
         <param name="strand_search" type="select" label="Which strands to search">
@@ -198,25 +129,21 @@
         <conditional name="databases_type">
             <param name="databases_selector" type="select" label="Databases to query"
                 help="Public rRNA databases provided with SortMeRNA have been indexed. On the contrary, personal databases must be indexed each time SortMeRNA is launched. Please be patient, this may take some time depending on the size of the given database.">
-                <option value="cached" selected="true">Public pre-indexed ribosomal databases</option>
-                <option value="cached_to_index">Public ribosomal databases to index with non default parameters</option>
+                <option value="cached" selected="true">Public ribosomal databases</option>
                 <option value="history">Databases from your history</option>
             </param>
             <when value="cached">
-                <param name="input_databases" label="rRNA databases" type="select" display="checkboxes" multiple="true">
-                    <options from_data_table="rRNA_databases" />
-                    <validator type="no_options" message="Select at least one database"/>
-                </param>
-            </when>
-            <when value="cached_to_index">
-                <param name="input_databases" label="rRNA databases" type="select" display="checkboxes" multiple="true">
-                    <options from_data_table="rRNA_databases" />
-                    <validator type="no_options" message="Select at least one database"/>
+                <param name="input_databases" label="rRNA databases" type="select" optional="false" multiple="true">
+                    <options from_data_table="rRNA_databases">
+                        <column name="name" index="1"/>
+                        <column name="value" index="2"/>
+                    </options>
+                    <validator type="no_options" message="No options available. Contact your Galaxy administrator."/>
                 </param>
                 <expand macro="db_prep"/>
             </when>
             <when value="history">
-                <param name="database_name" type="data" format="fasta" multiple="true" label="rRNA databases" help="Your databases will be indexed first, which may take up to several minutes."/>
+                <param name="database_name" type="data" format="fasta,fasta.gz" multiple="true" label="rRNA databases" help="Your databases will be indexed first, which may take up to several minutes."/>
                 <expand macro="db_prep"/>
             </when>
         </conditional>
@@ -231,92 +158,94 @@
             </when>
             <when value=""/>
         </conditional>
-        <param argument="--log" type="boolean" checked="false" truevalue="--log" falsevalue="" label="Generate statistics file" help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution">
-        </param>
         <conditional name="report">
             <param name="report_type" type="select" label="Alignment report">
                 <option value="None">Do not report alignments</option>
-                <option value="best">Report best alignments per read reaching E-value</option>
-                <option value="num_alignments">Report first alignements per read reaching E-value</option>
+                <option value="number_alignments">Report N number of alignments reaching E-value</option>
+                <option value="min_lis">Report alignments that have the LIS of at least N seeds long reaching E-value</option>
             </param>
             <when value="None"/>
-            <when value="best">
-                <expand macro="output_alignments"/>
-                <conditional name="otu">
-                    <param name="otu_map" type="select" label="Pick OTUs?">
-                        <option value="True">Yes</option>
-                        <option value="False" selected="true">No</option>
+            <when value="number_alignments">
+                <conditional name="report_num_alignments">
+                    <param name="output_alignments" type="select" label="Number of output alignments" help="Reporting all alignments can be time consuming; this option is not suggested for high similarity rRNA databases."> <!-- UI-only selector, not a SortMeRNA flag: the command template turns 'all' into num_alignments 0 -->
+                        <option value="all">All alignments reaching the E-value threshold are reported</option>
+                        <option value="custom">Custom number of alignments</option>
                     </param>
-                    <when value="True">
-                        <param argument="--id" type="float" value="0.97" label="Percentage id similarity threshold" help="The alignment must still pass the E-value threshold" />
-                        <param argument="--coverage" type="float" value="0.97" label="Percentage query coverage threshold" help="The alignment must still pass the E-value threshold" />
-                        <param name="de_novo_otu" type="boolean" truevalue="--de_novo_otu" falsevalue="" label="FASTA/FASTQ file for reads matching database below percentage id" help="--de_novo_otu" />
-                    </when>
-                    <when value="False"/>
-                </conditional>
-                <conditional name="report_best">
-                    <param argument="report_best_type" type="select" label="Number of searched alignments" help="Only the best alignment is reported (--best)">
-                        <option value="1" selected="true">Only one high-candidate reference sequence is searched for alignments (fast). The high-candidate sequences are determined heuristically using a LIS of seed matches)</option>
-                        <option value="other_value">A custom number of reference sequences are searched for alignments (speed decrease for high value)</option>
-                    </param>
-                    <when value="1">
-                        <param name="report_best_min_lis" type="integer" min="0" max="100" value="2" label="Number of longest LIS an alignement needs to be searched" help="The alignements having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is computed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment. (--min_lis)"/>
-                    </when>
-                    <when value="other_value">
-                        <param name="report_best_value" type="integer" min="2" max="100" value="2" label="Number of alignments to be made" help="Only the best one is reported. The computation speed decrease with high value"/>
-                        <param name="report_best_min_lis" type="integer" min="0" max="100" value="2" label="Number of longest LIS an alignement needs to be searched" help="The alignements having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is computed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment. (--min_lis)"/>
+                    <when value="all" />
+                    <when value="custom">
+                        <param argument="--num_alignments" type="integer" min="1" max="100" value="1" label="Number of alignments to be reported"/>
                     </when>
                 </conditional>
-            </when>
-            <when value="num_alignments">
+                <param argument="--no-best" type="boolean" truevalue="--no-best" falsevalue="" checked="false" label="Disable best alignments search" help="The 'best' alignment is the highest 
+                    scoring alignment out of All alignments of a read, and the read can potentially be aligned (reaching E-value threshold) to multiple reference sequences. By default the 
+                    program searches for best alignments i.e. performs an exhaustive search over all references. Using '--no-best' will make the program search just the first N alignments." /> <!-- the truevalue is inserted verbatim into the command line through $report.no_best -->
                 <expand macro="output_alignments"/>
-                <conditional name="report_num_alignments">
-                    <param name="report_num_alignments_type" type="select" label="Number of output alignments" help="(--num_alignments)">
-                        <option value="0">All alignments reaching the E-value threshold are reported (very slow, this option is not suggested for high similarity rRNA databases)</option>
-                        <option value="1" selected="true">The first alignment passing E-value threshold are reported (very fast, best choice if only filtering is needed)</option>
-                        <option value="other_value">A custom number of alignments are made and reported (speed decrease for high value)</option>
-                    </param>
-                    <when value="0" />
-                    <when value="1" />
-                    <when value="other_value">
-                        <param name="report_num_alignments_value" type="integer" min="0" max="100" value="1" label="Number of alignments to be made and reported" help=""/>
-                    </when>
-                </conditional>
+                <expand macro="otu_picking"/>
+            </when>
+            <when value="min_lis">
+                <param argument="--min_lis" type="integer" min="0" value="2" label="Minimum Longest Increasing Subsequence (LIS)" help="It is computed using seeds, which are k-mers common to 
+                    the read and the reference sequence. Sorted sequences of such seeds are used to filter the candidate references prior to performing the Smith-Waterman alignment." /> <!-- default of 2 matches the removed report_best_min_lis parameter, so the required field is never empty -->
+                <expand macro="output_alignments"/>
+                <expand macro="otu_picking"/>
             </when>
         </conditional>
-        <param name="e_value" type="float" min="0" max="10" value="1" label="E-value threshold" help="(-e)"/>
         <param argument="--match" type="integer" min="0" max="10" value="2" label="SW score for a match"/>
         <param argument="--mismatch" type="integer" min="-10" max="0" value="-3" label="SW penalty for a mismatch"/>
         <param name="gap_open" type="integer" min="0" max="10" value="5" label="SW penalty for introducing a gap" help="(--gap_open)"/>
         <param name="gap_ext" type="integer" min="0" max="10" value="2" label="SW penalty for extending a gap" help="(--gap_ext)"/>
-        <param name="ambiguous_letter" type="integer" min="-10" max="0" value="-3" label="SW penalty for ambiguous letters (N's)" help="(-N)"/>
+        <param name="e_value" argument="-e" type="float" min="0" value="1" label="E-value threshold" help="Defines the 'statistical significance' of a local alignment. Exponentially 
+            correlates with the Minimal Alignment score. Higher E-values (100, 1000, ...) cause more reads to pass the alignment threshold."/> <!-- no upper bound, so the larger E-values suggested in the help text are accepted -->
+        <param argument="-N" type="boolean" truevalue="True" falsevalue="False" checked="false" label="SW penalty for ambiguous letters (N's)" help="Scored as --mismatch" />
     </inputs>
     <outputs>
-        <data name="output_fastx" format_source="reads" from_work_dir="aligned.dat" label="${tool.name} on ${on_string}: Aligned reads">
-            <filter>aligned_fastx['aligned_fastx_selector'] != '' and sequencing_type['sequencing_type_selector'] != 'paired'</filter>
+        <data name="aligned" format_source="reads" from_work_dir="aligned.f*" label="${tool.name} on ${on_string}: Aligned reads">
+            <filter>aligned_fastx['aligned_fastx_selector'] != ''</filter>
+            <filter>sequencing_type['sequencing_type_selector'] != 'paired'</filter>
         </data>
-        <data name="aligned_paired" format_source="forward_reads" label="${tool.name} on ${on_string}: Aligned reads">
-            <filter>aligned_fastx['aligned_fastx_selector'] != '' and sequencing_type['sequencing_type_selector'] == 'paired' and sequencing_type['paired_type'] == ''</filter>
+        <data name="aligned_forward" format_source="forward_reads" from_work_dir="aligned_fwd*" label="${tool.name} on ${on_string}: Aligned forward reads">
+            <filter>aligned_fastx['aligned_fastx_selector'] != '' and sequencing_type['sequencing_type_selector'] == 'paired'</filter>
+        </data>
+        <data name="aligned_reverse" format_source="reverse_reads" from_work_dir="aligned_rev*" label="${tool.name} on ${on_string}: Aligned reverse reads">
+            <filter>aligned_fastx['aligned_fastx_selector'] != '' and sequencing_type['sequencing_type_selector'] == 'paired'</filter>
         </data>
-        <data name="aligned_forward" format_source="forward_reads" label="${tool.name} on ${on_string}: Aligned forward reads">
-            <filter>aligned_fastx['aligned_fastx_selector'] != '' and sequencing_type['sequencing_type_selector'] == 'paired' and sequencing_type['paired_type'] != ''</filter>
+        <data name="aligned_forward_singleton" format_source="forward_reads" from_work_dir="aligned_singleton_fwd*" label="${tool.name} on ${on_string}: Aligned forward singleton reads">
+            <filter>aligned_fastx['aligned_fastx_selector'] != ''</filter>
+            <filter>sequencing_type['sequencing_type_selector'] == 'paired'</filter>
+            <filter>sequencing_type['paired_type'] == '--sout'</filter>
         </data>
-        <data name="aligned_reverse" format_source="reverse_reads" label="${tool.name} on ${on_string}: Aligned reverse reads">
-            <filter>aligned_fastx['aligned_fastx_selector'] != '' and sequencing_type['sequencing_type_selector'] == 'paired' and sequencing_type['paired_type'] != ''</filter>
+        <data name="aligned_reverse_singleton" format_source="reverse_reads" from_work_dir="aligned_singleton_rev*" label="${tool.name} on ${on_string}: Aligned reverse singleton reads">
+            <filter>aligned_fastx['aligned_fastx_selector'] != ''</filter>
+            <filter>sequencing_type['sequencing_type_selector'] == 'paired'</filter>
+            <filter>sequencing_type['paired_type'] == '--sout'</filter>
         </data>
-        <data name="output_other" format_source="reads" from_work_dir="unaligned.dat" label="${tool.name} on ${on_string}: Unaligned reads">
-            <filter>aligned_fastx['aligned_fastx_selector'] != '' and aligned_fastx['other'] == True and sequencing_type['sequencing_type_selector'] != 'paired'</filter>
+        <data name="unaligned" format_source="reads" from_work_dir="unaligned.f*" label="${tool.name} on ${on_string}: Unaligned reads">
+            <filter>aligned_fastx['aligned_fastx_selector'] != ''</filter>
+            <filter>aligned_fastx['other'] == True</filter> <!-- the command passes 'unaligned' as the prefix for rejected reads only when this is set -->
+            <filter>sequencing_type['sequencing_type_selector'] != 'paired'</filter>
         </data>
-        <data name="unaligned_paired" format_source="forward_reads" label="${tool.name} on ${on_string}: Unaligned reads">
-            <filter>aligned_fastx['aligned_fastx_selector'] != '' and aligned_fastx['other'] == True and sequencing_type['sequencing_type_selector'] == 'paired' and sequencing_type['paired_type'] == ''</filter>
+        <data name="unaligned_forward" format_source="forward_reads" from_work_dir="unaligned_fwd*" label="${tool.name} on ${on_string}: Unaligned forward reads">
+            <filter>aligned_fastx['aligned_fastx_selector'] != ''</filter>
+            <filter>aligned_fastx['other'] == True</filter>
+            <filter>sequencing_type['sequencing_type_selector'] == 'paired'</filter>
         </data>
-        <data name="unaligned_forward" format_source="forward_reads" label="${tool.name} on ${on_string}: Unaligned forward reads">
-            <filter>aligned_fastx['aligned_fastx_selector'] != '' and aligned_fastx['other'] == True and sequencing_type['sequencing_type_selector'] == 'paired' and sequencing_type['paired_type'] != ''</filter>
+        <data name="unaligned_reverse" format_source="reverse_reads" from_work_dir="unaligned_rev*" label="${tool.name} on ${on_string}: Unaligned reverse reads">
+            <filter>aligned_fastx['aligned_fastx_selector'] != ''</filter>
+            <filter>aligned_fastx['other'] == True</filter>
+            <filter>sequencing_type['sequencing_type_selector'] == 'paired'</filter>
         </data>
-        <data name="unaligned_reverse" format_source="reverse_reads" label="${tool.name} on ${on_string}: Unaligned reverse reads">
-            <filter>aligned_fastx['aligned_fastx_selector'] != '' and aligned_fastx['other'] == True and sequencing_type['sequencing_type_selector'] == 'paired' and sequencing_type['paired_type'] != ''</filter>
+        <data name="unaligned_forward_singleton" format_source="forward_reads" from_work_dir="unaligned_singleton_fwd*" label="${tool.name} on ${on_string}: Unaligned forward singleton reads">
+            <filter>aligned_fastx['aligned_fastx_selector'] != ''</filter>
+            <filter>aligned_fastx['other'] == True</filter>
+            <filter>sequencing_type['sequencing_type_selector'] == 'paired'</filter>
+            <filter>sequencing_type['paired_type'] == '--sout'</filter> <!-- singleton files mirror the aligned_singleton_fwd pattern used for the aligned output -->
         </data>
-        <data name="output_bam" format="bam" label="${tool.name} on ${on_string}: Alignments">
+        <data name="unaligned_reverse_singleton" format_source="reverse_reads" from_work_dir="unaligned_singleton_rev*" label="${tool.name} on ${on_string}: Unaligned reverse singleton reads">
+            <filter>aligned_fastx['aligned_fastx_selector'] != ''</filter>
+            <filter>aligned_fastx['other'] == True</filter>
+            <filter>sequencing_type['sequencing_type_selector'] == 'paired'</filter>
+            <filter>sequencing_type['paired_type'] == '--sout'</filter> <!-- singleton files mirror the aligned_singleton_rev pattern used for the aligned output -->
+        </data>
+        <data name="output_bam" format="bam" label="${tool.name} on ${on_string}: Alignments (BAM)">
             <filter>report['report_type'] != 'None'</filter>
         </data>
         <data name="output_blast" format="tabular" from_work_dir="aligned.blast" label="${tool.name} on ${on_string}: BLAST report">
@@ -325,18 +254,16 @@
                 <when input="aligned_blast.aligned_blast_format" value="0" format="txt" />
             </change_format>
         </data>
-        <data name="output_biom" format="txt" from_work_dir="aligned_otus.txt" label="${tool.name} on ${on_string}: OTU map">
-            <filter>report['report_type'] != 'None' and report['report_type'] == 'best' and report['otu']['otu_map'] == 'True'</filter>
+        <data name="output_biom" format="txt" from_work_dir="aligned/otu_map.txt" label="${tool.name} on ${on_string}: OTU map">
+            <filter>report['report_type'] != 'None' and report['otu']['otu_map'] == 'True'</filter>
         </data>
-        <data name="output_de_novo" format_source="reads" from_work_dir="aligned_denovo.dat" label="${tool.name} on ${on_string}: De novo reads matching database">
-            <filter>report['report_type'] != 'None' and report['report_type'] == 'best' and report['otu']['otu_map'] == 'True' and report['otu']['de_novo_otu'] == True</filter>
-        </data>
-        <data name="output_log" format="txt" label="${tool.name} on ${on_string}: Log" from_work_dir="aligned.log">
-            <filter>log == True</filter>
+        
+        <data name="output_de_novo" format_source="reads" from_work_dir="aligned_denovo*" label="${tool.name} on ${on_string}: De novo reads matching database">
+            <filter>report['report_type'] != 'None' and report['otu']['otu_map'] == 'True' and report['otu']['de_novo_otu'] == True</filter>
         </data>
     </outputs>
     <tests>
-        <test>
+        <test expect_num_outputs="4">
             <conditional name="sequencing_type">
                 <param name="sequencing_type_selector" value="not_paired" />
                 <param name="reads" value="read_small.fastq" />
@@ -350,17 +277,52 @@
                 <param name="aligned_fastx_selector" value="--fastx" />
                 <param name="other" value="True" />
             </conditional>
-            <param name="log" value="False" />
             <conditional name="report">
-                <param name="report_type" value="num_alignments" />
-                <param name="report_best_type" value="1" />
-                <param name="print_all_reads" value="False" />
+                <param name="report_type" value="number_alignments" />
+                <conditional name="report_num_alignments">
+                    <param name="output_alignments" value="custom"/>
+                    <param name="num_alignments" value="1"/>
+                </conditional>
                 <conditional name="blast">
                     <param name="blast_output" value="True"/>
                     <param name="blast_format" value="1 cigar qcov"/>
                 </conditional>
-                <conditional name="otu">
-                    <param name="otu_map" value="False"/>
+            </conditional>
+            <param name="e_value" value="1"/>
+            <param name="match" value="2"/>
+            <param name="mismatch" value="-3" />
+            <param name="gap_open" value="5"/>
+            <param name="gap_ext" value="2"/>
+            <param name="N" value="True"/>
+            <output name="aligned" file="test1_aligned.fastq" />
+            <output name="unaligned" file="test1_unaligned.fastq" />
+            <output name="output_bam" file="test1_bam.bam" lines_diff="6" ftype="bam"/>
+            <output name="output_blast" file="test1_blast.tabular"/>
+        </test>
+        <!-- test cached reference data -->
+        <test expect_num_outputs="4">
+            <conditional name="sequencing_type">
+                <param name="sequencing_type_selector" value="not_paired" />
+                <param name="reads" value="read_small.fastq" />
+            </conditional>
+            <param name="strand_search" value="" />
+            <conditional name="databases_type">
+                <param name="databases_selector" value="cached" />
+                <param name="input_databases" value="ref_small" />
+            </conditional>
+            <conditional name="aligned_fastx">
+                <param name="aligned_fastx_selector" value="--fastx" />
+                <param name="other" value="True" />
+            </conditional>
+            <conditional name="report">
+                <param name="report_type" value="number_alignments" />
+                <conditional name="report_num_alignments">
+                    <param name="output_alignments" value="custom"/>
+                    <param name="num_alignments" value="1"/>
+                </conditional>
+                <conditional name="blast">
+                    <param name="blast_output" value="True"/>
+                    <param name="blast_format" value="1 cigar qcov"/>
                 </conditional>
             </conditional>
             <param name="e_value" value="1"/>
@@ -368,16 +330,102 @@
             <param name="mismatch" value="-3" />
             <param name="gap_open" value="5"/>
             <param name="gap_ext" value="2"/>
-            <param name="ambiguous_letter" value="-3"/>
-            <output name="output_fastx" file="test1_aligned.fastq" />
-            <output name="output_other" file="test1_other.fastq" />
-            <output name="output_bam" file="test1_bam.bam" compare="sim_size" delta="200" />
+            <param name="N" value="True"/>
+            <output name="aligned" file="test1_aligned.fastq" />
+            <output name="unaligned" file="test1_unaligned.fastq" />
+            <output name="output_bam" file="test1_bam.bam" lines_diff="6" ftype="bam"/>
             <output name="output_blast" file="test1_blast.tabular"/>
         </test>
-        <test>
+        <!-- test two reference files from the history; apart from this, same as the previous test -->
+        <test expect_num_outputs="4">
             <conditional name="sequencing_type">
                 <param name="sequencing_type_selector" value="not_paired" />
-                <param name="reads" value="read_small.fasta" />
+                <param name="reads" value="read_small.fastq" />
+            </conditional>
+            <param name="strand_search" value="" />
+            <conditional name="databases_type">
+                <param name="databases_selector" value="history" />
+                <param name="database_name" value="ref_small.fasta,ref_small_copy.fasta" />
+            </conditional>
+            <conditional name="aligned_fastx">
+                <param name="aligned_fastx_selector" value="--fastx" />
+                <param name="other" value="True" />
+            </conditional>
+            <conditional name="report">
+                <param name="report_type" value="number_alignments" />
+                <conditional name="report_num_alignments">
+                    <param name="output_alignments" value="custom"/>
+                    <param name="num_alignments" value="1"/>
+                </conditional>
+                <conditional name="blast">
+                    <param name="blast_output" value="True"/>
+                    <param name="blast_format" value="1 cigar qcov"/>
+                </conditional>
+            </conditional>
+            <param name="e_value" value="1"/>
+            <param name="match" value="2"/>
+            <param name="mismatch" value="-3" />
+            <param name="gap_open" value="5"/>
+            <param name="gap_ext" value="2"/>
+            <param name="N" value="True"/>
+            <output name="aligned" file="test1_aligned.fastq" />
+            <output name="unaligned" file="test1_unaligned.fastq" />
+            <output name="output_bam" file="test1_bam.bam" lines_diff="6" ftype="bam"/>
+            <output name="output_blast" file="test1_blast.tabular"/>
+            <assert_command>
+                <has_text text="--ref" n="2"/>
+            </assert_command>
+            <assert_stdout>
+                <has_text text="Processing reference [2] out of total [2] references"/>
+            </assert_stdout>
+        </test>
+        <!-- test two cached references -->
+        <test expect_num_outputs="4">
+            <conditional name="sequencing_type">
+                <param name="sequencing_type_selector" value="not_paired" />
+                <param name="reads" value="read_small.fastq" />
+            </conditional>
+            <param name="strand_search" value="" />
+            <conditional name="databases_type">
+                <param name="databases_selector" value="cached" />
+                <param name="input_databases" value="ref_small,ref_small_copy" />
+            </conditional>
+            <conditional name="aligned_fastx">
+                <param name="aligned_fastx_selector" value="--fastx" />
+                <param name="other" value="True" />
+            </conditional>
+            <conditional name="report">
+                <param name="report_type" value="number_alignments" />
+                <conditional name="report_num_alignments">
+                    <param name="output_alignments" value="custom"/>
+                    <param name="num_alignments" value="1"/>
+                </conditional>
+                <conditional name="blast">
+                    <param name="blast_output" value="True"/>
+                    <param name="blast_format" value="1 cigar qcov"/>
+                </conditional>
+            </conditional>
+            <param name="e_value" value="1"/>
+            <param name="match" value="2"/>
+            <param name="mismatch" value="-3" />
+            <param name="gap_open" value="5"/>
+            <param name="gap_ext" value="2"/>
+            <param name="N" value="True"/>
+            <output name="aligned" file="test1_aligned.fastq" />
+            <output name="unaligned" file="test1_unaligned.fastq" />
+            <output name="output_bam" file="test1_bam.bam" lines_diff="6" ftype="bam"/>
+            <output name="output_blast" file="test1_blast.tabular"/>
+            <assert_command>
+                <has_text text="--ref" n="2"/>
+            </assert_command>
+            <assert_stdout>
+                <has_text text="Processing reference [2] out of total [2] references"/>
+            </assert_stdout>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="sequencing_type">
+                <param name="sequencing_type_selector" value="not_paired" />
+                <param name="reads" value="read_small.fastq" />
             </conditional>
             <param name="strand_search" value="" />
             <conditional name="databases_type">
@@ -388,7 +436,6 @@
                 <param name="aligned_fastx_selector" value="--fastx" />
                 <param name="other" value="False" />
             </conditional>
-            <param name="log" value="True" />
             <conditional name="report">
                 <param name="report_type" value="None" />
             </conditional>
@@ -397,11 +444,10 @@
             <param name="mismatch" value="-3" />
             <param name="gap_open" value="5"/>
             <param name="gap_ext" value="2"/>
-            <param name="ambiguous_letter" value="-3"/>
-            <output name="output_fastx" file="test2_aligned.fasta" />
-            <output name="output_log" file="test2_log.txt"  compare="sim_size" />
+            <param name="N" value="True"/>
+            <output name="aligned" file="test2_aligned.fasta" />
         </test>
-        <test>
+        <test expect_num_outputs="5">
             <conditional name="sequencing_type">
                 <param name="sequencing_type_selector" value="paired" />
                 <param name="forward_reads" value="forward_reads.fastq" />
@@ -419,10 +465,11 @@
                 <param name="aligned_fastx_selector" value="--fastx" />
                 <param name="other" value="True" />
             </conditional>
-            <param name="log" value="False" />
             <conditional name="report">
-                <param name="report_type" value="best" />
-                <param name="report_num_alignments_type" value="1"/>
+                <param name="report_type" value="number_alignments" />
+                <conditional name="report_num_alignments">
+                    <param name="output_alignments" value="all"/>
+                </conditional>
                 <param name="print_all_reads" value="False" />
                 <conditional name="blast">
                     <param name="blast_output" value="False"/>
@@ -436,14 +483,14 @@
             <param name="mismatch" value="-3" />
             <param name="gap_open" value="5"/>
             <param name="gap_ext" value="2"/>
-            <param name="ambiguous_letter" value="-3"/>
+            <param name="N" value="True"/>
             <output name="aligned_forward" file="test3_aligned_forward.fastq" />
             <output name="aligned_reverse" file="test3_aligned_reverse.fastq" />
             <output name="unaligned_forward" file="test3_unaligned_forward.fastq" />
             <output name="unaligned_reverse" file="test3_unaligned_reverse.fastq" />
-            <output name="output_bam" file="test3_bam.bam" compare="sim_size" delta="200" />
+            <output name="output_bam" file="test3_bam.bam" lines_diff="6" ftype="bam"/>
         </test>
-        <test>
+        <test expect_num_outputs="3">
             <conditional name="sequencing_type">
                 <param name="sequencing_type_selector" value="not_paired" />
                 <param name="reads" value="test4_input.fasta" />
@@ -458,10 +505,11 @@
             <conditional name="aligned_fastx">
                 <param name="aligned_fastx_selector" value="" />
             </conditional>
-            <param name="log" value="False" />
             <conditional name="report">
-                <param name="report_type" value="best" />
-                <param name="report_num_alignments_type" value="1"/>
+                <param name="report_type" value="number_alignments" />
+                <conditional name="report_num_alignments">
+                    <param name="output_alignments" value="all"/>
+                </conditional>
                 <param name="print_all_reads" value="False" />
                 <conditional name="blast">
                     <param name="blast_output" value="False"/>
@@ -478,12 +526,12 @@
             <param name="mismatch" value="-3" />
             <param name="gap_open" value="5"/>
             <param name="gap_ext" value="2"/>
-            <param name="ambiguous_letter" value="-3"/>
+            <param name="N" value="True"/>
             <output name="output_bam" file="test4_bam.bam" compare="sim_size" delta="200" />
             <output name="output_biom" file="test4_biom.txt"/>
             <output name="output_de_novo" file="test4_de_novo.fasta"/>
         </test>
-        <test>
+        <test expect_num_outputs="5">
             <conditional name="sequencing_type">
                 <param name="sequencing_type_selector" value="paired" />
                 <param name="forward_reads" value="forward_reads.fasta" />
@@ -501,10 +549,8 @@
                 <param name="aligned_fastx_selector" value="--fastx" />
                 <param name="other" value="True" />
             </conditional>
-            <param name="log" value="False" />
             <conditional name="report">
-                <param name="report_type" value="best" />
-                <param name="report_num_alignments_type" value="1"/>
+                <param name="report_type" value="number_alignments" />
                 <param name="print_all_reads" value="False" />
                 <conditional name="blast">
                     <param name="blast_output" value="False"/>
@@ -518,11 +564,12 @@
             <param name="mismatch" value="-3" />
             <param name="gap_open" value="5"/>
             <param name="gap_ext" value="2"/>
-            <param name="ambiguous_letter" value="-3"/>
-            <output name="aligned_paired" file="test5_aligned.fasta" />
-            <output name="unaligned_paired" file="test5_unaligned.fasta" />
+            <param name="N" value="True"/>
+            <output name="aligned_forward" file="test5_aligned_forward.fasta" />
+            <output name="aligned_reverse" file="test5_aligned_reverse.fasta" />
+            <output name="output_bam" file="test5_bam.bam" lines_diff="6" ftype="bam"/>
         </test>
-        <test>
+        <test expect_num_outputs="5">
             <conditional name="sequencing_type">
                 <param name="sequencing_type_selector" value="paired" />
                 <param name="forward_reads" value="forward_reads.fasta" />
@@ -540,10 +587,11 @@
                 <param name="aligned_fastx_selector" value="--fastx" />
                 <param name="other" value="True" />
             </conditional>
-            <param name="log" value="False" />
             <conditional name="report">
-                <param name="report_type" value="best" />
-                <param name="report_num_alignments_type" value="1"/>
+                <param name="report_type" value="number_alignments" />
+                <conditional name="report_num_alignments">
+                    <param name="output_alignments" value="all"/>
+                </conditional>
                 <param name="print_all_reads" value="False" />
                 <conditional name="blast">
                     <param name="blast_output" value="False"/>
@@ -557,11 +605,105 @@
             <param name="mismatch" value="-3" />
             <param name="gap_open" value="5"/>
             <param name="gap_ext" value="2"/>
-            <param name="ambiguous_letter" value="-3"/>
+            <param name="N" value="True"/>
             <output name="aligned_forward" file="test6_aligned_forward.fasta" />
             <output name="aligned_reverse" file="test6_aligned_reverse.fasta" />
             <output name="unaligned_forward" file="test6_unaligned_forward.fasta" />
             <output name="unaligned_reverse" file="test6_unaligned_reverse.fasta" />
+            <output name="output_bam" file="test6_bam.bam" lines_diff="6" ftype="bam"/>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="sequencing_type">
+                <param name="sequencing_type_selector" value="paired" />
+                <param name="forward_reads" value="forward_reads.fasta" />
+                <param name="reverse_reads" value="reverse_reads.fasta" />
+            </conditional>
+            <conditional name="databases_type">
+                <param name="databases_selector" value="history" />
+                <param name="database_name" value="ref_small.fasta" />
+            </conditional>
+            <conditional name="aligned_fastx">
+                <param name="aligned_fastx_selector" value="" />
+            </conditional>
+            <conditional name="report">
+                <param name="report_type" value="min_lis" />
+                <param name="min_lis" value="1"/>
+            </conditional>
+            <output name="output_bam" file="test7_bam.bam" lines_diff="6" ftype="bam"/>
+        </test>
+        <test expect_num_outputs="9">
+            <conditional name="sequencing_type">
+                <param name="sequencing_type_selector" value="paired" />
+                <param name="forward_reads" value="forward_reads.fastq" />
+                <param name="reverse_reads" value="reverse_reads.fastq" />
+                <param name="paired_type" value="--sout"/>
+            </conditional>
+            <param name="strand_search" value="" />
+            <conditional name="databases_type">
+                <param name="databases_selector" value="history" />
+                <param name="database_name" value="ref_small.fasta" />
+                <param name="seed_length" value="18" />
+                <param name="max_pos" value="100000"/>
+            </conditional>
+            <conditional name="aligned_fastx">
+                <param name="aligned_fastx_selector" value="--fastx" />
+                <param name="other" value="True" />
+            </conditional>
+            <conditional name="report">
+                <param name="report_type" value="number_alignments" />
+                <conditional name="report_num_alignments">
+                    <param name="output_alignments" value="all"/>
+                </conditional>
+                <param name="print_all_reads" value="False" />
+                <conditional name="blast">
+                    <param name="blast_output" value="False"/>
+                </conditional>
+                <conditional name="otu">
+                    <param name="otu_map" value="False"/>
+                </conditional>
+            </conditional>
+            <param name="e_value" value="1"/>
+            <param name="match" value="2"/>
+            <param name="mismatch" value="-3" />
+            <param name="gap_open" value="5"/>
+            <param name="gap_ext" value="2"/>
+            <param name="N" value="True"/>
+            <output name="aligned_forward_singleton" file="test8_aligned_forward_singleton.fastq" />
+            <output name="aligned_reverse_singleton" file="test8_aligned_reverse_singleton.fastq" />
+            <output name="output_bam" file="test8_bam.bam" lines_diff="6" ftype="bam"/>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="sequencing_type">
+                <param name="sequencing_type_selector" value="interleaved" />
+                <param name="reads" value="interlaced_reads.fastq.gz" />
+            </conditional>
+            <conditional name="databases_type">
+                <param name="databases_selector" value="history" />
+                <param name="database_name" value="ref_small.fasta" />
+            </conditional>
+            <output name="aligned" file="test9_aligned.fastq.gz" compare="sim_size"/>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="sequencing_type">
+                <param name="sequencing_type_selector" value="paired_collection" />
+                <param name="reads">
+                    <collection type="paired">
+                        <element name="forward" value="forward_reads.fastq" />
+                        <element name="reverse" value="reverse_reads.fastq" />
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="databases_type">
+                <param name="databases_selector" value="history" />
+                <param name="database_name" value="ref_small.fasta" />
+            </conditional>
+            <conditional name="aligned_fastx">
+                <param name="aligned_fastx_selector" value="" />
+            </conditional>
+            <conditional name="report">
+                <param name="report_type" value="number_alignments" />
+            </conditional>
+            <output name="output_bam" file="test10_bam.bam" lines_diff="8" ftype="bam"/>
         </test>
     </tests>
     <help>
@@ -634,12 +776,5 @@
 
 ]]>
     </help>
-
-    <citations>
-        <citation type="doi">10.1093/bioinformatics/bts611</citation>
-        <citation type="doi">10.1093/nar/gks1219</citation>
-        <citation type="doi">10.1093/nar/gks1005</citation>
-        <citation type="doi">10.1093/bioinformatics/btq461</citation>
-        <citation type="doi">10.1038/nbt.2198</citation>
-    </citations>
+    <expand macro="citations" />
 </tool>
author	rnateam
date	Sat, 21 Oct 2023 09:07:21 +0000
parents	eb35257d2e29
children