Mercurial > repos > iuc > barcode_splitter

diff barcode_splitter.xml @ 0:ff12d2c1f5d6 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/barcode_splitter commit 8f4f88267b8adfa035539230eab3d8eea6896e10
author: iuc
date: Wed, 29 May 2019 10:24:27 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/barcode_splitter.xml	Wed May 29 10:24:27 2019 -0400
@@ -0,0 +1,795 @@
+<tool id="barcode_splitter" name="Barcode Splitter" version="0.18.4.0">
+    <!-- One-line summary of the tool -->
+    <description>Split sequence files using multiple sets of barcodes</description>
+    <!-- Software the command below depends on -->
+    <requirements>
+        <requirement version="0.18.4" type="package">barcode_splitter</requirement>
+    </requirements>
+    <!-- Prints the version of the underlying barcode_splitter program -->
+    <version_command>barcode_splitter --version</version_command>
+    <!--
+        Command template (Cheetah inside CDATA).  Steps:
+          1. Create the split/ work directory and run barcode_splitter on the selected read and
+             index files, writing its standard output to the summary dataset.
+          2. Stop with a non-zero exit status if no index file was supplied or if the split failed.
+          3. Rename every split/*-read-N.FORMAT file to "SAMPLE READNAME READTYPE MATCHEDPAIR.EXT";
+             the discover_datasets patterns in the outputs section match on that structure.
+        The job's final exit status is non-zero when any rename failed.
+    -->
+    <command detect_errors="exit_code">
+        <![CDATA[
+            mkdir split &&
+            barcode_splitter
+
+            --bcfile '$bcfile' --mismatches '$mismatches' --galaxy $barcodes_at_end --prefix 'split/'
+
+            ## auto_split_all starts as the user's choice and is forced on when a read file embeds an index
+            #set $auto_split_all = $split_all
+            #set $format = ""
+            ## paired_match is "1" only when exactly one forward and one reverse file exist
+            #set $paired_match = "0"
+            #set $num_index_files = 0
+
+            #if str($runinterface.run_type) == "flexible":
+
+                ## Determine the format based on the first input file's extension
+                ## #for $indf in $runinterface.seqfiles
+                ##     #set $format = $indf.input.extension
+                ##     #break
+                ## #end for
+                #set $format = $runinterface.seqfiles[0].input.extension
+
+                ## Add the sequence files, count the number of forward, reverse, and other non-index read names, and auto-set split_all
+                #set $numforwards = 0
+                #set $numreverses = 0
+                #for $sf in $runinterface.seqfiles
+                    #if str($sf.nameinterface.readtype) == "forward" or str($sf.nameinterface.readtype) == "forwardindex":
+                        #set $numforwards += 1
+                    #elif str($sf.nameinterface.readtype) == "reverse" or str($sf.nameinterface.readtype) == "reverseindex":
+                        #set $numreverses += 1
+                    #end if
+                    #if str($sf.nameinterface.readtype) == "singleindex" or str($sf.nameinterface.readtype) == "forwardindex" or str($sf.nameinterface.readtype) == "reverseindex":
+                        #set $auto_split_all = "--split_all"
+                    #end if
+                    '${sf.input}'
+                #end for
+
+                ## Determine whether valid paired-end data exists
+                #if $numforwards == 1 and $numreverses == 1:
+                    #set $paired_match = "1"
+                #end if
+
+                ##Add the relative file argument positions of the index files in the list included above
+                --idxread
+                #set $n = 0
+                #for $sf in $runinterface.seqfiles
+                    #set $n += 1
+                    #if str($sf.nameinterface.readtype) == 'index' or str($sf.nameinterface.readtype) == "singleindex" or str($sf.nameinterface.readtype) == "forwardindex" or str($sf.nameinterface.readtype) == "reverseindex":
+                        #set $num_index_files += 1
+                        '${n}'
+                    #end if
+                #end for
+
+            #elif str($runinterface.run_type) == "single":
+
+                #set $format = $runinterface.snglinput.extension
+
+                ## Add the sequence files
+                '${runinterface.snglinput}'
+                #for $sf in $runinterface.idxfiles
+                    '${sf.idxinput}'
+                #end for
+
+                ##Add the relative file argument positions of the index files
+                ## (position 1 is the read file, so index positions start at 2)
+                --idxread
+                #set $n = 1
+                #for $sf in $runinterface.idxfiles
+                    #set $num_index_files += 1
+                    #set $n += 1
+                    '${n}'
+                #end for
+
+            #elif str($runinterface.run_type) == "paired":
+
+                #set $format = $runinterface.fwdinput.extension
+                #set $paired_match = "1"
+
+                ## Add the sequence files
+                '${runinterface.fwdinput}' '${runinterface.revinput}'
+                #for $sf in $runinterface.idxfiles
+                    '${sf.idxinput}'
+                #end for
+
+                ##Add the relative file argument positions of the index files
+                ## (positions 1 and 2 are the forward and reverse files, so index positions start at 3)
+                --idxread
+                #set $n = 2
+                #for $sf in $runinterface.idxfiles
+                    #set $num_index_files += 1
+                    #set $n += 1
+                    '${n}'
+                #end for
+
+            #end if
+
+            --format '${format}' --suffix '.${format}' $auto_split_all
+
+            > '$summary';
+
+            ## Remember the exit status of "mkdir split && barcode_splitter ..." before any other
+            ## statement overwrites it; the rename steps below would otherwise mask a failed split
+            split_rc=\$?;
+
+            ## Determine validity of repeat params.  Report on stderr and stop with a failing status.
+            #if $num_index_files == 0:
+                echo "ERROR: At least 1 read file must have a 'Read Type' selected as 'Index', 'Single-End with Index', 'Forward with index', or 'Reverse with Index')." >&2;
+                exit 1;
+            #end if
+
+            ## Fail the job when the split itself failed
+            if [ "\$split_rc" -ne 0 ]; then exit "\$split_rc"; fi;
+
+            ## Give the output split file names the name specified by the user (to be turned into collection identifiers)
+            ## rc keeps the status of the last failed mv, if any
+            rc=0;
+            #set $n = 0
+
+            #if str($runinterface.run_type) == "flexible":
+
+                #for $repeat in $runinterface.seqfiles
+
+                    #set $n += 1
+
+                    ## Defaults: name "read-N" and type "index", overridden below by the selected read type
+                    #set $readname = "read-" + str($n)
+                    #set $readtype = "index"
+                    #if str($repeat.nameinterface.readtype) == "forward" or str($repeat.nameinterface.readtype) == "forwardindex":
+                        #set $readname = "forward"
+                        #set $readtype = "forward"
+                    #elif str($repeat.nameinterface.readtype) == "reverse" or str($repeat.nameinterface.readtype) == "reverseindex":
+                        #set $readname = "reverse"
+                        #set $readtype = "reverse"
+                    #else:
+                        #if (str($repeat.nameinterface.readtype) == "single" or str($repeat.nameinterface.readtype) == "singleindex" or str($repeat.nameinterface.readtype) == "index") and str($repeat.nameinterface.readname) != "":
+                            #set $readname = $repeat.nameinterface.readname
+                        #end if
+                        #if str($repeat.nameinterface.readtype) == "single" or str($repeat.nameinterface.readtype) == "singleindex":
+                            #set $readtype = "single"
+                        #end if
+                    #end if
+
+                    for f in split/*-read-'${n}'.'${format}';
+                    do
+                        if [ -e "\$f" ]; then
+                            ## Space-delimited file name structure: "SAMPLE READNAME READTYPE MATCHEDPAIR.EXT"
+                            mv "\$f" "\${f/-read-${n}/ ${readname} ${readtype} ${paired_match}}" || rc=\$?;
+                        fi;
+                    done;
+                #end for
+
+            #elif str($runinterface.run_type) == "single":
+
+                #set $n = 1
+
+                ## Rename the single-end split read files
+                #set $readname = "read-" + str($n)
+                #set $readtype = 'single'
+                for f in split/*-read-'${n}'.'${format}';
+                do
+                    mv "\$f" "\${f/-read-${n}/ ${readname} ${readtype} ${paired_match}}" || rc=\$?;
+                done;
+
+                ## Rename the index read files
+                #set $readtype = "index"
+                #for $repeat in $runinterface.idxfiles
+                    #set $n += 1
+
+                    #set $readname = "read-" + str($n)
+                    #if str($repeat.idxreadname) != "":
+                        #set $readname = $repeat.idxreadname
+                    #end if
+
+                    for f in split/*-read-'${n}'.'${format}';
+                    do
+                        if [ -e "\$f" ]; then
+                            ## Space-delimited file name structure: "SAMPLE READNAME READTYPE MATCHEDPAIR.EXT"
+                            mv "\$f" "\${f/-read-${n}/ ${readname} ${readtype} ${paired_match}}" || rc=\$?;
+                        fi;
+                    done;
+                #end for
+
+            #elif str($runinterface.run_type) == "paired":
+
+                #set $n = 1
+
+                ## Rename the forward split read files
+                #set $readname = "forward"
+                #set $readtype = "forward"
+                for f in split/*-read-'${n}'.'${format}';
+                do
+                    mv "\$f" "\${f/-read-${n}/ ${readname} ${readtype} ${paired_match}}" || rc=\$?;
+                done;
+
+                #set $n += 1
+
+                ## Rename the reverse split read files
+                #set $readname = "reverse"
+                #set $readtype = "reverse"
+                for f in split/*-read-'${n}'.'${format}';
+                do
+                    mv "\$f" "\${f/-read-${n}/ ${readname} ${readtype} ${paired_match}}" || rc=\$?;
+                done;
+
+                ## Rename the index read files
+                #set $readtype = "index"
+                #for $repeat in $runinterface.idxfiles
+                    #set $n += 1
+
+                    #set $readname = "read-" + str($n)
+                    #if str($repeat.idxreadname) != "":
+                        #set $readname = $repeat.idxreadname
+                    #end if
+
+                    for f in split/*-read-'${n}'.'${format}';
+                    do
+                        if [ -e "\$f" ]; then
+                            ## Space-delimited file name structure: "SAMPLE READNAME READTYPE MATCHEDPAIR.EXT"
+                            mv "\$f" "\${f/-read-${n}/ ${readname} ${readtype} ${paired_match}}" || rc=\$?;
+                        fi;
+                    done;
+                #end for
+
+            #end if
+
+            ## Exit non-zero if rc is not 0
+            [ \$rc == 0 ];
+        ]]>
+    </command>
+
+    <!--
+        Tool form.  The "runinterface" conditional offers three layouts: a single read file plus
+        index files, a forward/reverse pair plus index files, or a free-form list of read files
+        that are each tagged with a read type.  The remaining parameters apply to every layout.
+    -->
+    <inputs>
+        <param name="bcfile" type="data" format="tabular" label="Barcode File" help="Tab-delimited text file where the first column is a sample ID and subsequent columns are barcodes.  Note, files with indexes/barcode sequences must be supplied in the same order as the barcode columns in this file and their 'Read Type' must have 'Index'." />
+
+        <conditional name="runinterface">
+            <param label="Run Type" name="run_type" type="select">
+                <option value="single" selected="true">Single-End</option>
+                <option value="paired">Paired-End</option>
+                <option value="flexible">Flexible</option>
+            </param>
+            <when value="single">
+                <!-- Simplified interface for common use-case.  Can still use "Flexible" for alternative single-end options. -->
+                <param name="snglinput" type="data" format="fastq,fastqsanger,fastqsolexa,fastqillumina" label="Read File" help="Typically, 'Read 1'." />
+                <repeat name="idxfiles" title="Index File(s)" min="1" default="1">
+                    <param name="idxinput" type="data" format="fastq,fastqsanger,fastqsolexa,fastqillumina" label="Index File" help="Typically, 'Read 2 Index'." />
+                    <param name="idxreadname" type="text" label="Index Name" help="E.g. index1, index2, etc..  Letters and numbers only.  No spaces.  Default is 'read-x' where 'x' is a number corresponding to the order in which the file was supplied (including the read file above).">
+                        <sanitizer>
+                            <valid initial="string.ascii_letters,string.digits" />
+                        </sanitizer>
+                    </param>
+                </repeat>
+            </when>
+            <when value="paired">
+                <!-- No embedded index allowed in the forward/reverse reads for simplicity.  Can still use "Flexible" for that. -->
+                <param name="fwdinput" type="data" format="fastq,fastqsanger,fastqsolexa,fastqillumina" label="Forward Read File" help="Typically, 'Read 1'." />
+                <param name="revinput" type="data" format="fastq,fastqsanger,fastqsolexa,fastqillumina" label="Reverse Read File" help="Typically, 'Read 3'." />
+
+                <!-- At least one index file is required: the command template reports an error when it counts no index files, and this layout has no other source of index reads. -->
+                <repeat name="idxfiles" title="Index Files" min="1" default="1">
+                    <param name="idxinput" type="data" format="fastq,fastqsanger,fastqsolexa,fastqillumina" label="Index File" help="Typically, 'Read 2 Index' or 'Read 4 Index'." />
+                    <param name="idxreadname" type="text" label="Index Name" help="E.g. index1, index2, etc..  Letters and numbers only.  No spaces.  Default is 'read-x' where 'x' is a number corresponding to the order in which the file was supplied (including the forward and reverse files).">
+                        <sanitizer>
+                            <valid initial="string.ascii_letters,string.digits" />
+                        </sanitizer>
+                    </param>
+                </repeat>
+            </when>
+            <when value="flexible">
+                <repeat name="seqfiles" title="Read Files" min="1" default="1">
+                    <param name="input" type="data" format="fastq,fastqsanger,fastqsolexa,fastqillumina" label="Read File" help="Forward or single-end read files are typically named 'Read 1'.  Index files are typically named 'Read 2 Index' or 'Read 4 Index'.  Reverse read files are typically named 'Read 3'." />
+                    <conditional name="nameinterface">
+                        <param name="readtype" type="select" label="Read Type" help="E.g. If one file is 'Forward' and one is 'Reverse', split reads from those files will appear in an output paired collection.  If your file has reads with barcodes embedded in them, select their 'with index' variant, e.g. 'Forward with Index'.">
+                            <option value="single" selected="true">Single-End</option>
+                            <option value="forward">Forward</option>
+                            <option value="reverse">Reverse</option>
+                            <option value="index">Index</option>
+                            <option value="singleindex">Single-End with Index</option>
+                            <option value="forwardindex">Forward with Index</option>
+                            <option value="reverseindex">Reverse with Index</option>
+                        </param>
+                        <!-- Only the single, singleindex and index read types take a user-supplied read name; the command template names forward/reverse files itself. -->
+                        <when value="single">
+                            <param name="readname" type="text" label="Read Name" help="E.g. index1, index2, etc..  Ignored for paired-collections.  Letters and numbers only.  No spaces.  Default is 'read-x' where 'x' is a number corresponding to the order in which the file was supplied.">
+                                <sanitizer>
+                                    <valid initial="string.ascii_letters,string.digits" />
+                                </sanitizer>
+                            </param>
+                        </when>
+                        <when value="singleindex">
+                            <param name="readname" type="text" label="Read Name" help="E.g. index1, index2, etc..  Ignored for paired-collections.  Letters and numbers only.  No spaces.  Default is 'read-x' where 'x' is a number corresponding to the order in which the file was supplied.">
+                                <sanitizer>
+                                    <valid initial="string.ascii_letters,string.digits" />
+                                </sanitizer>
+                            </param>
+                        </when>
+                        <when value="index">
+                            <param name="readname" type="text" label="Read Name" help="E.g. index1, index2, etc..  Ignored for paired-collections.  Letters and numbers only.  No spaces.  Default is 'read-x' where 'x' is a number corresponding to the order in which the file was supplied.">
+                                <sanitizer>
+                                    <valid initial="string.ascii_letters,string.digits" />
+                                </sanitizer>
+                            </param>
+                        </when>
+                    </conditional>
+                </repeat>
+            </when>
+        </conditional>
+
+        <!-- Options shared by all run types -->
+        <param name="mismatches" type="integer" value="1" max="2" min="0" label="Number of allowed mismatches" help="An integer between 0 and 2 (inclusive).  Warning: Make sure all your barcodes differ by at least double this value plus 1, otherwise sequences that match both barcodes equally well will be discarded as 'multimatched' reads." />
+        <param argument="--barcodes_at_end" type="boolean" truevalue="--barcodes_at_end" falsevalue="" checked="false" label="Barcodes are at the end of all index sequences" help="Default is the beginning of all sequences" />
+        <param argument="--split_all" type="boolean" truevalue="--split_all" falsevalue="" checked="false" label="Split index files too" help="This creates an output collection for split files whose selected 'Read Type' is 'Index'." />
+    </inputs>
+
+    <outputs>
+        <!-- Tabular summary: receives the standard output of barcode_splitter -->
+        <data name="summary" format="tabular" label="${tool.name} on ${on_string}: Summary" />
+
+        <!-- Single-end split reads, whichever run_type produced them -->
+        <collection name="split_output_single"
+                    type="list"
+                    format_source="input"
+                    label="${tool.name} on ${on_string}: single-end">
+            <!-- Kept when run_type is single, or when it is flexible with exactly one read file that is not a pure index -->
+            <filter>str(runinterface['run_type']) == "single" or (str(runinterface['run_type']) == "flexible" and len(list(filter(lambda x: str(x['nameinterface']['readtype']) != "index", runinterface['seqfiles']))) == 1)</filter>
+            <discover_datasets directory="split" visible="false" pattern="(?P&lt;identifier_0&gt;\S+)\ \S+\ (single|forward|reverse) 0\.(?P&lt;ext&gt;.*)" />
+        </collection>
+
+        <!-- Paired-end split reads, whichever run_type produced them -->
+        <collection name="split_output_paired"
+                    type="list:paired"
+                    format_source="input"
+                    label="${tool.name} on ${on_string}: paired-end">
+            <!-- Kept when run_type is paired, or when it is flexible with exactly one forward-type and one reverse-type file -->
+            <filter>str(runinterface['run_type']) == "paired" or (str(runinterface['run_type']) == "flexible" and len(list(filter(lambda x: str(x['nameinterface']['readtype']).startswith('forward'), runinterface['seqfiles']))) == 1 and len(list(filter(lambda x: str(x['nameinterface']['readtype']).startswith('reverse'), runinterface['seqfiles']))) == 1)</filter>
+            <discover_datasets directory="split" visible="false" pattern="(?P&lt;identifier_0&gt;\S+)\ \S+\ (?P&lt;identifier_1&gt;forward|reverse) 1\.(?P&lt;ext&gt;.*)" />
+        </collection>
+
+        <!-- Single-end split reads produced next to a paired-end collection -->
+        <collection name="split_output_paired_other"
+                    type="list"
+                    format_source="input"
+                    label="${tool.name} on ${on_string}: other-end">
+            <!-- Kept when run_type is flexible, at least one single-type file exists, and there is exactly one forward-type and one reverse-type file -->
+            <filter>str(runinterface['run_type']) == "flexible" and len(list(filter(lambda x: str(x['nameinterface']['readtype']).startswith('single'), runinterface['seqfiles']))) > 0 and len(list(filter(lambda x: str(x['nameinterface']['readtype']).startswith('forward'), runinterface['seqfiles']))) == 1 and len(list(filter(lambda x: str(x['nameinterface']['readtype']).startswith('reverse'), runinterface['seqfiles']))) == 1</filter>
+            <discover_datasets directory="split" visible="false" pattern="(?P&lt;identifier_0&gt;\S+\ \S+)\ single 1\.(?P&lt;ext&gt;.*)" />
+        </collection>
+
+        <!-- Split reads from several read files that do not form one valid pair -->
+        <collection name="split_output_multi"
+                    type="list"
+                    format_source="input"
+                    label="${tool.name} on ${on_string}: multi-end">
+            <!-- Kept when run_type is flexible, more than one file is not a pure index, and the forward-type or reverse-type count differs from one -->
+            <filter>str(runinterface['run_type']) == "flexible" and (len(runinterface['seqfiles']) - len(list(filter(lambda x: str(x['nameinterface']['readtype']) == "index", runinterface['seqfiles'])))) > 1 and (len(list(filter(lambda x: str(x['nameinterface']['readtype']).startswith('forward'), runinterface['seqfiles']))) != 1 or len(list(filter(lambda x: str(x['nameinterface']['readtype']).startswith('reverse'), runinterface['seqfiles']))) != 1)</filter>
+            <discover_datasets directory="split" visible="false" pattern="(?P&lt;identifier_0&gt;\S+ \S+) (single (0|1)|(forward|reverse) 0)\.(?P&lt;ext&gt;.*)" />
+        </collection>
+
+        <!-- Split index-only reads, produced on request via split_all -->
+        <collection name="index_only"
+                    type="list"
+                    format_source="input"
+                    label="${tool.name} on ${on_string}: indexes">
+            <!-- Kept when split_all is set and index-only files exist: idxfiles entries for single/paired, or seqfiles entries typed "index" for flexible -->
+            <filter>split_all and ((str(runinterface['run_type']) != "flexible" and len(runinterface['idxfiles']) > 0) or (str(runinterface['run_type']) == "flexible" and len(list(filter(lambda x: str(x['nameinterface']['readtype']) == "index", runinterface['seqfiles']))) > 0))</filter>
+            <discover_datasets directory="split" visible="false" pattern="(?P&lt;identifier_0&gt;.*?) index (0|1)\.(?P&lt;ext&gt;.*)" />
+        </collection>
+    </outputs>
+
+    <tests>
+        <test>
+            <!-- Single-End run type: one read file, one named index file, index files not split -->
+            <param value="single" name="run_type" />
+            <param value="barcode_splitter_barcodes.txt" name="bcfile" />
+            <param value="2" name="mismatches" />
+            <param value="" name="barcodes_at_end" />
+            <param value="False" name="split_all" />
+            <param ftype="fastq" value="barcode_splitter1.fastq" name="snglinput" />
+
+            <repeat name="idxfiles">
+                <param ftype="fastq" value="barcode_splitter_index.fastq" name="idxinput" />
+                <param value="read" name="idxreadname" />
+            </repeat>
+
+            <!-- Expected: the summary table and one list element per sample, plus unmatched and multimatched -->
+            <output file="test_1_summary.out" name="summary" />
+            <output_collection type="list" name="split_output_single">
+                <element file="test_1_BC1-read-1.out" ftype="fastq" name="BC1" />
+                <element file="test_1_BC2-read-1.out" ftype="fastq" name="BC2" />
+                <element file="test_1_BC3-read-1.out" ftype="fastq" name="BC3" />
+                <element file="test_1_BC4-read-1.out" ftype="fastq" name="BC4" />
+                <element file="test_1_unmatched-read-1.out" ftype="fastq" name="unmatched" />
+                <element file="test_1_multimatched-read-1.out" ftype="fastq" name="multimatched" />
+            </output_collection>
+        </test>
+        <test>
+            <!-- Single-End run type: one read file, two named index files, split_all enabled -->
+            <param value="single" name="run_type" />
+            <param value="barcode_splitter_barcodes_dual.txt" name="bcfile" />
+            <param value="2" name="mismatches" />
+            <param value="" name="barcodes_at_end" />
+            <param value="True" name="split_all" />
+            <param ftype="fastq" value="barcode_splitter1.fastq" name="snglinput" />
+
+            <repeat name="idxfiles">
+                <param ftype="fastq" value="barcode_splitter_index.fastq" name="idxinput" />
+                <param value="index1" name="idxreadname" />
+            </repeat>
+            <repeat name="idxfiles">
+                <param ftype="fastq" value="barcode_splitter_index_2.fastq" name="idxinput" />
+                <param value="index2" name="idxreadname" />
+            </repeat>
+
+            <!-- Expected: the summary table, the split reads, and the split index files keyed by "SAMPLE INDEXNAME" -->
+            <output file="test_2_summary.out" name="summary" />
+            <output_collection type="list" name="split_output_single">
+                <element file="test_2_BC1-read-1.fastq" ftype="fastq" name="BC1" />
+                <element file="test_2_BC2-read-1.fastq" ftype="fastq" name="BC2" />
+                <element file="test_2_BC3-read-1.fastq" ftype="fastq" name="BC3" />
+                <element file="test_2_BC4-read-1.fastq" ftype="fastq" name="BC4" />
+                <element file="test_2_unmatched-read-1.fastq" ftype="fastq" name="unmatched" />
+                <element file="test_2_multimatched-read-1.fastq" ftype="fastq" name="multimatched" />
+            </output_collection>
+            <output_collection type="list" name="index_only">
+                <element file="test_2_BC1_index1.fastq" ftype="fastq" name="BC1 index1" />
+                <element file="test_2_BC2_index1.fastq" ftype="fastq" name="BC2 index1" />
+                <element file="test_2_BC3_index1.fastq" ftype="fastq" name="BC3 index1" />
+                <element file="test_2_BC4_index1.fastq" ftype="fastq" name="BC4 index1" />
+                <element file="test_2_unmatched_index1.fastq" ftype="fastq" name="unmatched index1" />
+                <element file="test_2_multimatched_index1.fastq" ftype="fastq" name="multimatched index1" />
+                <element file="test_2_BC1_index2.fastq" ftype="fastq" name="BC1 index2" />
+                <element file="test_2_BC2_index2.fastq" ftype="fastq" name="BC2 index2" />
+                <element file="test_2_BC3_index2.fastq" ftype="fastq" name="BC3 index2" />
+                <element file="test_2_BC4_index2.fastq" ftype="fastq" name="BC4 index2" />
+                <element file="test_2_unmatched_index2.fastq" ftype="fastq" name="unmatched index2" />
+                <element file="test_2_multimatched_index2.fastq" ftype="fastq" name="multimatched index2" />
+            </output_collection>
+        </test>
+        <test>
+            <!-- Paired-End run type: forward and reverse read files, one named index file, index files not split -->
+            <param value="paired" name="run_type" />
+            <param value="barcode_splitter_barcodes.txt" name="bcfile" />
+            <param value="2" name="mismatches" />
+            <param value="" name="barcodes_at_end" />
+            <param value="False" name="split_all" />
+            <param ftype="fastq" value="barcode_splitter1.fastq" name="fwdinput" />
+            <param ftype="fastq" value="barcode_splitter_index_2.fastq" name="revinput" />
+
+            <repeat name="idxfiles">
+                <param ftype="fastq" value="barcode_splitter_index.fastq" name="idxinput" />
+                <param value="index" name="idxreadname" />
+            </repeat>
+
+            <!-- Expected: the summary table and one forward/reverse pair per sample, plus unmatched and multimatched -->
+            <output file="test_1_summary.out" name="summary" />
+            <output_collection type="list:paired" name="split_output_paired">
+                <element name="BC1">
+                    <element file="test_3_BC1-forward.fastq" ftype="fastq" name="forward" />
+                    <element file="test_3_BC1-reverse.fastq" ftype="fastq" name="reverse" />
+                </element>
+                <element name="BC2">
+                    <element file="test_3_BC2-forward.fastq" ftype="fastq" name="forward" />
+                    <element file="test_3_BC2-reverse.fastq" ftype="fastq" name="reverse" />
+                </element>
+                <element name="BC3">
+                    <element file="test_3_BC3-forward.fastq" ftype="fastq" name="forward" />
+                    <element file="test_3_BC3-reverse.fastq" ftype="fastq" name="reverse" />
+                </element>
+                <element name="BC4">
+                    <element file="test_3_BC4-forward.fastq" ftype="fastq" name="forward" />
+                    <element file="test_3_BC4-reverse.fastq" ftype="fastq" name="reverse" />
+                </element>
+                <element name="unmatched">
+                    <element file="test_3_unmatched-forward.fastq" ftype="fastq" name="forward" />
+                    <element file="test_3_unmatched-reverse.fastq" ftype="fastq" name="reverse" />
+                </element>
+                <element name="multimatched">
+                    <element file="test_3_multimatched-forward.fastq" ftype="fastq" name="forward" />
+                    <element file="test_3_multimatched-reverse.fastq" ftype="fastq" name="reverse" />
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <!-- Paired-End run type: forward and reverse read files, two named index files, split_all enabled -->
+            <param value="paired" name="run_type" />
+            <param value="barcode_splitter_barcodes_dual.txt" name="bcfile" />
+            <param value="2" name="mismatches" />
+            <param value="" name="barcodes_at_end" />
+            <param value="True" name="split_all" />
+            <param ftype="fastq" value="barcode_splitter1.fastq" name="fwdinput" />
+            <param ftype="fastq" value="barcode_splitter_index_2.fastq" name="revinput" />
+
+            <repeat name="idxfiles">
+                <param ftype="fastq" value="barcode_splitter_index.fastq" name="idxinput" />
+                <param value="index1" name="idxreadname" />
+            </repeat>
+            <repeat name="idxfiles">
+                <param ftype="fastq" value="barcode_splitter_index_2.fastq" name="idxinput" />
+                <param value="index2" name="idxreadname" />
+            </repeat>
+
+            <!-- Expected: the summary table, one forward/reverse pair per sample, and the split index files keyed by "SAMPLE INDEXNAME" -->
+            <output file="test_2_summary.out" name="summary" />
+            <output_collection type="list:paired" name="split_output_paired">
+                <element name="BC1">
+                    <element file="test_3_BC1-forward.fastq" ftype="fastq" name="forward" />
+                    <element file="test_3_BC1-reverse.fastq" ftype="fastq" name="reverse" />
+                </element>
+                <element name="BC2">
+                    <element file="test_3_BC2-forward.fastq" ftype="fastq" name="forward" />
+                    <element file="test_3_BC2-reverse.fastq" ftype="fastq" name="reverse" />
+                </element>
+                <element name="BC3">
+                    <element file="test_3_BC3-forward.fastq" ftype="fastq" name="forward" />
+                    <element file="test_3_BC3-reverse.fastq" ftype="fastq" name="reverse" />
+                </element>
+                <element name="BC4">
+                    <element file="test_3_BC4-forward.fastq" ftype="fastq" name="forward" />
+                    <element file="test_3_BC4-reverse.fastq" ftype="fastq" name="reverse" />
+                </element>
+                <element name="unmatched">
+                    <element file="test_3_unmatched-forward.fastq" ftype="fastq" name="forward" />
+                    <element file="test_3_unmatched-reverse.fastq" ftype="fastq" name="reverse" />
+                </element>
+                <element name="multimatched">
+                    <element file="test_3_multimatched-forward.fastq" ftype="fastq" name="forward" />
+                    <element file="test_3_multimatched-reverse.fastq" ftype="fastq" name="reverse" />
+                </element>
+            </output_collection>
+            <output_collection type="list" name="index_only">
+                <element file="test_2_BC1_index1.fastq" ftype="fastq" name="BC1 index1" />
+                <element file="test_2_BC2_index1.fastq" ftype="fastq" name="BC2 index1" />
+                <element file="test_2_BC3_index1.fastq" ftype="fastq" name="BC3 index1" />
+                <element file="test_2_BC4_index1.fastq" ftype="fastq" name="BC4 index1" />
+                <element file="test_2_unmatched_index1.fastq" ftype="fastq" name="unmatched index1" />
+                <element file="test_2_multimatched_index1.fastq" ftype="fastq" name="multimatched index1" />
+                <element file="test_2_BC1_index2.fastq" ftype="fastq" name="BC1 index2" />
+                <element file="test_2_BC2_index2.fastq" ftype="fastq" name="BC2 index2" />
+                <element file="test_2_BC3_index2.fastq" ftype="fastq" name="BC3 index2" />
+                <element file="test_2_BC4_index2.fastq" ftype="fastq" name="BC4 index2" />
+                <element file="test_2_unmatched_index2.fastq" ftype="fastq" name="unmatched index2" />
+                <element file="test_2_multimatched_index2.fastq" ftype="fastq" name="multimatched index2" />
+            </output_collection>
+        </test>
+        <test>
+            <!-- Flexible run type with one single-end FASTQ whose reads contain the index
+                 (readtype "singleindex", read name "read"), using the single-column
+                 barcode file, 2 allowed mismatches, and split_all off.
+                 Expected: the summary table plus the split_output_single list collection
+                 with one FASTQ each for BC1 through BC4, unmatched, and multimatched.
+                 Only the summary, BC4, and unmatched have test_5 expected files; the
+                 remaining elements reuse the test_1 expected files. -->
+            <param name="bcfile" value="barcode_splitter_barcodes.txt" />
+            <param name="mismatches" value="2" />
+            <param name="barcodes_at_end" value="" />
+            <param name="split_all" value="False" />
+
+            <conditional name="runinterface">
+                <param name="run_type" value="flexible" />
+                <repeat name="seqfiles">
+                    <param name="input" ftype="fastq" value="barcode_splitter1.fastq" />
+                    <conditional name="nameinterface">
+                        <param name="readtype" value="singleindex" />
+                        <param name="readname" value="read" />
+                    </conditional>
+                </repeat>
+            </conditional>
+
+            <output name="summary" file="test_5_summary.out" />
+            <output_collection name="split_output_single" type="list">
+                <element name="BC1" ftype="fastq" file="test_1_BC1-read-1.out" />
+                <element name="BC2" ftype="fastq" file="test_1_BC2-read-1.out" />
+                <element name="BC3" ftype="fastq" file="test_1_BC3-read-1.out" />
+                <element name="BC4" ftype="fastq" file="test_5_BC4-read-1.out" />
+                <element name="unmatched" ftype="fastq" file="test_5_unmatched-read-1.out" />
+                <element name="multimatched" ftype="fastq" file="test_1_multimatched-read-1.out" />
+            </output_collection>
+        </test>
+        <test>
+            <!-- Flexible run type with the dual-column barcode file, 2 allowed mismatches,
+                 and split_all on. Three sequence files are supplied, in this order:
+                   1. "read1"  (readtype "single": single end, no index)
+                   2. "read2"  (readtype "singleindex": single end containing an index)
+                   3. "index2" (readtype "index": separate index read)
+                 Expected: the summary table, the split_output_multi list collection
+                 (six elements for read1 followed by six for read2), and the index_only
+                 list collection (six elements for index2). -->
+            <param name="bcfile" value="barcode_splitter_barcodes_dual.txt" />
+            <param name="mismatches" value="2" />
+            <param name="barcodes_at_end" value="" />
+            <param name="split_all" value="True" />
+
+            <conditional name="runinterface">
+                <param name="run_type" value="flexible" />
+                <repeat name="seqfiles">
+                    <param name="input" ftype="fastq" value="barcode_splitter1.fastq" />
+                    <conditional name="nameinterface">
+                        <param name="readtype" value="single" />
+                        <param name="readname" value="read1" />
+                    </conditional>
+                </repeat>
+                <repeat name="seqfiles">
+                    <param name="input" ftype="fastq" value="barcode_splitter_index.fastq" />
+                    <conditional name="nameinterface">
+                        <param name="readtype" value="singleindex" />
+                        <param name="readname" value="read2" />
+                    </conditional>
+                </repeat>
+                <repeat name="seqfiles">
+                    <param name="input" ftype="fastq" value="barcode_splitter_index_2.fastq" />
+                    <conditional name="nameinterface">
+                        <param name="readtype" value="index" />
+                        <param name="readname" value="index2" />
+                    </conditional>
+                </repeat>
+            </conditional>
+
+            <output name="summary" file="test_6_summary.out" />
+            <output_collection name="split_output_multi" type="list">
+                <element name="BC1 read1" ftype="fastq" file="test_6_BC1_read1.fastq" />
+                <element name="BC2 read1" ftype="fastq" file="test_6_BC2_read1.fastq" />
+                <element name="BC3 read1" ftype="fastq" file="test_6_BC3_read1.fastq" />
+                <element name="BC4 read1" ftype="fastq" file="test_6_BC4_read1.fastq" />
+                <element name="unmatched read1" ftype="fastq" file="test_6_unmatched_read1.fastq" />
+                <element name="multimatched read1" ftype="fastq" file="test_6_multimatched_read1.fastq" />
+                <element name="BC1 read2" ftype="fastq" file="test_6_BC1_read2.fastq" />
+                <element name="BC2 read2" ftype="fastq" file="test_6_BC2_read2.fastq" />
+                <element name="BC3 read2" ftype="fastq" file="test_6_BC3_read2.fastq" />
+                <element name="BC4 read2" ftype="fastq" file="test_6_BC4_read2.fastq" />
+                <element name="unmatched read2" ftype="fastq" file="test_6_unmatched_read2.fastq" />
+                <element name="multimatched read2" ftype="fastq" file="test_6_multimatched_read2.fastq" /> <!-- NOTE(review): the original author marked this element "FAILS" without recording the cause; confirm whether this comparison still fails before relying on this test. -->
+            </output_collection>
+            <output_collection name="index_only" type="list">
+                <element name="BC1 index2" ftype="fastq" file="test_6_BC1_index2.fastq" />
+                <element name="BC2 index2" ftype="fastq" file="test_6_BC2_index2.fastq" />
+                <element name="BC3 index2" ftype="fastq" file="test_6_BC3_index2.fastq" />
+                <element name="BC4 index2" ftype="fastq" file="test_6_BC4_index2.fastq" />
+                <element name="unmatched index2" ftype="fastq" file="test_6_unmatched_index2.fastq" />
+                <element name="multimatched index2" ftype="fastq" file="test_6_multimatched_index2.fastq" />
+            </output_collection>
+        </test>
+        <test>
+            <!-- Flexible run type with the single-column barcode file, 2 allowed mismatches,
+                 and split_all off. Three sequence files are supplied, in this order:
+                 a forward read, a separate index read named "index", and a reverse read
+                 (the forward and reverse read names are left empty).
+                 Expected: the summary table and the split_output_paired list:paired
+                 collection, holding a forward/reverse pair for each of BC1 through BC4,
+                 unmatched, and multimatched. No new expected files are introduced here:
+                 the summary reuses test_1_summary.out and the pairs reuse the test_3 files. -->
+            <param name="bcfile" value="barcode_splitter_barcodes.txt" />
+            <param name="mismatches" value="2" />
+            <param name="barcodes_at_end" value="" />
+            <param name="split_all" value="False" />
+
+            <conditional name="runinterface">
+                <param name="run_type" value="flexible" />
+                <repeat name="seqfiles">
+                    <param name="input" ftype="fastq" value="barcode_splitter1.fastq" />
+                    <conditional name="nameinterface">
+                        <param name="readtype" value="forward" />
+                        <param name="readname" value="" />
+                    </conditional>
+                </repeat>
+                <repeat name="seqfiles">
+                    <param name="input" ftype="fastq" value="barcode_splitter_index.fastq" />
+                    <conditional name="nameinterface">
+                        <param name="readtype" value="index" />
+                        <param name="readname" value="index" />
+                    </conditional>
+                </repeat>
+                <repeat name="seqfiles">
+                    <param name="input" ftype="fastq" value="barcode_splitter_index_2.fastq" />
+                    <conditional name="nameinterface">
+                        <param name="readtype" value="reverse" />
+                        <param name="readname" value="" />
+                    </conditional>
+                </repeat>
+            </conditional>
+
+            <output name="summary" file="test_1_summary.out" />
+            <output_collection name="split_output_paired" type="list:paired">
+                <element name="BC1">
+                    <element name="forward" ftype="fastq" file="test_3_BC1-forward.fastq" />
+                    <element name="reverse" ftype="fastq" file="test_3_BC1-reverse.fastq" />
+                </element>
+                <element name="BC2">
+                    <element name="forward" ftype="fastq" file="test_3_BC2-forward.fastq" />
+                    <element name="reverse" ftype="fastq" file="test_3_BC2-reverse.fastq" />
+                </element>
+                <element name="BC3">
+                    <element name="forward" ftype="fastq" file="test_3_BC3-forward.fastq" />
+                    <element name="reverse" ftype="fastq" file="test_3_BC3-reverse.fastq" />
+                </element>
+                <element name="BC4">
+                    <element name="forward" ftype="fastq" file="test_3_BC4-forward.fastq" />
+                    <element name="reverse" ftype="fastq" file="test_3_BC4-reverse.fastq" />
+                </element>
+                <element name="unmatched">
+                    <element name="forward" ftype="fastq" file="test_3_unmatched-forward.fastq" />
+                    <element name="reverse" ftype="fastq" file="test_3_unmatched-reverse.fastq" />
+                </element>
+                <element name="multimatched">
+                    <element name="forward" ftype="fastq" file="test_3_multimatched-forward.fastq" />
+                    <element name="reverse" ftype="fastq" file="test_3_multimatched-reverse.fastq" />
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <!-- Flexible run type with the dual-column barcode file, 2 allowed mismatches,
+                 and split_all off. Four sequence files are supplied, in this order:
+                   1. forward read (readtype "forward", empty read name)
+                   2. single-end read named "read" (readtype "single")
+                   3. separate index read named "index" (readtype "index")
+                   4. reverse read containing an index (readtype "reverseindex", empty read name)
+                 Inputs 1 and 2 use the same FASTQ file, as do inputs 3 and 4.
+                 Expected: the summary table (test_2_summary.out), the split_output_paired
+                 list:paired collection (reusing the test_3 expected files), and the
+                 split_output_paired_other list collection for the extra single-end read
+                 (reusing the test_1 expected files). -->
+            <param name="bcfile" value="barcode_splitter_barcodes_dual.txt" />
+            <param name="mismatches" value="2" />
+            <param name="barcodes_at_end" value="" />
+            <param name="split_all" value="False" />
+
+            <conditional name="runinterface">
+                <param name="run_type" value="flexible" />
+                <repeat name="seqfiles">
+                    <param name="input" ftype="fastq" value="barcode_splitter1.fastq" />
+                    <conditional name="nameinterface">
+                        <param name="readtype" value="forward" />
+                        <param name="readname" value="" />
+                    </conditional>
+                </repeat>
+                <repeat name="seqfiles">
+                    <param name="input" ftype="fastq" value="barcode_splitter1.fastq" />
+                    <conditional name="nameinterface">
+                        <param name="readtype" value="single" />
+                        <param name="readname" value="read" />
+                    </conditional>
+                </repeat>
+                <repeat name="seqfiles">
+                    <param name="input" ftype="fastq" value="barcode_splitter_index_2.fastq" />
+                    <conditional name="nameinterface">
+                        <param name="readtype" value="index" />
+                        <param name="readname" value="index" />
+                    </conditional>
+                </repeat>
+                <repeat name="seqfiles">
+                    <param name="input" ftype="fastq" value="barcode_splitter_index_2.fastq" />
+                    <conditional name="nameinterface">
+                        <param name="readtype" value="reverseindex" />
+                        <param name="readname" value="" />
+                    </conditional>
+                </repeat>
+            </conditional>
+
+            <output name="summary" file="test_2_summary.out" />
+            <output_collection name="split_output_paired" type="list:paired">
+                <element name="BC1">
+                    <element name="forward" ftype="fastq" file="test_3_BC1-forward.fastq" />
+                    <element name="reverse" ftype="fastq" file="test_3_BC1-reverse.fastq" />
+                </element>
+                <element name="BC2">
+                    <element name="forward" ftype="fastq" file="test_3_BC2-forward.fastq" />
+                    <element name="reverse" ftype="fastq" file="test_3_BC2-reverse.fastq" />
+                </element>
+                <element name="BC3">
+                    <element name="forward" ftype="fastq" file="test_3_BC3-forward.fastq" />
+                    <element name="reverse" ftype="fastq" file="test_3_BC3-reverse.fastq" />
+                </element>
+                <element name="BC4">
+                    <element name="forward" ftype="fastq" file="test_3_BC4-forward.fastq" />
+                    <element name="reverse" ftype="fastq" file="test_3_BC4-reverse.fastq" />
+                </element>
+                <element name="unmatched">
+                    <element name="forward" ftype="fastq" file="test_3_unmatched-forward.fastq" />
+                    <element name="reverse" ftype="fastq" file="test_3_unmatched-reverse.fastq" />
+                </element>
+                <element name="multimatched">
+                    <element name="forward" ftype="fastq" file="test_3_multimatched-forward.fastq" />
+                    <element name="reverse" ftype="fastq" file="test_3_multimatched-reverse.fastq" />
+                </element>
+            </output_collection>
+            <output_collection name="split_output_paired_other" type="list">
+                <element name="BC1 read" ftype="fastq" file="test_1_BC1-read-1.out" />
+                <element name="BC2 read" ftype="fastq" file="test_1_BC2-read-1.out" />
+                <element name="BC3 read" ftype="fastq" file="test_1_BC3-read-1.out" />
+                <element name="BC4 read" ftype="fastq" file="test_1_BC4-read-1.out" />
+                <element name="unmatched read" ftype="fastq" file="test_1_unmatched-read-1.out" />
+                <element name="multimatched read" ftype="fastq" file="test_1_multimatched-read-1.out" />
+            </output_collection>
+        </test>
+    </tests>
+
+    <help>
+        <![CDATA[
+**What it does**
+
+This tool splits a FASTQ file into several files, using barcodes as the split criteria.  Barcodes in one file can be used to split multiple sorted files.  Multiple sets of barcodes, each located in a different file, can be used.
+
+--------
+
+**How it works**
+
+Given a number of allowed mismatches, all possible mismatching barcode combinations are pre-computed and stored in a hash lookup table.  Each barcode column in the barcode file (--bcfile) adds another level to the hash table data structure.  For each read group (e.g. forward, reverse, index1, and index2), the index sequence(s) are used to look up the sample they belong to.  No pattern matching takes place - it's a simple hash table lookup where the keys being looked up are taken from the sequences in the index files.  Barcode collisions are detected during the construction of the hash table, before any sequences are processed.  Collisions produce warnings and/or errors, and reads that match collided barcodes end up in a "multimatched" file.  (A barcode collision occurs when 2 barcodes can match each other once each is given the allowed number of mismatches.)
+
+The length of the barcode sequences in the barcodes file must be less than or equal to the length of the sequences in the corresponding index files and all barcodes in 1 column must be the same length (though the lengths of the barcodes between columns may differ).
+
+There can only be 1 number of mismatches and it is applied per barcode.  E.g. If the number of mismatches is set to 1, and there are 2 barcode columns, then two barcodes on the same row may each have 1 mismatch.  There is no way (currently) to set a different number of mismatches for different barcode columns.
+
+If there are 2 barcode columns, the output summary table can have multiple rows where a single sample could not be identified.  Ignoring multimatched and error states for the moment, the following 4 rows are possible, but only those with counts greater than 0 will be included in the summary table::
+
+    unmatched	unmatched	unmatched	1
+    unmatched	matched	unmatched	2
+    unmatched	unmatched	matched	3
+    unmatched	matched	matched	4
+
+The first column is the ID, which is 'unmatched' in all cases (except the error row).  Here's what each row means in the above example:
+
+    1. For 1 read group, neither of the index sequences matched any barcodes in either barcode column.
+    2. For 2 read groups, a barcode in the first barcode column matched but none from the second were matched.
+    3. For 3 read groups, no barcodes in the first column matched but a barcode in the second barcode column did match.
+    4. For 4 read groups, a barcode from each column matched, but they were not in the same row.
+
+If you encounter large counts in case 4, then barcodes are likely not paired correctly in the barcodes file.
+
+Two other states can also be reported: multimatched & error.  A read group with 'multimatch' in one or more columns is one where, with the allowed number of mismatches, the affected index read can match multiple barcodes in the corresponding column.  A multimatch will only be reported if the number of mismatches in the 2 matched barcodes is the same.  If they are different, barcode_splitter will assign the read group to the better match.  If you have any multimatch barcodes or barcode collision warnings, then the barcode design should be improved.  The number of differences between any pair of barcodes in a single column should be greater than double the number of allowed mismatches, or else you may end up with numerous multimatch scenarios.  A match in another barcode column will not resolve a multimatch in a different column.
+
+**Barcode file Format**
+
+Barcode files are simple text files.
+Each line should contain an identifier (descriptive name for the barcode), and at least 1 barcode, separated by TAB characters. Multiple columns of barcodes are supported (each corresponding to a separate barcoded read file), though there's usually just 1.  For example, with 2 sets of barcodes, the first set could denote the user and the second set could denote each user's samples.
+Example::
+
+    #This line is a comment (starts with a 'number' sign)
+    BC1	GATCT	TTGCAT
+    BC2	ATCGT	GCGCAT
+    BC3	GTGAT	AGGTCA
+    BC4	TGTCT	CTTTGG
+
+For each barcode, a new FASTQ file will be created (with the barcode's identifier as part of the file name).
+Sequences matching the barcodes in a row will be stored in the appropriate file.
+
+The first sequence file submitted must contain sequences with the barcodes in the first column of the barcode file.  The second sequence file must contain sequences with the barcodes in the second column, and so on.  The Number of Index Files supplied must match the number of actual columns in the barcode file and the order in which they are supplied must match the order of the barcode columns as well.
+
+As many as 2 additional FASTQ output files will be created for each read/index file: the 'unmatched' file and the 'multimatched' file, where sequences not matching any barcode or matching more than 1 barcode (when mismatches are taken into account) will be stored.
+
+The output of this tool is a summary table displaying the split counts for each barcode identifier and the percentage of the total reads those represent.
+In addition, each FASTQ file produced will be loaded into the Galaxy history as part of a collection list.
+        ]]>
+    </help>
+    <!-- Citation for the wrapped tool: a single BibTeX @misc entry pointing at the
+         Barcode-Splitter Bitbucket repository.
+         NOTE(review): the entry key is "paired_sequence_utils" while the title and URL
+         refer to barcode_splitter; confirm whether the key should be renamed. -->
+    <citations>
+        <citation type="bibtex">
+          @misc{paired_sequence_utils,
+              title = {{Barcode}-{Splitter}},
+              url = {https://bitbucket.org/princeton_genomics/barcode_splitter/},
+              author = "Parsons, Lance and Leach, Robert"
+          }
+        </citation>
+    </citations>
+</tool>