diff stacks_assembleperead.xml @ 0:e3ca198820a8 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks commit f3a59c91c231cc1582479109e776d05602b7f24d-dirty
author iuc
date Tue, 14 Jun 2016 14:05:40 -0400
parents
children ee52d9cfaffc
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/stacks_assembleperead.xml	Tue Jun 14 14:05:40 2016 -0400
@@ -0,0 +1,169 @@
+<tool id="stacks_assembleperead" name="Stacks: assemble read pairs by locus" version="@WRAPPER_VERSION@.0">
+    <description>run the STACKS sort_read_pairs.pl and exec_velvet.pl wrappers</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <command><![CDATA[
+
+        mkdir stacks_inputs reads stacks_outputs
+
+        &&
+
+        #for $input_file in $stacks_col:
+            #set $ext = ""
+            #if not str($input_file.name).endswith('.tsv'):
+                #set $ext = ".tsv"
+            #end if
+            ln -s "${input_file}" "stacks_inputs/${input_file.name}${ext}" &&
+        #end for
+
+        #for $input_file in $reads:
+            #set $name = str($input_file.name)
+            #if $name.endswith('.2.fq'):
+                ## sort_read_pairs is expecting strange fastq names...
+                #set $name = $name[:-5]+".fq_2"
+            #end if
+            ln -s "${input_file}" "reads/${name}" &&
+        #end for
+
+        sort_read_pairs.pl
+            -p stacks_inputs
+            -s 'reads'
+
+            #if $whitelist:
+                -w '$whitelist'
+            #end if
+
+            #if $threshold:
+                -r $threshold
+            #end if
+
+            -o stacks_outputs
+
+        #if $velvet.use_velvet:
+            ## remove possible empty files
+            && find stacks_outputs -type f -size 0 -delete
+
+            &&
+            mkdir assembled
+            &&
+            velvet_path=`which velveth` && velvet_path=`dirname "\$velvet_path"`
+            &&
+            exec_velvet.pl -s stacks_outputs -o assembled -c -M ${velvet.contig_length} -e "\$velvet_path"
+        #end if
+
+    ]]></command>
+    <inputs>
+        <param name="stacks_col" argument="-p" format="tabular,txt" type="data_collection" collection_type="list" label="Output from previous Stacks pipeline steps (e.g. denovo_map or refmap)" />
+        <param name="reads" argument="-s" format="fastqsanger" type="data" multiple="true" label="Files containing sequences" help="Files containing parent sequences from a mapping cross" />
+
+        <param name="whitelist" argument="-w" format="txt,tabular" type="data" optional="true" label="White list of catalog IDs to include" />
+        <param name="threshold" argument="-r" type="integer" value="" optional="true" label="Minimum number of reads by locus"/>
+
+       <conditional name="velvet">
+            <param name="use_velvet" type="boolean" checked="false" label="Perform assembly with Velvet" help="If not selected, the tool will only produce of collection of fasta files (one per locus) containing reads ready to assemble." />
+            <when value="false"></when>
+            <when value="true">
+                <param name="contig_length" type="integer" value="200" label="Minimum length for asssembled contigs"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <collection name="collated" type="list" label="Collated FASTA files per locus on ${on_string}">
+            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fa(sta)?" ext="fasta" directory="stacks_outputs" />
+        </collection>
+
+        <data format="fasta" name="contigs" label="Assembled contigs on ${on_string}" from_work_dir="assembled/collated.fa">
+            <filter>velvet['use_velvet']</filter>
+        </data>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="stacks_col">
+                <collection type="list">
+                    <element name="batch_1.catalog.alleles.tsv" ftype="tabular" value="genotypes/batch_1.catalog.alleles.tsv" />
+                    <element name="batch_1.catalog.snps.tsv" ftype="tabular" value="genotypes/batch_1.catalog.snps.tsv" />
+                    <element name="batch_1.catalog.tags.tsv" ftype="tabular" value="genotypes/batch_1.catalog.tags.tsv" />
+                    <element name="PopA_01.alleles.tsv" ftype="tabular" value="genotypes/PopA_01.alleles.tsv" />
+                    <element name="PopA_01.matches.tsv" ftype="tabular" value="genotypes/PopA_01.matches.tsv" />
+                    <element name="PopA_01.snps.tsv" ftype="tabular" value="genotypes/PopA_01.snps.tsv" />
+                    <element name="PopA_01.tags.tsv" ftype="tabular" value="genotypes/PopA_01.tags.tsv" />
+                    <element name="PopA_02.alleles.tsv" ftype="tabular" value="genotypes/PopA_02.alleles.tsv" />
+                    <element name="PopA_02.matches.tsv" ftype="tabular" value="genotypes/PopA_02.matches.tsv" />
+                    <element name="PopA_02.snps.tsv" ftype="tabular" value="genotypes/PopA_02.snps.tsv" />
+                    <element name="PopA_02.tags.tsv" ftype="tabular" value="genotypes/PopA_02.tags.tsv" />
+                </collection>
+            </param>
+            <param name="reads" value="demultiplexed/PopA_01.2.fq,demultiplexed/PopA_02.2.fq" ftype="fastqsanger" />
+
+            <output_collection name="collated">
+                <element name="1">
+                    <assert_contents>
+                        <has_text text="CCGATCAGCATCAGTAGTTTTCAACGAGCTGGCCCAATGGTGTATAACTATGTGGTAGAGAGAAACTGCTGCTATCACTCACGATATAAGCCCTCTGACG" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <param name="stacks_col">
+                <collection type="list">
+                    <element name="batch_1.catalog.alleles.tsv" ftype="tabular" value="genotypes/batch_1.catalog.alleles.tsv" />
+                    <element name="batch_1.catalog.snps.tsv" ftype="tabular" value="genotypes/batch_1.catalog.snps.tsv" />
+                    <element name="batch_1.catalog.tags.tsv" ftype="tabular" value="genotypes/batch_1.catalog.tags.tsv" />
+                    <element name="PopA_01.alleles.tsv" ftype="tabular" value="genotypes/PopA_01.alleles.tsv" />
+                    <element name="PopA_01.matches.tsv" ftype="tabular" value="genotypes/PopA_01.matches.tsv" />
+                    <element name="PopA_01.snps.tsv" ftype="tabular" value="genotypes/PopA_01.snps.tsv" />
+                    <element name="PopA_01.tags.tsv" ftype="tabular" value="genotypes/PopA_01.tags.tsv" />
+                    <element name="PopA_02.alleles.tsv" ftype="tabular" value="genotypes/PopA_02.alleles.tsv" />
+                    <element name="PopA_02.matches.tsv" ftype="tabular" value="genotypes/PopA_02.matches.tsv" />
+                    <element name="PopA_02.snps.tsv" ftype="tabular" value="genotypes/PopA_02.snps.tsv" />
+                    <element name="PopA_02.tags.tsv" ftype="tabular" value="genotypes/PopA_02.tags.tsv" />
+                </collection>
+            </param>
+            <param name="reads" value="demultiplexed/PopA_01.2.fq,demultiplexed/PopA_02.2.fq" ftype="fastqsanger" />
+            <param name="velvet|use_velvet" value="true" />
+            <param name="velvet|contig_length" value="20" />
+
+            <output_collection name="collated">
+                <element name="1">
+                    <assert_contents>
+                        <has_text text="CCGATCAGCATCAGTAGTTTTCAACGAGCTGGCCCAATGGTGTATAACTATGTGGTAGAGAGAAACTGCTGCTATCACTCACGATATAAGCCCTCTGACG" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+
+            <output name="contigs">
+                <assert_contents>
+                    <has_text text="TGTATTCTCCCATGCGACAGCAGGACATCCCATCCCCCTCTGATGTTATCAATCATAAGA" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help>
+<![CDATA[
+.. class:: infomark
+
+**What it does**
+
+This program will run each of the Stacks sort_read_pairs.pl and exec_velvet.pl utilities to assemble pair-end reads from STACKS pipeline results
+
+--------
+
+**Input file**
+
+Output from denovo_map or ref_map
+
+
+**Output file**
+
+A collated.fa file containing assembled contigs for each locus
+
+@STACKS_INFOS@
+]]>
+    </help>
+    <expand macro="citation" />
+</tool>