comparison stacks_denovomap.xml @ 0:b9822370f843 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit b395fa36fa826e26085820ba3a9faacaeddcb460
author iuc
date Mon, 01 Jul 2019 11:04:37 -0400
parents
children afdbc7fcce70
comparison
equal deleted inserted replaced
-1:000000000000 0:b9822370f843
1 <tool id="stacks2_denovomap" name="Stacks2: de novo map" profile="@PROFILE@" version="@STACKS_VERSION@+galaxy@WRAPPER_VERSION@">
2 <description>the Stacks pipeline without a reference genome (denovo_map.pl)</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="version_cmd"/>
8 <command detect_errors="aggressive"><![CDATA[
9 @FASTQ_INPUT_FUNCTIONS@
10 mkdir stacks_inputs stacks_outputs&&
11
12 #set ($link_command, $inputype) = $fastq_input_nonbatch( $input_type.fqinputs, $input_type.input_type_select, ".%d" )
13 $link_command
14
15 denovo_map.pl
16 --samples stacks_inputs
17 #if str($popmap) != 'None':
18 --popmap '$popmap'
19 #end if
20 -o stacks_outputs
21 -T \${GALAXY_SLOTS:-1}
22
23 -M $assembly_options.M
24 -n $assembly_options.n
25 --var-alpha $model_options.var_alpha
26 --gt-alpha $model_options.gt_alpha
27 #if $input_type.input_type_select == "paired"
28 --paired
29 #end if
30 $pe_options.rm_pcr_duplicates
31 --min-samples-per-pop $popfilter_options.min_samples_per_pop
32 --min-populations $popfilter_options.min_populations
33
34 ## the catalog.calls output is a gzip-compressed VCF; decompress it
35 ## to make it usable in Galaxy (with the downside that it needs
36 ## to be gzipped again for downstream calls like populations)
37 && gunzip -c stacks_outputs/catalog.calls > stacks_outputs/catalog.calls.vcf
38 && mv stacks_outputs/denovo_map.log $output_log
39 ]]></command>
40
41 <inputs>
42 <expand macro="fastq_input"/>
43 <param argument="--popmap" type="data" optional="true" format="tabular,txt" label="Population map" />
44 <section name="assembly_options" title="Assembly options" expanded="true">
45 <param name="M" argument="-M" type="integer" value="2" label="Number of mismatches allowed between loci when processing a single individual" help="used in ustacks"/>
46 <param name="n" argument="-n" type="integer" value="1" label="Number of mismatches allowed between loci when building the catalog" help="used in cstacks; suggested: set to -M"/>
47 </section>
48 <section name="model_options" title="SNP model options" expanded="true">
49 <expand macro="variant_calling_options_vg" varalpha_default="0.01"/>
50 </section>
51 <section name="pe_options" title="Paired-end options" expanded="true">
52 <param argument="--rm-pcr-duplicates" name="rm_pcr_duplicates" type="boolean" checked="false" truevalue="--rm-pcr-duplicates" falsevalue="" label="remove all but one set of read pairs of the same sample that have the same insert length" help="" />
53 </section>
54 <section name="popfilter_options" title="Population filtering options" expanded="true">
55 <param argument="--min-samples-per-pop" name="min_samples_per_pop" type="float" min="0" value="0" label="minimum percentage of individuals in a population required to process a locus for that population" help="(for populations; default: 0)"/> <!-- float so that fractional values can be entered; populations reads this option as a float -->
56 <param argument="--min-populations" name="min_populations" type="integer" value="1" label="minimum number of populations a locus must be present in to process a locus" help="(for populations; default: 1)" />
57 </section>
58 <expand macro="in_log"/>
59 </inputs>
60 <outputs>
61 <expand macro="out_log"/> <!-- NOTE: the pipeline also writes tsv2bam.log, gstacks.log and populations.log; these could be exposed as a collection -->
62 <expand macro="ustacks_outputs_macro" tooladd="(ustacks)"/>
63 <expand macro="cstacks_outputs_macro" tooladd="(cstacks)"/>
64 <expand macro="sstacks_outputs_macro" tooladd="(sstacks)"/>
65 <expand macro="tsv2bam_outputs_macro" tooladd="(tsv2bam)"/>
66 <expand macro="gstacks_outputs_macro" tooladd="(gstacks)"/>
67 <expand macro="populations_output_light" tooladd="(populations)"/>
68 </outputs>
69
70 <tests>
71 <!-- paired input, also using the reverse reads;
72 the results are tested for equality. The tools corresponding to the programs
73 used in the pipeline also have a test (should be the first one) that tests for
74 equality; thereby it is "ensured" that the pipeline with defaults is doing the same
75 as the components with defaults. -->
76 <test>
77 <param name="input_type|input_type_select" value="paired"/>
78 <param name="input_type|fqinputs">
79 <collection type="list:paired">
80 <element name="PopA_01">
81 <collection type="paired">
82 <element name="forward" value="demultiplexed/PopA_01.1.fq" ftype="fastqsanger" />
83 <element name="reverse" value="demultiplexed/PopA_01.2.fq" ftype="fastqsanger"/>
84 </collection>
85 </element>
86 <element name="PopA_02">
87 <collection type="paired">
88 <element name="forward" value="demultiplexed/PopA_02.1.fq" ftype="fastqsanger" />
89 <element name="reverse" value="demultiplexed/PopA_02.2.fq" ftype="fastqsanger"/>
90 </collection>
91 </element>
92 </collection>
93 </param>
94 <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" />
95 <output_collection name="tabs" count="6">
96 <element name="PopA_01.tags" file="ustacks/PopA_01.tags.tsv" ftype="tabular" lines_diff="2"/>
97 <element name="PopA_01.snps" file="ustacks/PopA_01.snps.tsv" ftype="tabular" lines_diff="2"/>
98 <element name="PopA_01.alleles" file="ustacks/PopA_01.alleles.tsv" ftype="tabular" lines_diff="2"/>
99 <element name="PopA_02.tags" file="ustacks/PopA_02.tags.tsv" ftype="tabular" lines_diff="2"/>
100 <element name="PopA_02.snps" file="ustacks/PopA_02.snps.tsv" ftype="tabular" lines_diff="2"/>
101 <element name="PopA_02.alleles" file="ustacks/PopA_02.alleles.tsv" ftype="tabular" lines_diff="2"/>
102 </output_collection>
103 <output_collection name="catalog" type="list" count="3">
104 <element name="catalog.alleles" file="cstacks/catalog.alleles.tsv" ftype="tabular" lines_diff="2"/>
105 <element name="catalog.snps" file="cstacks/catalog.snps.tsv" ftype="tabular" lines_diff="2" />
106 <element name="catalog.tags" file="cstacks/catalog.tags.tsv" ftype="tabular" lines_diff="2" />
107 </output_collection>
108 <output_collection name="matches" type="list" count="2">
109 <element name="PopA_01.matches" file="sstacks/PopA_01.matches.tsv" ftype="tabular" lines_diff="2"/>
110 <element name="PopA_02.matches" file="sstacks/PopA_02.matches.tsv" ftype="tabular" lines_diff="2"/>
111 </output_collection>
112 <output_collection name="bams" type="list" count="2">
113 <element name="PopA_01.matches" file="tsv2bam/PopA_01.matches.bam" ftype="bam" compare="sim_size"/>
114 <element name="PopA_02.matches" file="tsv2bam/PopA_02.matches.bam" ftype="bam" compare="sim_size" />
115 </output_collection>
116 <output_collection name="gstacks_out" type="list" count="2">
117 <element name="catalog.calls.vcf" file="gstacks/catalog.calls.vcf" ftype="vcf" lines_diff="2"/>
118 <element name="catalog.fa.gz" file="gstacks/catalog.fa.gz" ftype="fasta.gz" compare="sim_size"/>
119 </output_collection>
120 <output ftype="tabular" name="out_haplotypes" value="populations/populations.haplotypes.tsv"/>
121 <output ftype="tabular" name="out_hapstats" value="populations/populations.hapstats.tsv" compare="sim_size"/>
122 <output ftype="txt" name="out_populations_log_distribs" value="populations/populations.log.distribs" compare="sim_size"/>
123 <output ftype="tabular" name="out_sumstats_sum" value="populations/populations.sumstats_summary.tsv"/>
124 <output ftype="tabular" name="out_sumstats" value="populations/populations.sumstats.tsv" compare="sim_size"/>
125 <output ftype="tabular" name="out_sql" value="populations/populations.markers.tsv"/>
126 </test>
127 <!-- SE input as multi selection, defaults; only the sizes of the collections and the presence of "#" in the six single-file outputs are asserted -->
128 <test>
129 <param name="input_type|input_type_select" value="single"/>
130 <param name="input_type|fqinputs" value="demultiplexed/PopA_01.1.fq,demultiplexed/PopA_02.1.fq" ftype="fastqsanger" />
131 <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" />
132 <output_collection name="tabs" count="6"/>
133 <output_collection name="catalog" type="list" count="3"/>
134 <output_collection name="matches" type="list" count="2"/>
135 <output_collection name="bams" type="list" count="2"/>
136 <output_collection name="gstacks_out" type="list" count="2"/>
137 <output ftype="tabular" name="out_haplotypes"><assert_contents><has_text text="#"/></assert_contents></output>
138 <output ftype="tabular" name="out_hapstats"><assert_contents><has_text text="#"/></assert_contents></output>
139 <output ftype="txt" name="out_populations_log_distribs"><assert_contents><has_text text="#"/></assert_contents></output>
140 <output ftype="tabular" name="out_sumstats_sum"><assert_contents><has_text text="#"/></assert_contents></output>
141 <output ftype="tabular" name="out_sumstats"><assert_contents><has_text text="#"/></assert_contents></output>
142 <output ftype="tabular" name="out_sql"><assert_contents><has_text text="#"/></assert_contents></output>
143 </test>
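144 <!-- input as list, non-defaults, testing only the generated command line and the correct size of the collections. NOTE(review): described as SE input, but input_type_select is set to "paired" while a plain list collection is supplied; confirm which is intended -->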
145 <test>
146 <param name="input_type|input_type_select" value="paired"/>
147 <param name="input_type|fqinputs">
148 <collection type="list">
149 <element name="PopA_01" value="demultiplexed/PopA_01.1.fq" ftype="fastqsanger" />
150 <element name="PopA_02" value="demultiplexed/PopA_02.1.fq" ftype="fastqsanger"/>
151 </collection>
152 </param>
153 <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" />
154 <param name="assembly_options|M" value="3" />
155 <param name="assembly_options|n" value="3" />
156 <param name="model_options|var_alpha" value="0.1" />
157 <param name="model_options|gt_alpha" value="0.1" />
158 <param name="pe_options|rm_pcr_duplicates" value="--rm-pcr-duplicates" />
159 <param name="popfilter_options|min_samples_per_pop" value="1"/>
160 <param name="popfilter_options|min_populations" value="0" />
161 <assert_command> <!-- has_text is a literal substring match, so the option dashes are written without backslash escapes -->
162 <has_text text="-M 3" />
163 <has_text text="-n 3" />
164 <has_text text="--var-alpha 0.1" />
165 <has_text text="--gt-alpha 0.1" />
166 <has_text text="--rm-pcr-duplicates" />
167 <has_text text="--min-samples-per-pop 1" />
168 <has_text text="--min-populations 0" />
169 </assert_command>
170 <output_collection name="tabs" count="6"/>
171 <output_collection name="catalog" type="list" count="3"/>
172 <output_collection name="matches" type="list" count="2"/>
173 <output_collection name="bams" type="list" count="2"/>
174 <output_collection name="gstacks_out" type="list" count="2"/>
175 <output ftype="tabular" name="out_haplotypes"><assert_contents><has_text text="#"/></assert_contents></output>
176 <output ftype="tabular" name="out_hapstats"><assert_contents><has_text text="#"/></assert_contents></output>
177 <output ftype="txt" name="out_populations_log_distribs"><assert_contents><has_text text="#"/></assert_contents></output>
178 <output ftype="tabular" name="out_sumstats_sum"><assert_contents><has_text text="#"/></assert_contents></output>
179 <output ftype="tabular" name="out_sumstats"><assert_contents><has_text text="#"/></assert_contents></output>
180 <output ftype="tabular" name="out_sql"><assert_contents><has_text text="#"/></assert_contents></output>
181 </test>
182 </tests>
183
184 <help>
185 <![CDATA[
186 .. class:: infomark
187
188 **What it does**
189
190 This program will run each of the Stacks components: first, running ustacks on each of the samples specified, building loci and calling SNPs in each. Second, cstacks will be run to create a catalog of all loci that were marked as 'parents' or 'samples' on the command line, and finally, sstacks will be executed to match each sample against the catalog. A bit more detail on this process can be found in the FAQ. The denovo_map.pl program will also load the results of each stage of the analysis: individual loci, the catalog, and matches against the catalog into the database (although this can be disabled). After matching, the program will build a database index to speed up access (index_radtags.pl) and enable web-based filtering.
191
192 --------
193
194 **Input files**
195
196 FASTQ, FASTA
197
198 - Population map::
199
200 indv_01 1
201 indv_02 1
202 indv_03 1
203 indv_04 2
204 indv_05 2
205 indv_06 2
206
207
208 **Output files**
209
210
211 - XXX.tags.tsv file:
212
213 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
214
215 Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.
216
217
218 - XXX.snps.tsv file:
219
220 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
221
222 Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.
223
224
225 - XXX.alleles.tsv file:
226
227 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
228
229
230 - XXX.matches.tsv file:
231
232 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
233
234 Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample.
235
236
237 - other files:
238
239 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
240
241 @STACKS_INFOS@
242 ]]>
243 </help>
244 <expand macro="citation" />
245 </tool>