Mercurial > repos > iuc > purge_dups
comparison purge_dups.xml @ 4:a315c25dc813 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/purge_dups commit d2ef7bd6598695a681446eaf9c5b8c142e8a0199"
author | iuc |
---|---|
date | Tue, 12 Oct 2021 19:07:05 +0000 |
parents | 76d4cbefff85 |
children |
comparison
equal
deleted
inserted
replaced
3:76d4cbefff85 | 4:a315c25dc813 |
---|---|
1 <tool id="purge_dups" name="Purge overlaps" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01"> | 1 <tool id="purge_dups" name="Purge overlaps" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01"> |
2 <description>and haplotigs in an assembly based on read depth (purge_dups)</description> | 2 <description>and haplotigs in an assembly based on read depth (purge_dups)</description> |
3 <macros> | 3 <macros> |
4 <token name="@TOOL_VERSION@">1.2.5</token> | 4 <import>macros.xml</import> |
5 <token name="@VERSION_SUFFIX@">3</token> | |
6 <xml name="trimmers"> | |
7 <section name="section_hist" title="Histogram plot options" > | |
8 <!--<param name="cutoffs_his" type="data" optional="true" format="txt" label="Read depth cutoffs file" />--> | |
9 <param argument="--ymin" type="integer" optional="true" min="0" label="Specify a minimum for the Y axis"/> | |
10 <param argument="--ymax" type="integer" optional="true" label="Specify a maximum for the Y axis"/> | |
11 <param argument="--xmin" type="integer" optional="true" min="0" label="Specify a minimum for the X axis"/> | |
12 <param argument="--xmax" type="integer" optional="true" label="Specify a maximum for the X axis"/> | |
13 <param argument="--title" type="text" value="Read depth histogram plot" label="Histogram title"/> | |
14 </section> | |
15 </xml> | |
16 <token name="@HIST_PLOT@"><![CDATA[ | |
17 python '$__tool_directory__/hist_plot.py' | |
18 --cutoffs cutoffs.tsv | |
19 #if $function_select.section_hist.ymin | |
20 --ymin $function_select.section_hist.ymin | |
21 #end if | |
22 #if $function_select.section_hist.ymax | |
23 --ymax $function_select.section_hist.ymax | |
24 #end if | |
25 #if $function_select.section_hist.xmin | |
26 --xmin $function_select.section_hist.xmin | |
27 #end if | |
28 #if $function_select.section_hist.xmax | |
29 --xmax $function_select.section_hist.xmax | |
30 #end if | |
31 #if $function_select.section_hist.title | |
32 --title '${function_select.section_hist.title}' | |
33 #end if | |
34 depth.stat hist.png | |
35 ]]></token> | |
36 <token name="@CALCUTS@"><![CDATA[ | |
37 calcuts | |
38 #if $function_select.section_calcuts.min_depth: | |
39 -f $function_select.section_calcuts.min_depth | |
40 #end if | |
41 #if $function_select.section_calcuts.low_depth: | |
42 -l $function_select.section_calcuts.low_depth | |
43 #end if | |
44 #if $function_select.section_calcuts.transition: | |
45 -m $function_select.section_calcuts.transition | |
46 #end if | |
47 #if $function_select.section_calcuts.upper_depth: | |
48 -u $function_select.section_calcuts.upper_depth | |
49 #end if | |
50 $function_select.section_calcuts.ploidy | |
51 ]]></token> | |
52 <xml name="calcuts"> | |
53 <section name="section_calcuts" title="Calcuts options"> | |
54 <param name="min_depth" type="float" label="Minimum depth count fraction to maximum depth coun" min="0" max="1" argument="-f" optional="true" help="Default = 0.1"/> | |
55 <param name="low_depth" label="Lower bound for read depth" type="integer" argument="-l" optional="true"/> | |
56 <param name="transition" label="Transition between haploid and diploid" type="integer" argument="-m" optional="true"/> | |
57 <param name="upper_depth" label="Upper bound for read depth" type="integer" argument="-u" optional="true"/> | |
58 <param name="ploidy" argument="-d" type="select" label="Ploidy"> | |
59 <option value="-d 0" selected="true">Diploid [0]</option> | |
60 <option value="-d 1">Haploid [1]</option> | |
61 </param> | |
62 </section> | |
63 </xml> | |
64 </macros> | 5 </macros> |
65 <requirements> | 6 <expand macro="xrefs"/> |
66 <requirement type="package" version="@TOOL_VERSION@">purge_dups</requirement> | 7 <expand macro="requirements"/> |
67 <requirement type="package" version="3.4.2">matplotlib-base</requirement> | |
68 </requirements> | |
69 <command detect_errors="exit_code"><![CDATA[ | 8 <command detect_errors="exit_code"><![CDATA[ |
70 #if $function_select.functions == 'purge_dups': | 9 #if $function_select.functions == 'purge_dups': |
71 #for $i, $file in enumerate($function_select.input): | 10 #for $i, $file in enumerate($function_select.input): |
72 #if $file.is_of_type("paf"): | 11 #if $file.is_of_type("paf"): |
73 gzip -c '${file}' > '${i}.gz' && | 12 gzip -c '${file}' > '${i}.gz' && |
80 -c '${function_select.coverage}' | 19 -c '${function_select.coverage}' |
81 #end if | 20 #end if |
82 #if $function_select.cutoffs: | 21 #if $function_select.cutoffs: |
83 -T '${function_select.cutoffs}' | 22 -T '${function_select.cutoffs}' |
84 #end if | 23 #end if |
85 #if $function_select.min_bad: | 24 -f $function_select.min_bad |
86 -f $function_select.min_bad | 25 -a $function_select.min_align |
87 #end if | 26 -b $function_select.min_match |
88 #if $function_select.min_align: | 27 -m $function_select.min_chain |
89 -a $function_select.min_align | 28 -M $function_select.max_gap |
90 #end if | |
91 #if $function_select.min_match: | |
92 -b $function_select.min_match | |
93 #end if | |
94 #if $function_select.min_chain: | |
95 -m $function_select.min_chain | |
96 #end if | |
97 #if $function_select.max_gap: | |
98 -M $function_select.max_gap | |
99 #end if | |
100 #if $function_select.double_chain.chaining_rounds == 'two': | 29 #if $function_select.double_chain.chaining_rounds == 'two': |
101 -2 | 30 -2 |
102 #if $function_select.double_chain.max_gap_2: | 31 -G $function_select.double_chain.max_gap_2 |
103 -G $function_select.double_chain.max_gap_2 | |
104 #end if | |
105 #end if | 32 #end if |
106 #if $function_select.min_chain_score: | 33 -l $function_select.min_chain_score |
107 -l $function_select.min_chain_score | 34 -E $function_select.max_extend |
108 #end if | |
109 #if $function_select.max_extend: | |
110 -E $function_select.max_extend | |
111 #end if | |
112 #for $i, $file in enumerate($function_select.input): | 35 #for $i, $file in enumerate($function_select.input): |
113 '${i}.gz' | 36 '${i}.gz' |
114 #end for | 37 #end for |
115 > dups.bed 2> purge_dups.log | 38 > dups.bed 2> purge_dups.log |
116 #else if $function_select.functions == 'split_fa': | 39 #else if $function_select.functions == 'split_fa': |
117 split_fa | 40 split_fa |
118 #if $function_select.split: | |
119 -n $function_select.split | |
120 #end if | |
121 '${function_select.input}' > split.fasta | 41 '${function_select.input}' > split.fasta |
122 #else if $function_select.functions == 'pbcstat': | 42 #else if $function_select.functions == 'pbcstat': |
123 #for $i, $file in enumerate($function_select.input): | 43 #for $i, $file in enumerate($function_select.input): |
124 #if $file.is_of_type('paf'): | 44 #if $file.is_of_type('paf'): |
125 gzip -c '${file}' > '${i}.gz' && | 45 gzip -c '${file}' > '${i}.gz' && |
126 #else | 46 #else |
127 ln -s '${file}' '${i}.gz' && | 47 ln -s '${file}' '${i}.gz' && |
128 #end if | 48 #end if |
129 #end for | 49 #end for |
130 pbcstat | 50 pbcstat |
131 #if $function_select.max_cov: | 51 -M $function_select.pbcstat_options.max_cov |
132 -M $function_select.max_cov | 52 -f $function_select.pbcstat_options.min_map_ratio |
53 #if $function_select.pbcstat_options.min_map_qual: | |
54 -q $function_select.pbcstat_options.min_map_qual | |
133 #end if | 55 #end if |
134 #if $function_select.min_map_ratio: | 56 -l $function_select.pbcstat_options.flank |
135 -f $function_select.min_map_ratio | 57 $function_select.pbcstat_options.primary_alignments |
136 #end if | |
137 #if $function_select.min_map_qual: | |
138 -q $function_select.min_map_qual | |
139 #end if | |
140 #if $function_select.flank: | |
141 -l $function_select.flank | |
142 #end if | |
143 $function_select.primary_alignments | |
144 | |
145 #for $i, $file in enumerate($function_select.input): | 58 #for $i, $file in enumerate($function_select.input): |
146 '${i}.gz' | 59 '${i}.gz' |
147 #end for | 60 #end for |
148 && mv PB.stat depth.stat | 61 && mv PB.stat depth.stat |
149 && @CALCUTS@ depth.stat > cutoffs.tsv 2>calcuts.log | 62 && @CALCUTS@ depth.stat > cutoffs.tsv 2>calcuts.log |
150 && @HIST_PLOT@ | 63 && @HIST_PLOT@ |
151 | 64 |
152 #else if $function_select.functions == 'ngscstat': | 65 #else if $function_select.functions == 'ngscstat': |
153 ngscstat | 66 ngscstat |
154 #if $function_select.min_align_qual: | 67 -q $function_select.ngscstat_options.min_align_qual |
155 -q $function_select.min_align_qual | |
156 #end if | |
157 ## #if $function_select.max_depth: | 68 ## #if $function_select.max_depth: |
158 ## -M $function_select.max_depth | 69 ## -M $function_select.max_depth |
159 ## #end if | 70 ## #end if |
160 #if $function_select.max_insert: | 71 -L $function_select.ngscstat_options.max_insert |
161 -L $function_select.max_insert | |
162 #end if | |
163 '${function_select.input}' | 72 '${function_select.input}' |
164 && mv TX.stat depth.stat | 73 && mv TX.stat depth.stat |
165 && @CALCUTS@ depth.stat > cutoffs.tsv 2>calcuts.log | 74 && @CALCUTS@ depth.stat > cutoffs.tsv 2>calcuts.log |
166 && @HIST_PLOT@ | 75 && @HIST_PLOT@ |
167 | 76 |
168 #else if $function_select.functions == 'calcuts': | 77 #else if $function_select.functions == 'calcuts': |
169 @CALCUTS@ '${function_select.input}' > cutoffs.tsv 2>calcuts.log | 78 @CALCUTS@ '${function_select.input}' > cutoffs.tsv 2>calcuts.log |
170 | 79 |
171 #else if $function_select.functions == 'get_seqs': | 80 #else if $function_select.functions == 'get_seqs': |
172 get_seqs | 81 get_seqs |
173 $function_select.coverage | 82 $function_select.advanced_options.coverage |
174 $function_select.haplotigs | 83 $function_select.advanced_options.haplotigs |
175 $function_select.end_trim | 84 $function_select.advanced_options.end_trim |
176 $function_select.split | 85 $function_select.advanced_options.split |
177 #if $function_select.length: | 86 -l $function_select.advanced_options.length |
178 -l $function_select.length | 87 -m $function_select.advanced_options.min_ratio |
179 #end if | 88 -g $function_select.advanced_options.min_gap |
180 #if $function_select.min_ratio: | |
181 -m $function_select.min_ratio | |
182 #end if | |
183 #if $function_select.min_gap: | |
184 -g $function_select.min_gap | |
185 #end if | |
186 '${function_select.bed_input}' '${function_select.fasta_input}' | 89 '${function_select.bed_input}' '${function_select.fasta_input}' |
187 #end if | 90 #end if |
188 ]]></command> | 91 ]]></command> |
189 <inputs> | 92 <inputs> |
190 <conditional name="function_select"> | 93 <conditional name="function_select"> |
191 <param type="select" name="functions" label="Select the purge_dups function"> | 94 <param type="select" name="functions" label="Function mode"> |
95 <option value="pbcstat">Calculate coverage cutoff, base-level read depth and create read depth histogram for PacBio data (calcuts+pbcstat)</option> | |
96 <option value="ngscstat">Calculate coverage cutoff, base-level read depth and create read depth histogram for Illumina data (calcuts+ngscstat)</option> | |
97 <option value="calcuts">Calculate coverage cutoffs (calcuts)</option> | |
98 <option value="split_fa">Split assembly FASTA files by 'N's (split_fa)</option> | |
192 <option value="purge_dups">Purge haplotigs and overlaps for an assembly (purge_dups)</option> | 99 <option value="purge_dups">Purge haplotigs and overlaps for an assembly (purge_dups)</option> |
193 <option value="split_fa">Split FASTA file by 'N's (split_fa)</option> | 100 <option value="get_seqs">Obtain sequences after purging (get_seqs)</option> |
194 <option value="pbcstat">Calculate coverage cutoff and create read depth histogram and base-levelread depth for PacBio data (calcuts+pbcstats)</option> | |
195 <option value="ngscstat">Calculate coverage cutoff and create read depth histogram and base-level read detph for Illumina data (calcuts+ngscstat)</option> | |
196 <option value="calcuts">calculate coverage cutoffs (calcuts)</option> | |
197 <option value="get_seqs">Obtain seqeuences after purging (get_seqs)</option> | |
198 </param> | 101 </param> |
199 <when value="purge_dups"> | 102 <when value="purge_dups"> |
200 <param name="input" type="data" format="paf,paf.gz" multiple="true" label="PAF input file"/> | 103 <param name="input" type="data" format="paf,paf.gz" multiple="true" label="PAF input file"/> |
201 <param name="coverage" type="data" format="tabular" optional="true" argument="-c" label="Base-level coverage file" /> | 104 <param argument="-c" name="coverage" type="data" |
202 <param name="cutoffs" type="data" format="tabular" label ="Cutoffs file" optional="true" argument="-T"/> | 105 format="tabular" optional="true" label="Base-level coverage file" |
203 <param name="min_bad" type="float" min="0" max="1" argument="-f" optional="true" label="Minimum fraction of haploid/diploid/bad/repetitive bases in a sequence" help="Default = 0.8"/> | 106 help="This file is generated with purge_dups by using the 'Calculate coverage cutoff, base-level |
204 <param name="min_align" type="integer" label="Minimum alignment score" argument="-a" optional="true"/> | 107 read depth and create read depth histogram' option"/> |
205 <param name="min_match" type="integer" label="Minimum max match score" argument="-b" optional="true"/> | 108 <param argument="-T" name="cutoffs" type="data" |
206 <param name="min_chain" label="Minimum matching bases for chaining" type="integer" argument="-m" optional="true"/> | 109 format="tabular" optional="true" label ="Cutoffs file" |
207 <param name="max_gap" label="Maximum gap size for chaining" type="integer" argument="-M" optional="true"/> | 110 help="This file is generated with purge_dups by using the 'Calculate coverage cutoff, base-level |
111 read depth and create read depth histogram' option"/> | |
112 <param argument="-f" name="min_bad" type="float" | |
113 min="0" max="1" value="0.8" label="Minimum fraction of haploid/diploid/bad/repetitive bases in a sequence" | |
114 help="This parameter is set for suspect haplotigs. If 80% (default value) of a scaffold is high coverage | |
115 (defined by the sixth column of the cutoff file), it's a repetitive contig. If 80% is low coverage (defined | |
116 in the first column of the cutoff file), it's a junk contig. If 80% is above diploid coverage (defined in | |
117 the fourth column of the cutoff file), it's a diploid, otherwise it's a suspect haplotig"/> | |
118 <param argument="-a" name="min_align" type="integer" | |
119 min="0" value="70" label="Minimum alignment score" | |
120 help="If its alignment score is larger than this parameter and max match score larger than | |
121 the 'minimum max match score' (-b), it is marked as a repeat; if the alignment score is larger than this parameter | |
122 and max match score no larger than the 'minimum max match score', it is marked as a haplotig. | |
123 Otherwise it is left as a candidate primary contig. If after purging, the complete genes reported by BUSCO are | |
124 too low, you can try to increase this parameter"/> | |
125 <param argument="-b" name="min_match" type="integer" | |
126 min="0" value="200" label="Minimum max match score" | |
127 help="If its alignment score is larger than the 'minimum align score' (-a) and max match score larger than | |
128 this parameter, it is marked as a repeat; if the alignment score is larger than the 'minimum | |
129 align' and max match score no larger than this parameter, it is marked as a haplotig. | |
130 Otherwise it is left as a candidate primary contig."/> | |
131 <param argument="-m" name="min_chain" type="integer" | |
132 min="0" value="500" label="Minimum matching bases for chaining" | |
133 help="In the first round, it chains consistent alignments within this parameter value"/> | |
134 <param argument="-M" name="max_gap" type="integer" | |
135 min="0" value="20000" label="Maximum gap size for chaining" | |
136 help="In the first round, it chains consistent alignments within this parameter value"/> | |
208 <conditional name="double_chain"> | 137 <conditional name="double_chain"> |
209 <param type="select" name="chaining_rounds" label="Rounds of chaining"> | 138 <param type="select" name="chaining_rounds" label="Rounds of chaining"> |
210 <option value="one">1 round</option> | 139 <option value="one">1 round</option> |
211 <option value="two">2 rounds</option> | 140 <option value="two">2 rounds</option> |
212 </param> | 141 </param> |
213 <when value="two"> | 142 <when value="two"> |
214 <param name="max_gap_2" argument="-G" optional="true" label="Maximum gap size for second round of chaining" type="integer"/> | 143 <param argument="-G" name="max_gap_2" type="integer" |
144 min="0" value="50000" label="Maximum gap size for second round of chaining" | |
145 help="In the second round, it chains consistent alignments within this parameter value"/> | |
215 </when> | 146 </when> |
216 <when value="one"/> | 147 <when value="one"/> |
217 </conditional> | 148 </conditional> |
218 <param name="min_chain_score" argument="-l" optional="true" label="Minimum chaining score for a match" type="integer" /> | 149 <param argument="-l" name="min_chain_score" type="integer" |
219 <param name="max_extend" argument="-E" optional="true" label="Maximum extension for contig ends" type="integer" /> | 150 min="0" value="10000" label="Minimum chaining score for a match" |
151 help="This parameter controls the overlap size. You should decrease its value to allow more overlaps"/> | |
152 <param argument="-E" name="max_extend" type="integer" | |
153 min="0" value="15000" label="Maximum extension for contig ends" | |
154 help="If the chained alignment is within this value to the contig ends, it will be extended to the ends"/> | |
155 <param name="log_file" type="boolean" truevalue="true" falsevalue="false" label="Generate log file"/> | |
220 </when> | 156 </when> |
221 <when value="split_fa"> | 157 <when value="split_fa"> |
222 <param name="input" type="data" format="fasta" label="Base-level coverage file"/> | 158 <param name="input" type="data" format="fasta,fasta.gz" label="Assembly FASTA file" help="The sequence will be cleaved at those positions in which the nucleotide is an 'N' or an 'n'."/> |
223 <param name="split" type="boolean" truevalue="-n" falsevalue="" checked="false" label="Base-level coverage file" /> | 159 <!-- This option disables the cleaving process, and yields the original sequence |
160 <param argument="-n" type="boolean" truevalue="-n" falsevalue="" checked="false" label="Block split by N" help="Enable this option if you do not want to break your scaffolds into contigs."/> | |
161 --> | |
224 </when> | 162 </when> |
225 <when value="pbcstat"> | 163 <when value="pbcstat"> |
226 <param name="input" type="data" format="paf,paf.gz" multiple="true" label="PAF input file"/> | 164 <param name="input" type="data" format="paf,paf.gz" multiple="true" label="PAF input file"/> |
227 <param name="max_cov" type="integer" label="Maximum coverage" argument="-M" optional="true"/> | 165 <section name="pbcstat_options" title="PBCSTAT options" expanded="true"> |
228 <param name="min_map_ratio" argument="-f" type="float" min="0" max="1" value="0" label="Minimum mapping length ratio"/> | 166 <param argument="-M" name="max_cov" type="integer" min="0" value="500" label="Maximum coverage"/> |
229 <param name="min_map_qual" type="integer" argument="-q" optional="true" label="Minimum mapping quality"/> | 167 <param argument="-f" name="min_map_ratio" type="float" min="0" max="1" value="0" label="Minimum mapping length ratio"/> |
230 <param name="flank" type="integer" argument="-l" optional="true" label="Flanking space" /> | 168 <param argument="-q" name="min_map_qual" type="integer" optional="true" label="Minimum mapping quality"/> |
231 <param name="primary_alignments" argument="-p" type="boolean" truevalue="-p" falsevalue="" checked="true" label="Use only primary alignments" /> | 169 <param argument="-l" name="flank" type="integer" min="0" value="0" label="Flanking space" /> |
170 <param argument="-p" name="primary_alignments" type="boolean" truevalue="-p" falsevalue="" checked="true" label="Use only primary alignments" /> | |
171 </section> | |
232 <expand macro="calcuts" /> | 172 <expand macro="calcuts" /> |
233 <expand macro="trimmers"/> | 173 <expand macro="trimmers"/> |
174 <expand macro="output_macro"> | |
175 <option value="pbcstat_coverage" selected="true">PBCSTAT base coverage</option> | |
176 <option value="pbcstat_wig">PBCSTAT base coverage (WIG)</option> | |
177 <option value="depth_stats">PBCSTAT depths</option> | |
178 </expand> | |
234 </when> | 179 </when> |
235 <when value="ngscstat"> | 180 <when value="ngscstat"> |
236 <param name="input" type="data" format="bam" label="BAM input file"/> | 181 <param name="input" type="data" format="bam" label="BAM input file"/> |
237 <param name="min_align_qual" type="integer" argument="-q" optional="true" label="Minimum alignment quality" /> | 182 <section name="ngscstat_options" title="NGSCSTAT options" expanded="true"> |
238 <!-- Param exists in help text, but isn't actually part of the code. Maybe in the next release? --> | 183 <param argument="-q" name="min_align_qual" type="integer" min="0" value="30" label="Minimum alignment quality" /> |
239 <!-- <param name="max_depth" type="integer" label="Maximum read depth" argument="-M" optional="true"/> --> | 184 <!-- Param exists in help text, but isn't actually part of the code. Maybe in the next release? --> |
240 <param name="max_insert" type="integer" argument="-L" optional="true" label="Maximum insert size"/> | 185 <!-- <param name="max_depth" type="integer" label="Maximum read depth" argument="-M" optional="true"/> --> |
186 <param argument="-L" name="max_insert" type="integer" min="0" value="1000" label="Maximum insert size"/> | |
187 </section> | |
241 <expand macro="calcuts" /> | 188 <expand macro="calcuts" /> |
242 <expand macro="trimmers"/> | 189 <expand macro="trimmers"/> |
190 <expand macro="output_macro"> | |
191 <option value="ngscstat_coverage" selected="true">NGSCSTAT base coverage</option> | |
192 </expand> | |
243 </when> | 193 </when> |
244 | 194 |
245 <when value="calcuts"> | 195 <when value="calcuts"> |
246 <param name="input" type="data" format="tabular" label="STAT input file"/> | 196 <param name="input" type="data" format="tabular" label="Depths input file"/> |
247 <expand macro="calcuts" /> | 197 <expand macro="calcuts" /> |
248 </when> | 198 </when> |
249 | |
250 <when value="get_seqs"> | 199 <when value="get_seqs"> |
251 <param name="fasta_input" type="data" format="fasta" label="Fasta input file"/> | 200 <param name="fasta_input" type="data" format="fasta" label="Assembly FASTA file"/> |
252 <param name="bed_input" type="data" format="bed" label="Bed input file"/> | 201 <param name="bed_input" type="data" format="bed" label="BED input file" help="Generated by the 'purge_dups' function mode."/> |
253 <param name="coverage" type="boolean" argument="-c" truevalue="-c" falsevalue="" checked="false" label="Keep high coverage contigs in the primary contig set"/> | 202 <section name="advanced_options" title="Advanced options"> |
254 <param name="haplotigs" type="boolean" argument="-a" truevalue="-a" falsevalue="" checked="false" label="Do not add prefix to haplotigs"/> | 203 <param argument="-c" name="coverage" type="boolean" |
255 <param name="length" type="integer" argument="-l" optional="true" label="Minimum primary contig length" help="Default: 1000"/> | 204 truevalue="-c" falsevalue="" checked="false" label="Keep high coverage contigs in the primary contig set"/> |
256 <param name="min_ratio" type="float" min="0" max="1" argument="-m" optional="true" label="Minimum ratio of remaining primary contig length to the original contig length"/> | 205 <param argument="-a" name="haplotigs" type="boolean" truevalue="-a" falsevalue="" checked="false" label="Do not add prefix to haplotigs"/> |
257 <param name="end_trim" type="boolean" argument="-e" truevalue="-e" falsevalue="" checked="true" label="Trim end sequences" help="Only remove sequences at end of halplotigs If you also want to remove the duplications in the middle, set to false, however that may delete false positive duplications."/> | 206 <param argument="-l" name="length" type="integer" min="0" value="10000" label="Minimum primary contig length" /> |
258 <param name="split" type="boolean" argument="-s" truevalue="-s" falsevalue="" checked="false" label="Split contigs"/> | 207 <param argument="-m" name="min_ratio" type="float" |
259 <param name="min_gap" type="integer" argument="-g" optional="true" help="default=10k" label="Minimum gap size between duplications" /> | 208 min="0" max="1" value="0.05" label="Minimum ratio of remaining primary contig length to the original contig length"/> |
209 <param argument="-e" name="end_trim" type="boolean" | |
210 truevalue="-e" falsevalue="" checked="true" label="Trim end sequences" | |
211 help="Only remove sequences at the end of haplotigs. If you also want to remove the duplications in the middle, | |
212 set to false, however that may delete false positive duplications."/> | |
213 <param argument="-s" name="split" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Split contigs"/> | |
214 <param argument="-g" name="min_gap" type="integer" min="0" value="10000" label="Minimum gap size between duplications" /> | |
215 </section> | |
260 </when> | 216 </when> |
261 </conditional> | 217 </conditional> |
262 </inputs> | 218 </inputs> |
263 <outputs> | 219 <outputs> |
264 <!-- Get Seqs --> | 220 <!-- Get Seqs --> |
265 <data name="get_seqs_hap" format="fasta" from_work_dir="hap.fa" label="${tool.name} on ${on_string}: get seqs haplotype fasta" > | 221 <data name="get_seqs_hap" format="fasta" from_work_dir="hap.fa" label="${tool.name} on ${on_string}: get_seqs haplotype" > |
266 <filter>function_select['functions'] == 'get_seqs'</filter> | 222 <filter>function_select['functions'] == 'get_seqs'</filter> |
267 </data> | 223 </data> |
268 <data name="get_seqs_purged" format="fasta" from_work_dir="purged.fa" label="${tool.name} on ${on_string}: get seqs purged fasta"> | 224 <data name="get_seqs_purged" format="fasta" from_work_dir="purged.fa" label="${tool.name} on ${on_string}: get_seqs purged sequences"> |
269 <filter>function_select['functions'] == 'get_seqs'</filter> | 225 <filter>function_select['functions'] == 'get_seqs'</filter> |
270 </data> | 226 </data> |
271 <!-- Split FA --> | 227 <!-- Split FA --> |
272 <data name="split_fasta" format="fasta" from_work_dir="split.fasta" label="${tool.name} on ${on_string}: split fasta"> | 228 <data name="split_fasta" format="fasta" from_work_dir="split.fasta" label="${tool.name} on ${on_string}: split FASTA"> |
273 <filter>function_select['functions'] == 'split_fa'</filter> | 229 <filter>function_select['functions'] == 'split_fa'</filter> |
274 </data> | 230 </data> |
275 <!-- Ngscstat --> | 231 <!-- Ngscstat --> |
276 <data name="ngscstat_cov" format="tabular" from_work_dir="TX.base.cov" label="${tool.name} on ${on_string}: ngscstat base coverage file"> | 232 <data name="ngscstat_cov" format="tabular" from_work_dir="TX.base.cov" label="${tool.name} on ${on_string}: NGSCSTAT base coverage"> |
277 <filter>function_select['functions'] == 'ngscstat'</filter> | 233 <filter>function_select['functions'] == 'ngscstat'</filter> |
278 </data> | 234 <filter>'ngscstat_coverage' in function_select['output_options']</filter> |
279 <data name="stat_file" format="tabular" from_work_dir="depth.stat" label="${tool.name} on ${on_string}: stat file"> | 235 </data> |
236 <data name="stat_file" format="tabular" from_work_dir="depth.stat" label="${tool.name} on ${on_string}: depths"> | |
280 <filter>function_select['functions'] == 'ngscstat' or function_select['functions'] == 'pbcstat'</filter> | 237 <filter>function_select['functions'] == 'ngscstat' or function_select['functions'] == 'pbcstat'</filter> |
238 <filter>'depth_stats' in function_select['output_options']</filter> | |
281 </data> | 239 </data> |
282 <!-- Pbcstat --> | 240 <!-- Pbcstat --> |
283 <data name="pbcstat_cov" format="tabular" from_work_dir="PB.base.cov" label="${tool.name} on ${on_string}: pbcstat base coverage file"> | 241 <data name="pbcstat_cov" format="tabular" from_work_dir="PB.base.cov" label="${tool.name} on ${on_string}: PBCSTAT base coverage"> |
284 <filter>function_select['functions'] == 'pbcstat'</filter> | 242 <filter>function_select['functions'] == 'pbcstat'</filter> |
285 </data> | 243 <filter>'pbcstat_coverage' in function_select['output_options']</filter> |
286 <data name="pbcstat_wig" format="wig" from_work_dir="PB.cov.wig" label="${tool.name} on ${on_string}: pbcstat base wig file"> | 244 </data> |
245 <data name="pbcstat_wig" format="wig" from_work_dir="PB.cov.wig" label="${tool.name} on ${on_string}: PBCSTAT base coverage (WIG)"> | |
287 <filter>function_select['functions'] == 'pbcstat'</filter> | 246 <filter>function_select['functions'] == 'pbcstat'</filter> |
247 <filter>'pbcstat_wig' in function_select['output_options']</filter> | |
288 </data> | 248 </data> |
289 | 249 |
290 <data name="hist" format="png" from_work_dir="hist.png" label="${tool.name} on ${on_string}: histogram plot"> | 250 <data name="hist" format="png" from_work_dir="hist.png" label="${tool.name} on ${on_string}: histogram plot"> |
291 <filter>function_select['functions'] == 'pbcstat' or function_select['functions'] == 'ngscstat'</filter> | 251 <filter>function_select['functions'] == 'pbcstat' or function_select['functions'] == 'ngscstat'</filter> |
252 <filter>'histogram' in function_select['output_options']</filter> | |
292 </data> | 253 </data> |
293 | 254 |
294 <!-- Calcuts --> | 255 <!-- Calcuts --> |
295 <data name="calcuts_log" format="txt" from_work_dir="calcuts.log" label="${tool.name} on ${on_string}: calcuts log file"> | 256 <data name="calcuts_log" format="txt" from_work_dir="calcuts.log" label="${tool.name} on ${on_string}: calcuts log"> |
296 <filter>function_select['functions'] in ('pbcstat', 'ngscstat', 'calcuts')</filter> | 257 <filter>function_select['functions'] in ('pbcstat', 'ngscstat', 'calcuts')</filter> |
297 </data> | 258 <filter>'calcuts_log' in function_select['output_options']</filter> |
298 <data name="calcuts_tab" format="tabular" from_work_dir="cutoffs.tsv" label="${tool.name} on ${on_string}: calcuts cutoff file"> | 259 </data> |
260 <data name="calcuts_cutoff" format="tabular" from_work_dir="cutoffs.tsv" label="${tool.name} on ${on_string}: calcuts cutoff"> | |
299 <filter>function_select['functions'] in ('pbcstat', 'ngscstat', 'calcuts')</filter> | 261 <filter>function_select['functions'] in ('pbcstat', 'ngscstat', 'calcuts')</filter> |
262 <filter>'calcuts_cutoff' in function_select['output_options']</filter> | |
300 </data> | 263 </data> |
301 <!-- Purge dups --> | 264 <!-- Purge dups --> |
302 <data name="purge_dups_log" format="txt" from_work_dir="purge_dups.log" label="${tool.name} on ${on_string}: purge_dups log file"> | 265 <data name="purge_dups_log" format="txt" from_work_dir="purge_dups.log" label="${tool.name} on ${on_string}: purge_dups log"> |
303 <filter>function_select['functions'] == 'purge_dups'</filter> | 266 <filter>function_select['functions'] == 'purge_dups'</filter> |
304 </data> | 267 <filter>function_select['log_file']</filter> |
305 <data name="purge_dups_bed" format="bed" from_work_dir="dups.bed" label="${tool.name} on ${on_string}: purge_dups bed file"> | 268 </data> |
269 <data name="purge_dups_bed" format="bed" from_work_dir="dups.bed" label="${tool.name} on ${on_string}: purge_dups BED"> | |
306 <filter>function_select['functions'] == 'purge_dups'</filter> | 270 <filter>function_select['functions'] == 'purge_dups'</filter> |
307 </data> | 271 </data> |
308 </outputs> | 272 </outputs> |
309 <tests> | 273 <tests> |
310 <!-- Purge dups --> | 274 <!-- Test 1 Purge dups --> |
311 <test expect_num_outputs="2"> | 275 <test expect_num_outputs="1"> |
312 <conditional name="function_select"> | 276 <conditional name="function_select"> |
313 <param name="functions" value="purge_dups"/> | 277 <param name="functions" value="purge_dups"/> |
314 <param name="input" value="test.paf"/> | 278 <param name="input" value="assembly_test.paf"/> |
315 <param name="coverage" value="test.cov" ftype="tabular"/> | 279 <param name="coverage" value="test.cov" ftype="tabular"/> |
316 <param name="cutoffs" value="cutoffs.tsv" ftype="tabular"/> | 280 <param name="cutoffs" value="cutoffs.tsv" ftype="tabular"/> |
317 <param name="min_bad" value="0.01"/> | 281 <param name="min_bad" value="0.01"/> |
318 <param name="min_align" value="10"/> | 282 <param name="min_align" value="10"/> |
319 <param name="min_match" value="100"/> | 283 <param name="min_match" value="100"/> |
324 <param name="max_gap_2" value="1001"/> | 288 <param name="max_gap_2" value="1001"/> |
325 </conditional> | 289 </conditional> |
326 <param name="min_chain_score" value="1"/> | 290 <param name="min_chain_score" value="1"/> |
327 <param name="max_extend" value="100"/> | 291 <param name="max_extend" value="100"/> |
328 </conditional> | 292 </conditional> |
329 <output name="purge_dups_bed" value="purge_dups_out.bed"/> | 293 <output name="purge_dups_bed" value="purge_dups_01.bed" ftype="bed"/> |
330 </test> | 294 </test> |
331 <!-- Purge dups gzip --> | 295 <!-- Test 2 Purge dups gzip --> |
332 <test expect_num_outputs="2"> | 296 <test expect_num_outputs="2"> |
333 <conditional name="function_select"> | 297 <conditional name="function_select"> |
334 <param name="functions" value="purge_dups"/> | 298 <param name="functions" value="purge_dups"/> |
335 <param name="input" value="test.paf.gz" ftype="paf.gz"/> | 299 <param name="input" value="assembly_test.paf.gz" ftype="paf.gz"/> |
336 <param name="coverage" value="test.cov" ftype="tabular"/> | 300 <param name="coverage" value="test.cov" ftype="tabular"/> |
337 <param name="cutoffs" value="cutoffs.tsv" ftype="tabular"/> | 301 <param name="cutoffs" value="cutoffs.tsv" ftype="tabular"/> |
338 <param name="min_bad" value="0.01"/> | 302 <param name="min_bad" value="0.01"/> |
339 <param name="min_align" value="10"/> | 303 <param name="min_align" value="10"/> |
340 <param name="min_match" value="100"/> | 304 <param name="min_match" value="100"/> |
344 <param name="chaining_rounds" value="two"/> | 308 <param name="chaining_rounds" value="two"/> |
345 <param name="max_gap_2" value="1001"/> | 309 <param name="max_gap_2" value="1001"/> |
346 </conditional> | 310 </conditional> |
347 <param name="min_chain_score" value="1"/> | 311 <param name="min_chain_score" value="1"/> |
348 <param name="max_extend" value="100"/> | 312 <param name="max_extend" value="100"/> |
349 </conditional> | 313 <param name="log_file" value="true"/> |
350 <output name="purge_dups_bed" value="purge_dups_out.bed"/> | 314 </conditional> |
351 </test> | 315 <output name="purge_dups_bed" value="purge_dups_02.bed" ftype="bed"/> |
352 <!-- Purge dups multiple input --> | 316 <output name="purge_dups_log" value="purge_dups_log_02.txt" ftype="txt"/> |
353 <test expect_num_outputs="2"> | 317 |
318 </test> | |
319 <!-- Test 3 Purge dups multiple input --> | |
320 <test expect_num_outputs="1"> | |
354 <conditional name="function_select"> | 321 <conditional name="function_select"> |
355 <param name="functions" value="purge_dups"/> | 322 <param name="functions" value="purge_dups"/> |
356 <param name="input" value="test.paf,test2.paf.gz"/> | 323 <param name="input" value="assembly_test.paf,test2.paf.gz"/> |
357 </conditional> | 324 </conditional> |
358 <output name="purge_dups_bed" value="purge_dups_out_2.bed"/> | 325 <output name="purge_dups_bed" value="purge_dups_03.bed" ftype="bed"/> |
359 </test> | 326 </test> |
360 <!-- Split fa --> | 327 <!-- Test 4 Split fa --> |
361 <test expect_num_outputs="1"> | 328 <test expect_num_outputs="1"> |
362 <conditional name="function_select"> | 329 <conditional name="function_select"> |
363 <param name="functions" value="split_fa"/> | 330 <param name="functions" value="split_fa"/> |
364 <param name="input" value="test.fasta"/> | 331 <param name="input" value="assembly_test.fasta"/> |
365 <param name="split" value="-n"/> | 332 </conditional> |
366 </conditional> | 333 <output name="split_fasta" value="split_04.fasta" ftype="fasta"/> |
367 <output name="split_fasta" value="split_out.fasta"/> | 334 </test> |
368 </test> | 335 <!-- Test 5 pbcstat --> |
369 <!-- pbcstat --> | |
370 <test expect_num_outputs="6"> | 336 <test expect_num_outputs="6"> |
371 <conditional name="function_select"> | 337 <conditional name="function_select"> |
372 <param name="functions" value="pbcstat"/> | 338 <param name="functions" value="pbcstat"/> |
373 <param name="input" value="test.paf"/> | 339 <param name="input" value="assembly_test.paf"/> |
374 <param name="max_cov" value="1000"/> | 340 <section name="pbcstat_options"> |
375 <param name="min_map_ratio" value="0.01"/> | 341 <param name="max_cov" value="1000"/> |
376 <param name="min_map_qual" value="1"/> | 342 <param name="min_map_ratio" value="0.01"/> |
377 <param name="flank" value="1"/> | 343 <param name="min_map_qual" value="1"/> |
378 <param name="primary_alignments" value="-p"/> | 344 <param name="flank" value="1"/> |
345 <param name="primary_alignments" value="-p"/> | |
346 </section> | |
379 <section name="section_calcuts"> | 347 <section name="section_calcuts"> |
380 <param name="min_depth" value="0.01"/> | 348 <param name="min_depth" value="0.01"/> |
381 <param name="low_depth" value="1"/> | 349 <param name="low_depth" value="1"/> |
382 <param name="transition" value="1"/> | 350 <param name="transition" value="1"/> |
383 <param name="upper_depth" value="100"/> | 351 <param name="upper_depth" value="100"/> |
384 <param name="ploidy" value="-d 0"/> | 352 <param name="ploidy" value="-d 0"/> |
385 </section> | 353 </section> |
386 </conditional> | 354 <param name="output_options" value="pbcstat_coverage,pbcstat_wig,depth_stats,histogram,calcuts_cutoff,calcuts_log"/> |
387 <output name="calcuts_tab" value="calcuts_out.tsv"/> | 355 </conditional> |
388 <output name="pbcstat_cov" value="out.cov"/> | 356 <output name="calcuts_cutoff" value="calcuts_cutoff_05.tabular" ftype="tabular"/> |
389 <output name="pbcstat_wig" value="out.wig"/> | 357 <output name="calcuts_log" value="calcuts_log_05.txt" ftype="txt"/> |
390 <output name="stat_file" value="pbcstats.tabular"/> | 358 <output name="pbcstat_cov" value="pbcstat_cov_05.tabular" ftype="tabular"/> |
391 <output name="hist" value="hist.png" ftype="png" compare="sim_size"/> | 359 <output name="pbcstat_wig" value="pbcstat_cov_05.wig" ftype="wig"/> |
392 </test> | 360 <output name="stat_file" value="pbcstats_05.tabular" ftype="tabular"/> |
393 <!-- pbcstat gzip --> | 361 <output name="hist" value="hist_05.png" ftype="png" compare="sim_size"/> |
394 <test expect_num_outputs="6"> | 362 </test> |
363 <!-- Test 6 pbcstat gzip --> | |
364 <test expect_num_outputs="2"> | |
395 <conditional name="function_select"> | 365 <conditional name="function_select"> |
396 <param name="functions" value="pbcstat"/> | 366 <param name="functions" value="pbcstat"/> |
397 <param name="input" value="test.paf.gz" ftype="paf.gz"/> | 367 <param name="input" value="assembly_test.paf.gz" ftype="paf.gz"/> |
398 <param name="max_cov" value="1000"/> | 368 <section name="pbcstat_options"> |
399 <param name="min_map_ratio" value="0.01"/> | 369 <param name="max_cov" value="1000"/> |
400 <param name="min_map_qual" value="1"/> | 370 <param name="min_map_ratio" value="0.01"/> |
401 <param name="flank" value="1"/> | 371 <param name="min_map_qual" value="1"/> |
402 <param name="primary_alignments" value="-p"/> | 372 <param name="flank" value="1"/> |
373 <param name="primary_alignments" value="-p"/> | |
374 </section> | |
403 <section name="section_calcuts"> | 375 <section name="section_calcuts"> |
404 <param name="min_depth" value="0.01"/> | 376 <param name="min_depth" value="0.01"/> |
405 <param name="low_depth" value="1"/> | 377 <param name="low_depth" value="1"/> |
406 <param name="transition" value="1"/> | 378 <param name="transition" value="1"/> |
407 <param name="upper_depth" value="100"/> | 379 <param name="upper_depth" value="100"/> |
408 <param name="ploidy" value="-d 0"/> | 380 <param name="ploidy" value="-d 0"/> |
409 </section> | 381 </section> |
410 </conditional> | 382 </conditional> |
411 <output name="calcuts_tab" value="calcuts_out.tsv"/> | 383 <param name="output_options" value="pbcstat_coverage,calcuts_cutoff"/> |
412 <output name="pbcstat_cov" value="out.cov"/> | 384 <output name="calcuts_cutoff" value="calcuts_cutoff_06.tabular" ftype="tabular"/> |
413 <output name="pbcstat_wig" value="out.wig"/> | 385 <output name="pbcstat_cov" value="pbcstat_cov_06.tabular" ftype="tabular"/> |
414 </test> | 386 </test> |
415 <!-- Pbcstat multiple input --> | 387 <!-- Test 7 Pbcstat multiple input --> |
416 <test expect_num_outputs="6"> | 388 <test expect_num_outputs="2"> |
417 <conditional name="function_select"> | 389 <conditional name="function_select"> |
418 <param name="functions" value="pbcstat"/> | 390 <param name="functions" value="pbcstat"/> |
419 <param name="input" value="test.paf,test2.paf.gz"/> | 391 <param name="input" value="assembly_test.paf,test2.paf.gz"/> |
420 <section name="section_calcuts"> | 392 <section name="section_calcuts"> |
421 <param name="min_depth" value="0.01"/> | 393 <param name="min_depth" value="0.01"/> |
422 <param name="low_depth" value="1"/> | 394 <param name="low_depth" value="1"/> |
423 <param name="transition" value="1"/> | 395 <param name="transition" value="1"/> |
424 <param name="upper_depth" value="100"/> | 396 <param name="upper_depth" value="100"/> |
425 <param name="ploidy" value="-d 0"/> | 397 <param name="ploidy" value="-d 0"/> |
426 </section> | 398 </section> |
427 </conditional> | 399 </conditional> |
428 <output name="calcuts_tab" value="calcuts_out.tsv"/> | 400 <param name="output_options" value="pbcstat_coverage,calcuts_cutoff"/> |
429 <output name="pbcstat_cov" value="out2.cov"/> | 401 <output name="calcuts_cutoff" value="calcuts_cutoff_07.tabular" ftype="tabular"/> |
430 <output name="stat_file" value="pbcstats2.tabular"/> | 402 <output name="pbcstat_cov" value="pbcstat_cov_07.tabular" ftype="tabular"/> |
431 <output name="pbcstat_wig" value="out2.wig"/> | 403 </test> |
432 </test> | 404 <!-- Test 8 ngscstat --> |
433 <!-- ngscstat --> | 405 <test expect_num_outputs="2"> |
434 <test expect_num_outputs="5"> | |
435 <conditional name="function_select"> | 406 <conditional name="function_select"> |
436 <param name="functions" value="ngscstat"/> | 407 <param name="functions" value="ngscstat"/> |
437 <param name="input" value="test.bam"/> | 408 <param name="input" value="test.bam"/> |
438 <param name="min_align_qual" value="10"/> | 409 <section name="ngscstat_options"> |
439 <param name="max_insert" value="100"/> | 410 <param name="min_align_qual" value="10"/> |
411 <param name="max_insert" value="100"/> | |
412 </section> | |
440 <section name="section_calcuts"> | 413 <section name="section_calcuts"> |
441 <param name="min_depth" value="0.01"/> | 414 <param name="min_depth" value="0.01"/> |
442 <param name="low_depth" value="1"/> | 415 <param name="low_depth" value="1"/> |
443 <param name="transition" value="1"/> | 416 <param name="transition" value="1"/> |
444 <param name="upper_depth" value="100"/> | 417 <param name="upper_depth" value="100"/> |
445 <param name="ploidy" value="-d 0"/> | 418 <param name="ploidy" value="-d 0"/> |
446 </section> | 419 </section> |
447 </conditional> | 420 </conditional> |
448 <output name="calcuts_tab" value="calcuts_out.tsv"/> | 421 <param name="output_options" value="ngscstat_coverage,calcuts_cutoff"/> |
449 <output name="ngscstat_cov" value="ngsc_out.cov"/> | 422 <output name="calcuts_cutoff" value="calcuts_cutoff_08.tabular" ftype="tabular"/> |
450 <output name="stat_file" value="tx_stats.tabular"/> | 423 <output name="ngscstat_cov" value="ngsc_cov_08.tabular" ftype="tabular"/> |
451 <output name="hist" value="hist.png" ftype="png" compare="sim_size"/> | 424 </test> |
452 </test> | 425 <!-- Test 9 Calcuts --> |
453 <!-- Calcuts --> | |
454 <test expect_num_outputs="2"> | 426 <test expect_num_outputs="2"> |
455 <conditional name="function_select"> | 427 <conditional name="function_select"> |
456 <param name="functions" value="calcuts"/> | 428 <param name="functions" value="calcuts"/> |
457 <param name="input" value="test.stat"/> | 429 <param name="input" value="test.stat"/> |
458 <section name="section_calcuts"> | 430 <section name="section_calcuts"> |
461 <param name="transition" value="1"/> | 433 <param name="transition" value="1"/> |
462 <param name="upper_depth" value="100"/> | 434 <param name="upper_depth" value="100"/> |
463 <param name="ploidy" value="-d 0"/> | 435 <param name="ploidy" value="-d 0"/> |
464 </section> | 436 </section> |
465 </conditional> | 437 </conditional> |
466 <output name="calcuts_tab" value="calcuts_out.tsv"/> | 438 <output name="calcuts_cutoff" value="calcuts_cutoff_09.tabular" ftype="tabular"/> |
467 </test> | 439 <output name="calcuts_log" value="calcuts_log_09.txt" ftype="txt"/> |
468 <!-- Get seqs --> | 440 </test> |
441 <!-- Test 10 Get seqs --> | |
469 <test expect_num_outputs="2"> | 442 <test expect_num_outputs="2"> |
470 <conditional name="function_select"> | 443 <conditional name="function_select"> |
471 <param name="functions" value="get_seqs"/> | 444 <param name="functions" value="get_seqs"/> |
472 <param name="fasta_input" value="split_out.fasta"/> | 445 <param name="fasta_input" value="split_out.fasta"/> |
473 <param name="bed_input" value="dups.bed"/> | 446 <param name="bed_input" value="dups.bed"/> |
474 <param name="coverage" value="-c"/> | 447 <section name="advanced_options"> |
475 <param name="length" value="10"/> | 448 <param name="coverage" value="-c"/> |
476 <param name="haplotigs" value="-a"/> | 449 <param name="length" value="10"/> |
477 <param name="min_ratio" value=".01"/> | 450 <param name="haplotigs" value="-a"/> |
478 <param name="end_trim" value="-e"/> | 451 <param name="min_ratio" value=".01"/> |
479 <param name="split" value="-s"/> | 452 <param name="end_trim" value="-e"/> |
480 <param name="min_gap" value="100000"/> | 453 <param name="split" value="-s"/> |
481 </conditional> | 454 <param name="min_gap" value="100000"/> |
482 <output name="get_seqs_purged" value="purged_out.fa"/> | 455 </section> |
483 </test> | 456 </conditional> |
484 <!-- pbcstat histogram options--> | 457 <output name="get_seqs_purged" value="get_seqs_purged_10.fa" ftype="fasta"/> |
485 <test expect_num_outputs="6"> | 458 <output name="get_seqs_hap" value="get_seqs_hap_10.fa" ftype="fasta"/> |
459 </test> | |
460 <!-- Test 11 pbcstat histogram options--> | |
461 <test expect_num_outputs="1"> | |
486 <conditional name="function_select"> | 462 <conditional name="function_select"> |
487 <param name="functions" value="pbcstat"/> | 463 <param name="functions" value="pbcstat"/> |
488 <param name="input" value="test.paf"/> | 464 <param name="input" value="assembly_test.paf"/> |
489 <param name="max_cov" value="1000"/> | 465 <section name="pbcstat_options"> |
490 <param name="min_map_ratio" value="0.01"/> | 466 <param name="max_cov" value="1000"/> |
491 <param name="min_map_qual" value="1"/> | 467 <param name="min_map_ratio" value="0.01"/> |
492 <param name="flank" value="1"/> | 468 <param name="min_map_qual" value="1"/> |
493 <param name="primary_alignments" value="-p"/> | 469 <param name="flank" value="1"/> |
470 <param name="primary_alignments" value="-p"/> | |
471 </section> | |
494 <section name="section_calcuts"> | 472 <section name="section_calcuts"> |
495 <param name="min_depth" value="0.01"/> | 473 <param name="min_depth" value="0.01"/> |
496 <param name="low_depth" value="1"/> | 474 <param name="low_depth" value="1"/> |
497 <param name="transition" value="1"/> | 475 <param name="transition" value="1"/> |
498 <param name="upper_depth" value="100"/> | 476 <param name="upper_depth" value="100"/> |
502 <param name="ymax" value="100"/> | 480 <param name="ymax" value="100"/> |
503 <param name="xmax" value="100"/> | 481 <param name="xmax" value="100"/> |
504 <param name="cutoffs_his" value="calcuts_out.tsv"/> | 482 <param name="cutoffs_his" value="calcuts_out.tsv"/> |
505 </section> | 483 </section> |
506 </conditional> | 484 </conditional> |
507 <output name="calcuts_tab" value="calcuts_out.tsv"/> | 485 <param name="output_options" value="histogram"/> |
508 <output name="pbcstat_cov" value="out_hist_options.cov"/> | 486 <output name="hist" value="hist_11.png" ftype="png" compare="sim_size"/> |
509 <output name="pbcstat_wig" value="out_hist_options.wig"/> | |
510 <output name="stat_file" value="pbcstats_hist_options.tabular"/> | |
511 <output name="hist" value="hist_options.png" ftype="png" compare="sim_size"/> | |
512 </test> | 487 </test> |
513 </tests> | 488 </tests> |
514 <help><![CDATA[ | 489 <help><![CDATA[ |
515 .. class:: infomark | 490 .. class:: infomark |
516 | 491 |
517 **What it does** | 492 **Purpose** |
518 | 493 |
519 The purge_dups tools are designed to remove haplotigs and contig overlaps in a de novo assembly based on read depth. | 494 The purge_dups tools are designed to remove haplotigs and contig overlaps in a de novo assembly based on read depth. |
495 purge_dups can significantly improve genome assemblies by removing overlaps and haplotigs caused by sequence divergence | |
496 in heterozygous regions. This both removes false duplications in primary draft assemblies while retaining completeness and sequence | |
497 integrity, and can improve scaffolding. | |
498 | |
499 ---- | |
500 | |
501 .. class:: infomark | |
502 | |
503 **Pipeline Guide** | |
504 | |
505 Given a primary assembly, and an alternative assembly (optional, if you have one), follow the steps shown below to build your | |
506 own purge_dups pipeline; steps with the same number can be run simultaneously. Among all the steps, although step 5 is optional, | |
507 we highly recommend that users run it, because assemblers may produce overrepresented sequences. In such a case, the final | |
508 step 5 can be applied to remove those sequences. | |
509 | |
510 - Step 1: Calculate the coverage cutoffs and base coverages. | |
511 - Step 2: Split an assembly with the **split_fa** function and do a self-self alignment by using minimap2. | |
512 - Step 3: Purge haplotigs and overlaps with the **purge_dups** function. | |
513 - Step 4: Get purged primary and haplotig sequences from the draft assembly with the **get_seqs** function. | |
514 - Step 5: Merge hap.fa file, generated in the previous step, and the alternate assembly, and redo the above steps to get a decent haplotig set. | |
515 | |
516 ---- | |
517 | |
518 .. class:: infomark | |
519 | |
520 **Limitations** | |
521 | |
522 - Read depth cutoffs calculation: the coverage cutoffs can be larger for a low heterozygosity species, which causes the purged assembly size to be smaller than expected. In such a case, please use script/hist_plot.py to make the histogram plot and set coverage cutoffs manually. | |
523 - Repeats: purge_dups has a limited ability to process repeats. | |
524 | |
525 ---- | |
526 | |
527 .. class:: infomark | |
528 | |
529 **Purged assembly validation** | |
530 | |
531 There are many ways to validate the purged assembly. One way is to make a coverage plot for it; a second way is to run `BUSCO <https://busco.ezlab.org/>`_. A third option is to use `Merqury <https://github.com/marbl/merqury>`_. | |
532 | |
533 | |
520 | 534 |
521 ]]></help> | 535 ]]></help> |
522 <citations> | 536 <expand macro="citations"/> |
523 <citation type="doi">10.1093/bioinformatics/btaa025</citation> | |
524 </citations> | |
525 </tool> | 537 </tool> |