Mercurial > repos > bgruening > canu
comparison canu.xml @ 3:5732f959936a draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/canu commit 7000c7eb839b77a0e7e91874048219bd3a3f5d47"
author | bgruening |
---|---|
date | Mon, 15 Feb 2021 12:31:26 +0000 |
parents | c5b7390290b1 |
children | 86f150c8019d |
comparison
equal
deleted
inserted
replaced
2:c5b7390290b1 | 3:5732f959936a |
---|---|
1 <tool id="canu" name="Canu assembler" version="1.8"> | 1 <tool id="canu" name="Canu assembler" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> |
2 <description>Assembler optimized for long error-prone reads such as PacBio, Oxford Nanopore </description> | 2 <description>Assembler optimized for long error-prone reads such as PacBio, Oxford Nanopore</description> |
3 <xrefs> | |
4 <xref type="bio.tools">canu</xref> | |
5 </xrefs> | |
6 <macros> | |
7 <token name="@TOOL_VERSION@">2.1.1</token> | |
8 <token name="@VERSION_SUFFIX@">0</token> | |
9 </macros> | |
3 <requirements> | 10 <requirements> |
4 <requirement type="package" version="1.8">canu</requirement> | 11 <requirement type="package" version="@TOOL_VERSION@">canu</requirement> |
5 </requirements> | 12 </requirements> |
6 <version_command>canu --version</version_command> | 13 <version_command>canu --version</version_command> |
7 <command detect_errors="exit_code"> | 14 <command detect_errors="exit_code"> |
8 <![CDATA[ | 15 <![CDATA[ |
9 | 16 |
17 canu | 24 canu |
18 #if $stage != 'all': | 25 #if $stage != 'all': |
19 $stage | 26 $stage |
20 #end if | 27 #end if |
21 -p canu | 28 -p canu |
22 -d out_dir | 29 -d ./out_dir |
23 #if $s: | 30 #if $s: |
24 -s '$s' | 31 -s '$s' |
25 #end if | 32 #end if |
26 genomeSize=$genomeSize | |
27 #if $rawErrorRate: | 33 #if $rawErrorRate: |
28 rawErrorRate=$rawErrorRate | 34 rawErrorRate=$rawErrorRate |
29 #end if | 35 #end if |
30 #if $correctedErrorRate: | 36 #if $correctedErrorRate: |
31 correctedErrorRate=$correctedErrorRate | 37 correctedErrorRate=$correctedErrorRate |
32 #end if | 38 #end if |
33 minReadLength=$minReadLength | 39 minReadLength=$minReadLength |
34 minOverlapLength=$minOverlapLength | 40 minOverlapLength=$minOverlapLength |
35 corOutCoverage=$corOutCoverage | 41 corOutCoverage=$corOutCoverage |
42 #if $stopOnLowCoverage | |
43 stopOnLowCoverage=$stopOnLowCoverage | |
44 #end if | |
45 #if $minInputCoverage | |
46 minInputCoverage=$minInputCoverage | |
47 #end if | |
36 contigFilter=' | 48 contigFilter=' |
37 ${contigFilter.minReads} | 49 ${contigFilter.minReads} |
38 ${contigFilter.minLength} | 50 ${contigFilter.minLength} |
39 ${contigFilter.singleReadSpan} | 51 ${contigFilter.singleReadSpan} |
40 ${contigFilter.lowCovSpan} | 52 ${contigFilter.lowCovSpan} |
41 ${contigFilter.lowCovDepth} | 53 ${contigFilter.lowCovDepth} |
42 ' | 54 ' |
43 genomeSize=$genomeSize | 55 |
56 genomeSize='$genomeSize' | |
44 minThreads=\${GALAXY_SLOTS:-4} | 57 minThreads=\${GALAXY_SLOTS:-4} |
45 maxThreads=\${GALAXY_SLOTS:-4} | 58 maxThreads=\${GALAXY_SLOTS:-4} |
59 redMemory=\${GALAXY_MEMORY_MB:-4096}M | |
60 redThreads=\${GALAXY_SLOTS:-4} | |
46 obtovlThreads=\${GALAXY_SLOTS:-4} | 61 obtovlThreads=\${GALAXY_SLOTS:-4} |
47 utgovlThreads=\${GALAXY_SLOTS:-4} | 62 utgovlThreads=\${GALAXY_SLOTS:-4} |
48 batThreads=\${GALAXY_SLOTS:-4} | 63 batThreads=\${GALAXY_SLOTS:-4} |
49 batMemory=\${GALAXY_MEMORY_MB:-4096}M | 64 batMemory=\${GALAXY_MEMORY_MB:-4096}M |
50 cormhapMemory=\${GALAXY_MEMORY_MB:-4096}M | 65 cormhapMemory=\${GALAXY_MEMORY_MB:-4096}M |
51 obtovlMemory=\${GALAXY_MEMORY_MB:-4096}M | 66 obtovlMemory=\${GALAXY_MEMORY_MB:-4096}M |
52 utgovlMemory=\${GALAXY_MEMORY_MB:-4096}M | 67 utgovlMemory=\${GALAXY_MEMORY_MB:-4096}M |
53 gfaThreads=\${GALAXY_SLOTS:-4} | |
54 corThreads=\${GALAXY_SLOTS:-4} | 68 corThreads=\${GALAXY_SLOTS:-4} |
69 corMemory=\${GALAXY_MEMORY_MB:-4096}M | |
55 cnsThreads=\${GALAXY_SLOTS:-4} | 70 cnsThreads=\${GALAXY_SLOTS:-4} |
71 cnsMemory=\${GALAXY_MEMORY_MB:-4096}M | |
72 oeaMemory=\${GALAXY_MEMORY_MB:-4096}M | |
73 oeaThreads=\${GALAXY_SLOTS:-4} | |
56 useGrid=false | 74 useGrid=false |
57 $mode | 75 |
76 #for $haplotype in $haplotypes: | |
77 -haplotype${haplotype.haplotype_name} '${haplotype.haplotype_input}' | |
78 #end for | |
79 | |
80 $technology | |
81 #if $processing: | |
82 $processing | |
83 #end if | |
84 | |
58 #for $counter, $input in enumerate($inputs): | 85 #for $counter, $input in enumerate($inputs): |
59 #if $input.ext in ['fastq.gz', 'fasta.gz'] | 86 #if $input.ext in ['fastq.gz', 'fasta.gz'] |
60 ./input_${counter}.gz | 87 ./input_${counter}.gz |
61 #else: | 88 #else: |
62 '$input' | 89 '$input' |
63 #end if | 90 #end if |
64 #end for | 91 #end for |
65 2>&1 | 92 2>&1 |
66 && | 93 |
67 echo "Check echo" | |
68 ]]> | 94 ]]> |
69 </command> | 95 </command> |
70 <inputs> | 96 <inputs> |
71 <param name="inputs" type="data" format="fasta,fasta.gz,fastq,fastq.gz" multiple="true" label="Input reads" /> | 97 <param name="inputs" type="data" format="fasta,fasta.gz,fastq,fastq.gz" multiple="true" label="Input reads"/> |
72 <param name="mode" type="select" label="Mode"> | 98 <repeat name="haplotypes" min="0" max="2" title="Haplotypes for Trio Binning Assembly" help="Canu has support for using parental short-read sequencing to classify and bin the F1 reads."> |
73 <option value="-nanopore-raw" selected="true">Nanopore raw</option> | 99 <param name="haplotype_input" type="data" format="fasta,fastq" multiple="true" label="Haplotype input reads"/> |
74 <option value="-nanopore-corrected">Nanopore corrected</option> | 100 <param name="haplotype_name" type="text" label="Short name to identify your haplotype"/> |
75 <option value="-pacbio-raw">PacBio raw</option> | 101 </repeat> |
76 <option value="-pacbio-corrected">PacBio corrected</option> | 102 <param name="technology" type="select" label="Technology"> |
103 <option value="-nanopore" selected="true">Nanopore</option> | |
104 <option value="-pacbio">PacBio</option> | |
105 <option value="-pacbio-hifi">PacBio HiFi</option> | |
106 </param> | |
107 <param name="processing" type="select" optional="true" label="Processing"> | |
108 <option value="-corrected">Corrected</option> | |
109 <option value="-trimmed">Trimmed</option> | |
77 </param> | 110 </param> |
78 <param name="stage" type="select" label="To restrict canu to only a specific stage, use"> | 111 <param name="stage" type="select" label="To restrict canu to only a specific stage, use"> |
79 <option value="all" selected="true">all</option> | 112 <option value="all" selected="true">all</option> |
113 <option value="-haplotype">generate haplotype-specific reads</option> | |
80 <option value="-correct">generate corrected reads</option> | 114 <option value="-correct">generate corrected reads</option> |
81 <option value="-trim">generate trimmed reads</option> | 115 <option value="-trim">generate trimmed reads</option> |
82 <option value="-assemble">generate an assembly</option> | 116 <option value="-assemble">generate an assembly</option> |
83 <option value="-trim-assemble">generate trimmed reads and then assemble them</option> | 117 <option value="-trim-assemble">generate trimmed reads and then assemble them</option> |
84 </param> | 118 </param> |
85 <param argument="genomeSize" type="text" label="Estimated genome size (e.g. 80m, 15k, 2g)"> | 119 <param argument="genomeSize" type="text" label="Estimated genome size (e.g. 8.0m, 15k, 2g)"> |
86 <validator type="empty_field" /> | 120 <validator type="empty_field"/> |
87 </param> | 121 <validator type="expression" message="Only values similar to 8.0m, 15k or 2g are allowed.">value.replace('.', '').isalnum() and value[-1] in ['m', 'k', 'g'] and float(value[:-1])</validator> |
88 <param argument="rawErrorRate" type="float" value="" optional="true" min="0" max="1" | 122 </param> |
89 label="Maximum raw overlap mismatch" help="The defaults are 0.300 for PacBio reads and 0.500 for Nanopore reads." /> | 123 <param argument="rawErrorRate" type="float" value="" optional="true" min="0" max="1" label="Maximum raw overlap mismatch" help="The defaults are 0.300 for PacBio reads and 0.500 for Nanopore reads."/> |
90 <param argument="correctedErrorRate" type="float" value="" optional="true" min="0" max="1" | 124 <param argument="correctedErrorRate" type="float" value="" optional="true" min="0" max="1" label="Maximum corrected overlap mismatch" help="The allowed difference in an overlap between two corrected reads. Assemblies of low coverage or data with biological differences will benefit from a slight increase in this. Defaults are 0.045 for PacBio reads and 0.144 for Nanopore reads."/> |
91 label="Maximum corrected overlap mismatch" help="The allowed difference in an overlap between two corrected reads. Assemblies of | 125 <param argument="minReadLength" type="integer" value="1000" min="1" label="Minimum read length"/> |
92 low coverage or data with biological differences will benefit from a slight increase | 126 <param argument="minOverlapLength" type="integer" value="500" min="1" label="Minimum overlap"/> |
93 in this. Defaults are 0.045 for PacBio reads and 0.144 for Nanopore reads." /> | 127 <param argument="minInputCoverage" type="integer" value="" min="1" optional="true" label="Minimum Input Coverage"/> |
94 <param argument="minReadLength" type="integer" value="1000" min="1" label="Minimum read length" /> | 128 <param argument="corOutCoverage" type="integer" value="40" min="1" label="Target coverage for corrected reads"/> |
95 <param argument="minOverlapLength" type="integer" value="500" min="1" label="Minimum overlap" /> | 129 <param argument="-s" type="data" format="txt" optional="true" label="Additional options" help="Additional specifications provided in a canu spec file."/> |
96 <param argument="corOutCoverage" type="integer" value="40" min="1" label="Target coverage for corrected reads" /> | 130 <param argument="stopOnLowCoverage" type="integer" value="10" min="1" label="Stop the assembly if read coverage is too low to be useful" help="Coverage is checked when input sequences are initially loaded into the sequence store, when corrected reads are generated, and when read ends are trimmed off."/> |
97 <param argument="-s" type="data" format="txt" optional="true" label="Additional options" help="Additional specifications provided in a canu spec file." /> | |
98 | |
99 <section name="contigFilter" title="Contig Filters"> | 131 <section name="contigFilter" title="Contig Filters"> |
100 <param argument="minReads" type="integer" value="2" min="0" label="Minimum reads" /> | 132 <param argument="minReads" type="integer" value="2" min="0" label="Minimum reads"/> |
101 <param argument="minLength" type="integer" value="0" min="0" label="Minimum length" /> | 133 <param argument="minLength" type="integer" value="0" min="0" label="Minimum length"/> |
102 <param argument="singleReadSpan" type="float" value="1.0" min="0.0" max="1.0" label="Maximum single read span (fraction)" /> | 134 <param argument="singleReadSpan" type="float" value="1.0" min="0.0" max="1.0" label="Maximum single read span (fraction)"/> |
103 <param argument="lowCovSpan" type="float" value="0.5" min="0.0" max="1.0" label="Low coverage span (fraction)" /> | 135 <param argument="lowCovSpan" type="float" value="0.5" min="0.0" max="1.0" label="Low coverage span (fraction)"/> |
104 <param argument="lowCovDepth" type="integer" value="5" min="0" label="Low coverage depth" /> | 136 <param argument="lowCovDepth" type="integer" value="5" min="0" label="Low coverage depth"/> |
105 </section> | 137 </section> |
106 </inputs> | 138 </inputs> |
107 <outputs> | 139 <outputs> |
140 <data name="report" format="txt" from_work_dir="out_dir/canu.report" label="${tool.name} on ${on_string} (report)"/> | |
108 <data name="contigs" format="fasta" from_work_dir="out_dir/canu.contigs.fasta" label="${tool.name} on ${on_string} (contigs)"> | 141 <data name="contigs" format="fasta" from_work_dir="out_dir/canu.contigs.fasta" label="${tool.name} on ${on_string} (contigs)"> |
109 <filter>stage == 'all'</filter> | 142 <filter>stage == 'all'</filter> |
110 </data> | 143 </data> |
111 <data name="unassembled" format="fasta" from_work_dir="out_dir/canu.unassembled.fasta" label="${tool.name} on ${on_string} (unassembled)"> | 144 <data name="unassembled" format="fasta" from_work_dir="out_dir/canu.unassembled.fasta" label="${tool.name} on ${on_string} (unassembled)"> |
112 <filter>stage == 'all'</filter> | 145 <filter>stage == 'all'</filter> |
113 </data> | 146 </data> |
114 <data name="unitigs" format="fasta" from_work_dir="out_dir/canu.unitigs.fasta" label="${tool.name} on ${on_string} (unitigs)"> | |
115 <filter>stage == 'all'</filter> | |
116 </data> | |
117 <data name="corrected_reads" format="fasta.gz" from_work_dir="out_dir/canu.correctedReads.fasta.gz" label="${tool.name} on ${on_string} (corrected reads)"> | 147 <data name="corrected_reads" format="fasta.gz" from_work_dir="out_dir/canu.correctedReads.fasta.gz" label="${tool.name} on ${on_string} (corrected reads)"> |
118 <filter>'-correct' in stage or stage == 'all'</filter> | 148 <filter>'-correct' in stage or stage == 'all'</filter> |
119 </data> | 149 </data> |
120 <data name="trimmed_reads" format="fasta.gz" from_work_dir="out_dir/canu.trimmedReads.fasta.gz" label="${tool.name} on ${on_string} (trimmed reads)"> | 150 <data name="trimmed_reads" format="fasta.gz" from_work_dir="out_dir/canu.trimmedReads.fasta.gz" label="${tool.name} on ${on_string} (trimmed reads)"> |
121 <filter>'-trim' in stage or stage == 'all'</filter> | 151 <filter>'-trim' in stage or stage == 'all'</filter> |
122 </data> | 152 </data> |
123 </outputs> | 153 </outputs> |
124 <tests> | 154 <tests> |
125 <test expect_num_outputs="5"> | 155 <test expect_num_outputs="5"> |
126 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> | 156 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> |
127 <param name="genomeSize" value="4.6m" /> | 157 <param name="technology" value="-nanopore"/> |
128 <param name="minReadLength" value="2000" /> | 158 <param name="genomeSize" value="20k"/> |
159 <param name="stopOnLowCoverage" value="1"/> | |
160 <param name="minInputCoverage" value="1"/> | |
161 <param name="minReadLength" value="2000"/> | |
129 <output name="contigs" ftype="fasta" file="ecoli_canu_contigs_result1.fa"/> | 162 <output name="contigs" ftype="fasta" file="ecoli_canu_contigs_result1.fa"/> |
130 <output name="unitigs" ftype="fasta" file="ecoli_canu_unitigs_result1.fa"/> | |
131 <output name="unassembled" ftype="fasta" file="ecoli_canu_unassembled_result1.fa"/> | 163 <output name="unassembled" ftype="fasta" file="ecoli_canu_unassembled_result1.fa"/> |
132 <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_corrected_reads_result1.fa.gz"/> | 164 <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_corrected_reads_result1.fa.gz"/> |
133 <output name="trimmed_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_trimmed_reads_result1.fa.gz"/> | 165 <output name="trimmed_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_trimmed_reads_result1.fa.gz"/> |
166 <output name="report"> | |
167 <assert_contents> | |
168 <has_n_lines n="488"/> | |
169 <has_text_matching expression="[UNITIGGING/CONTIGS]"/> | |
170 <has_text_matching expression="-- Contig sizes based on genome size 20kbp:"/> | |
171 </assert_contents> | |
172 </output> | |
134 </test> | 173 </test> |
135 <test expect_num_outputs="5"> | 174 <test expect_num_outputs="5"> |
136 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> | 175 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> |
137 <param name="genomeSize" value="4.6m" /> | 176 <param name="technology" value="-nanopore"/> |
138 <param name="minReadLength" value="2000" /> | 177 <param name="genomeSize" value="20k"/> |
139 <param name="minOverlapLength" value="800" /> | 178 <param name="stopOnLowCoverage" value="1"/> |
140 <param name="rawErrorRate" value="0.2" /> | 179 <param name="minInputCoverage" value="1"/> |
141 <param name="correctedErrorRate" value="0.05" /> | 180 <param name="minReadLength" value="2000"/> |
142 <param name="corOutCoverage" value="2" /> | 181 <param name="minOverlapLength" value="800"/> |
182 <param name="rawErrorRate" value="0.2"/> | |
183 <param name="correctedErrorRate" value="0.05"/> | |
184 <param name="corOutCoverage" value="2"/> | |
143 <output name="contigs" ftype="fasta" file="ecoli_canu_contigs_result2.fa"/> | 185 <output name="contigs" ftype="fasta" file="ecoli_canu_contigs_result2.fa"/> |
144 <output name="unitigs" ftype="fasta" file="ecoli_canu_unitigs_result2.fa"/> | |
145 <output name="unassembled" ftype="fasta" file="ecoli_canu_unassembled_result2.fa"/> | 186 <output name="unassembled" ftype="fasta" file="ecoli_canu_unassembled_result2.fa"/> |
146 <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_corrected_reads_result2.fa.gz"/> | 187 <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_corrected_reads_result2.fa.gz"/> |
147 <output name="trimmed_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_trimmed_reads_result2.fa.gz"/> | 188 <output name="trimmed_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_trimmed_reads_result2.fa.gz"/> |
148 </test> | 189 <output name="report"> |
149 <test expect_num_outputs="1"> | 190 <assert_contents> |
150 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> | 191 <has_n_lines n="464"/> |
192 <has_text_matching expression="[UNITIGGING/CONTIGS]"/> | |
193 <has_text_matching expression="-- Contig sizes based on genome size 20kbp:"/> | |
194 </assert_contents> | |
195 </output> | |
196 </test> | |
197 <test expect_num_outputs="2"> | |
198 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> | |
199 <param name="technology" value="-nanopore"/> | |
200 <param name="genomeSize" value="20k"/> | |
151 <param name="stage" value="-correct"/> | 201 <param name="stage" value="-correct"/> |
152 <param name="minReadLength" value="2500" /> | 202 <param name="stopOnLowCoverage" value="1"/> |
153 <param name="genomeSize" value="4.6m" /> | 203 <param name="minReadLength" value="2500"/> |
154 <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_corrected_reads_result3.fa.gz"/> | 204 <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_corrected_reads_result3.fa.gz"/> |
155 </test> | 205 <output name="report"> |
156 <test expect_num_outputs="1"> | 206 <assert_contents> |
157 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> | 207 <has_n_lines n="187"/> |
208 <has_text_matching expression="[TRIMMING/READS]"/> | |
209 <has_text_matching expression="-- Found 89 reads."/> | |
210 </assert_contents> | |
211 </output> | |
212 </test> | |
213 <!-- trimming test - it currently does not trim anything because of the input data --> | |
214 <test expect_num_outputs="2"> | |
215 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> | |
216 <param name="technology" value="-nanopore"/> | |
217 <param name="genomeSize" value="3.4m"/> | |
158 <param name="stage" value="-trim"/> | 218 <param name="stage" value="-trim"/> |
159 <param name="minReadLength" value="2500" /> | 219 <param name="minReadLength" value="500"/> |
160 <param name="genomeSize" value="4.6m" /> | 220 <output name="report"> |
161 <output name="trimmed_reads" ftype="fasta.gz" compare="sim_size" delta="12000" file="ecoli_canu_trimmed_reads_result4.fa.gz"/> | 221 <assert_contents> |
162 </test> | 222 <has_text_matching expression="[TRIMMING/READS]"/> |
223 <has_n_lines n="6"/> | |
224 <has_text_matching expression="Found 0 reads."/> | |
225 </assert_contents> | |
226 </output> | |
227 </test> | |
228 <!--test expect_num_outputs="5"> | |
229 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> | |
230 <param name="technology" value="-pacbio"/> | |
231 <repeat name="haplotypes"> | |
232 <param name="haplotype_name" value="K12"/> | |
233 <param name="haplotype_input" ftype="fasta" value="ecoli-reads.fasta"/> | |
234 </repeat> | |
235 <repeat name="haplotypes"> | |
236 <param name="haplotype_name" value="K13"/> | |
237 <param name="haplotype_input" ftype="fasta" value="ecoli-reads.fasta"/> | |
238 </repeat> | |
239 <param name="genomeSize" value="20k"/> | |
240 <param name="stopOnLowCoverage" value="1"/> | |
241 <param name="minInputCoverage" value="1"/> | |
242 <param name="minReadLength" value="2000"/> | |
243 <output name="contigs" ftype="fasta" file="ecoli_canu_contigs_result5.fa"/> | |
244 <output name="unassembled" ftype="fasta" file="ecoli_canu_unassembled_result5.fa"/> | |
245 <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_corrected_reads_result5.fa.gz"/> | |
246 <output name="trimmed_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_trimmed_reads_result5.fa.gz"/> | |
247 </test--> | |
163 </tests> | 248 </tests> |
164 <help> | 249 <help> |
165 <![CDATA[ | 250 <![CDATA[ |
166 | 251 |
167 Canu specializes in assembling PacBio or Oxford Nanopore sequences. Canu operates in three phases: correction, trimming and assembly. | 252 Canu specializes in assembling PacBio or Oxford Nanopore sequences. Canu operates in three phases: correction, trimming and assembly. |