Mercurial > repos > bgruening > mgnify_seqprep
comparison mgnify_seqprep.xml @ 0:76ea9d4604bc draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/mgnify_seqprep commit fd696b8f2ce44287b6ad19fe52277cfdbd7e94fb
| author | bgruening |
|---|---|
| date | Tue, 14 May 2024 09:49:32 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:76ea9d4604bc |
|---|---|
| 1 <tool id="mgnify_seqprep" name="Merging paired-end Illumina reads (SeqPrep, modified for use with MGnify pipelines)" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05"> | |
| 2 <description>Merge and Trim Adapter Sequences from Paired-End Illumina Reads</description> | |
| 3 <macros> <!-- NOTE(review): the @TOOL_VERSION@/@VERSION_SUFFIX@ tokens and the macros expanded in this file are presumably defined in macros.xml; confirm there --> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="biotools"/> | |
| 7 <expand macro="requirements"/> | |
| 8 <expand macro="creators"/> | |
| 9 <command detect_errors="exit_code"><![CDATA[ | |
| 10 SeqPrep | |
| 11 -f '${input1}' | |
| 12 -r '${input2}' | |
| 13 -1 '${output1}' | |
| 14 -2 '${output2}' | |
| 15 ## The merged-read file (-s) is requested only when the merge_reads input is enabled ## | |
| 16 #if $merge_reads | |
| 17 -s '${merged}' | |
| 18 #end if | |
| 19 ## Optional numeric inputs are compared with '' rather than truth-tested, so an explicit 0 is still passed on; boolean inputs emit the bare flag ## | |
| 20 ## General Arguments ## | |
| 21 #if $general_options.first_read_discarded | |
| 22 -3 '${general_options.first_read_discarded}' | |
| 23 #end if | |
| 24 #if $general_options.second_read_discarded | |
| 25 -4 '${general_options.second_read_discarded}' | |
| 26 #end if | |
| 27 #if $general_options.phred64 | |
| 28 -6 | |
| 29 #end if | |
| 30 #if str($general_options.quality_cutoff) != '' | |
| 31 -q '${general_options.quality_cutoff}' | |
| 32 #end if | |
| 33 #if str($general_options.min_length) != '' | |
| 34 -L '${general_options.min_length}' | |
| 35 #end if | |
| 36 | |
| 37 ## Additional Adapter/Primer Trimming Arguments ## | |
| 38 #if $trimming_options.adapter_a | |
| 39 -A '${trimming_options.adapter_a}' | |
| 40 #end if | |
| 41 #if $trimming_options.adapter_b | |
| 42 -B '${trimming_options.adapter_b}' | |
| 43 #end if | |
| 44 #if str($trimming_options.adapter_overlap) != '' | |
| 45 -O '${trimming_options.adapter_overlap}' | |
| 46 #end if | |
| 47 #if str($trimming_options.max_mismatch_fraction) != '' | |
| 48 -M '${trimming_options.max_mismatch_fraction}' | |
| 49 #end if | |
| 50 #if str($trimming_options.min_match_fraction) != '' | |
| 51 -N '${trimming_options.min_match_fraction}' | |
| 52 #end if | |
| 53 #if str($trimming_options.adapter_bandwidth) != '' | |
| 54 -b '${trimming_options.adapter_bandwidth}' | |
| 55 #end if | |
| 56 #if str($trimming_options.gap_open) != '' | |
| 57 -Q '${trimming_options.gap_open}' | |
| 58 #end if | |
| 59 #if str($trimming_options.gap_extend) != '' | |
| 60 -t '${trimming_options.gap_extend}' | |
| 61 #end if | |
| 62 #if str($trimming_options.gap_end) != '' | |
| 63 -e '${trimming_options.gap_end}' | |
| 64 #end if | |
| 65 #if str($trimming_options.local_alignment_score) != '' | |
| 66 -Z '${trimming_options.local_alignment_score}' | |
| 67 #end if | |
| 68 #if str($trimming_options.read_alignment_bandwidth) != '' | |
| 69 -w '${trimming_options.read_alignment_bandwidth}' | |
| 70 #end if | |
| 71 #if str($trimming_options.read_alignment_gap_open) != '' | |
| 72 -W '${trimming_options.read_alignment_gap_open}' | |
| 73 #end if | |
| 74 #if str($trimming_options.read_alignment_gap_extend) != '' | |
| 75 -p '${trimming_options.read_alignment_gap_extend}' | |
| 76 #end if | |
| 77 #if str($trimming_options.read_alignment_gap_end) != '' | |
| 78 -P '${trimming_options.read_alignment_gap_end}' | |
| 79 #end if | |
| 80 #if str($trimming_options.read_alignment_max_gap_fraction) != '' | |
| 81 -X '${trimming_options.read_alignment_max_gap_fraction}' | |
| 82 #end if | |
| 83 | |
| 84 ## Additional Arguments for Merging ## | |
| 85 #if $merging_options.maximum_quality_score | |
| 86 -y '${merging_options.maximum_quality_score}' | |
| 87 #end if | |
| 88 #if $merging_options.print_overhang | |
| 89 -g | |
| 90 #end if | |
| 91 #if str($merging_options.min_base_pair_overlap) != '' | |
| 92 -o '${merging_options.min_base_pair_overlap}' | |
| 93 #end if | |
| 94 #if str($merging_options.max_mismatch_fraction) != '' | |
| 95 -m '${merging_options.max_mismatch_fraction}' | |
| 96 #end if | |
| 97 #if str($merging_options.min_match_fraction) != '' | |
| 98 -n '${merging_options.min_match_fraction}' | |
| 99 #end if | |
| 100 ]]></command> | |
| 101 <inputs> | |
| 102 <param name="input1" type="data" format="fastq,fastq.gz" label="First Read Input" help="Select the FASTQ file containing the first set of paired-end reads." /> | |
| 103 <param name="input2" type="data" format="fastq,fastq.gz" label="Second Read Input" help="Select the FASTQ file containing the second set of paired-end reads." /> | |
| 104 <param name="merge_reads" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Merge Reads" help="Enable this to merge overlapping reads from the provided paired-end FASTQ files." /> | |
| 105 <!-- input1/input2 accept plain and gzipped FASTQ, since test #6 supplies .fastq.gz files; merge_reads gates both the -s flag and the merged output --> | |
| 106 <!-- General options: discarded-read filenames (-3/-4), Phred+64 input flag (-6), overlap quality cutoff (-q), minimum read length (-L) --> | |
| 107 <section name="general_options" expanded="false" title="General Arguments (Optional)"> | |
| 108 <param argument="-3" name="first_read_discarded" type="text" optional="true" label="First Read Discarded FASTQ Filename" help="first read discarded fastq filename"/> | |
| 109 <param argument="-4" name="second_read_discarded" type="text" optional="true" label="Second Read Discarded FASTQ Filename" help="second read discarded fastq filename"/> | |
| 110 <param argument="-6" name="phred64" type="boolean" checked="false" truevalue="-6" falsevalue="" label="Input Sequence is in Phred+64 Format" help="Input sequence is in phred+64 rather than phred+33 format, the output will still be phred+33"/> | |
| 111 <param argument="-q" name="quality_cutoff" type="integer" value="13" optional="true" label="Quality Score Cutoff" help="Quality score cutoff for mismatches to be counted in overlap"/> | |
| 112 <param argument="-L" name="min_length" type="integer" value="30" optional="true" label="Minimum Length of Reads" help="Minimum length of a trimmed or merged read to print it"/> | |
| 113 </section> | |
| 114 | |
| 115 <!-- Adapter/primer trimming options (-A to -Z) followed by the read-alignment options (-w, -W, -p, -P, -X) --> | |
| 116 <section name="trimming_options" title="Additional Adapter/Primer Trimming Arguments" expanded="false"> | |
| 117 <param name="adapter_a" argument="-A" label="Adapter Sequence A" type="text" optional="true" value="AGATCGGAAGAGCGGTTCAG" help="Forward read primer/adapter sequence to trim as it would appear at the end of a read" /> | |
| 118 <param name="adapter_b" argument="-B" label="Adapter Sequence B" type="text" optional="true" value="AGATCGGAAGAGCGTCGTGT" help="Reverse read primer/adapter sequence to trim as it would appear at the end of a read" /> | |
| 119 <param name="adapter_overlap" argument="-O" label="Minimum Overall Base Pair Overlap with Adapter" type="integer" value="10" optional="true" help="minimum overall base pair overlap with adapter sequence to trim" /> | |
| 120 <param name="max_mismatch_fraction" argument="-M" label="Maximum Fraction of Good Quality Mismatching Bases" type="float" value="0.02" optional="true" help="maximum fraction of good quality mismatching bases for primer/adapter overlap" /> | |
| 121 <param name="min_match_fraction" argument="-N" label="Minimum Fraction of Matching Bases" type="float" value="0.87" optional="true" help="minimum fraction of matching bases for primer/adapter overlap" /> | |
| 122 <param name="adapter_bandwidth" argument="-b" label="Adapter Alignment Band-width" type="integer" value="50" optional="true" /> | |
| 123 <param name="gap_open" argument="-Q" label="Adapter Alignment Gap-Open" type="integer" value="8" optional="true" /> | |
| 124 <param name="gap_extend" argument="-t" label="Adapter Alignment Gap-Extension" type="integer" value="2" optional="true" /> | |
| 125 <param name="gap_end" argument="-e" label="Adapter Alignment Gap-End" type="integer" value="2" optional="true" /> | |
| 126 <param name="local_alignment_score" argument="-Z" label="Minimum Local Alignment Score Cutoff" type="integer" value="26" optional="true" help="Adapter alignment minimum local alignment score cutoff [roughly (2*num_hits) - (num_gaps*gap_open) - (num_gaps*gap_close) - (gap_len*gap_extend) - (2*num_mismatches)]" /> | |
| 127 <param name="read_alignment_bandwidth" argument="-w" label="Read Alignment Band-width" type="integer" value="50" optional="true" /> | |
| 128 <param name="read_alignment_gap_open" argument="-W" label="Read Alignment Gap-Open" type="integer" value="26" optional="true" /> | |
| 129 <param name="read_alignment_gap_extend" argument="-p" label="Read Alignment Gap-Extension" type="integer" value="9" optional="true" /> | |
| 130 <param name="read_alignment_gap_end" argument="-P" label="Read Alignment Gap-End" type="integer" value="5" optional="true" /> | |
| 131 <param name="read_alignment_max_gap_fraction" argument="-X" label="Read Alignment Maximum Fraction Gap Cutoff" type="float" value="0.125" optional="true" help="read alignment maximum fraction gap cutoff" /> | |
| 132 </section> | |
| 133 | |
| 134 <!-- Merging options; -y takes a single quality character, so its sanitizer admits every printable character except the single quote used for shell quoting in the command --> | |
| 135 <section name="merging_options" title="Optional Arguments for Merging" expanded="false"> | |
| 136 <param name="maximum_quality_score" argument="-y" label="Maximum Quality Score in Output" type="text" optional="true" help="Maximum quality score in output (phred 33), default = ']'"><sanitizer><valid initial="string.printable"><remove value="&apos;"/></valid></sanitizer></param> | |
| 137 <param name="print_overhang" argument="-g" type="boolean" truevalue="-g" falsevalue="" checked="false" label="Print Overhang When Adapters Are Present and Stripped" help="Use this if reads are different lengths"/> | |
| 138 <param name="min_base_pair_overlap" argument="-o" type="integer" optional="true" value="15" label="Minimum Overall Base Pair Overlap" help="Minimum overall base pair overlap to merge two reads"/> | |
| 139 <param name="max_mismatch_fraction" argument="-m" type="float" optional="true" value="0.02" label="Maximum Fraction of Good Quality Mismatching Bases" help="Maximum fraction of good quality mismatching bases to overlap reads"/> | |
| 140 <param name="min_match_fraction" argument="-n" type="float" optional="true" value="0.9" label="Minimum Fraction of Matching Bases" help="Minimum fraction of matching bases to overlap reads"/> | |
| 141 </section> | |
| 142 </inputs> | |
| 143 <outputs> | |
| 144 <!-- output1/output2 receive the -1/-2 files; the command passes both flags unconditionally, so these datasets carry no filter --> | |
| 145 <data format="fastq.gz" name="output1" label="${tool.name} on ${on_string}: First Read Output"/> | |
| 146 <data format="fastq.gz" name="output2" label="${tool.name} on ${on_string}: Second Read Output"/> | |
| 147 | |
| 148 <!-- merged receives the -s file and is created only when the merge_reads input is enabled --> | |
| 149 <data format="fastq.gz" name="merged" label="${tool.name} on ${on_string}: Merged Reads"> | |
| 150 <filter>merge_reads</filter> | |
| 151 </data> | |
| 152 | |
| 153 </outputs> | |
| 154 <tests> | |
| 155 <!-- Test #1: merging enabled, every general and trimming value set to the same number as its input default; three outputs expected --> | |
| 156 <test expect_num_outputs="3"> | |
| 157 <param name="input1" value="input1.fq"/> | |
| 158 <param name="input2" value="input2.fq"/> | |
| 159 <param name="merge_reads" value="true"/> | |
| 160 | |
| 161 <!-- general_options values --> | |
| 162 <section name="general_options"> | |
| 163 <param name="quality_cutoff" value="13"/> | |
| 164 <param name="min_length" value="30"/> | |
| 165 </section> | |
| 166 | |
| 167 <!-- trimming_options values --> | |
| 168 <section name="trimming_options"> | |
| 169 <param name="adapter_a" value="AGATCGGAAGAGCGGTTCAG"/> | |
| 170 <param name="adapter_b" value="AGATCGGAAGAGCGTCGTGT"/> | |
| 171 <param name="adapter_overlap" value="10"/> | |
| 172 <param name="max_mismatch_fraction" value="0.02"/> | |
| 173 <param name="min_match_fraction" value="0.87"/> | |
| 174 <param name="adapter_bandwidth" value="50"/> | |
| 175 <param name="gap_open" value="8"/> | |
| 176 <param name="gap_extend" value="2"/> | |
| 177 <param name="gap_end" value="2"/> | |
| 178 <param name="local_alignment_score" value="26"/> | |
| 179 <param name="read_alignment_bandwidth" value="50"/> | |
| 180 <param name="read_alignment_gap_open" value="26"/> | |
| 181 <param name="read_alignment_gap_extend" value="9"/> | |
| 182 <param name="read_alignment_gap_end" value="5"/> | |
| 183 <param name="read_alignment_max_gap_fraction" value="0.125"/> | |
| 184 </section> | |
| 185 <output name="output1" file="output1.fq.gz"/> | |
| 186 <output name="output2" file="output2.fq.gz"/> | |
| 187 <output name="merged" file="merged_output.fq.gz"/> | |
| 188 </test> | |
| 189 | |
| 190 <!-- Test #2: same values as test #1 but with merge_reads off, so only the two per-read outputs are expected --> | |
| 191 <test expect_num_outputs="2"> | |
| 192 <param name="input1" value="input1.fq"/> | |
| 193 <param name="input2" value="input2.fq"/> | |
| 194 <param name="merge_reads" value="false"/> | |
| 195 | |
| 196 <!-- general_options values --> | |
| 197 <section name="general_options"> | |
| 198 <param name="quality_cutoff" value="13"/> | |
| 199 <param name="min_length" value="30"/> | |
| 200 </section> | |
| 201 | |
| 202 <!-- trimming_options values --> | |
| 203 <section name="trimming_options"> | |
| 204 <param name="adapter_a" value="AGATCGGAAGAGCGGTTCAG"/> | |
| 205 <param name="adapter_b" value="AGATCGGAAGAGCGTCGTGT"/> | |
| 206 <param name="adapter_overlap" value="10"/> | |
| 207 <param name="max_mismatch_fraction" value="0.02"/> | |
| 208 <param name="min_match_fraction" value="0.87"/> | |
| 209 <param name="adapter_bandwidth" value="50"/> | |
| 210 <param name="gap_open" value="8"/> | |
| 211 <param name="gap_extend" value="2"/> | |
| 212 <param name="gap_end" value="2"/> | |
| 213 <param name="local_alignment_score" value="26"/> | |
| 214 <param name="read_alignment_bandwidth" value="50"/> | |
| 215 <param name="read_alignment_gap_open" value="26"/> | |
| 216 <param name="read_alignment_gap_extend" value="9"/> | |
| 217 <param name="read_alignment_gap_end" value="5"/> | |
| 218 <param name="read_alignment_max_gap_fraction" value="0.125"/> | |
| 219 </section> | |
| 220 <output name="output1" file="outputNoMerge1.fq.gz"/> | |
| 221 <output name="output2" file="outputNoMerge2.fq.gz"/> | |
| 222 </test> | |
| 223 <!-- Test #3: empty1.fq/empty2.fq as inputs with merging enabled; three outputs are still expected --> | |
| 224 <test expect_num_outputs="3"> | |
| 225 <param name="input1" value="empty1.fq"/> | |
| 226 <param name="input2" value="empty2.fq"/> | |
| 227 <param name="merge_reads" value="true"/> | |
| 228 | |
| 229 <!-- general_options values --> | |
| 230 <section name="general_options"> | |
| 231 <param name="quality_cutoff" value="13"/> | |
| 232 <param name="min_length" value="30"/> | |
| 233 </section> | |
| 234 | |
| 235 <!-- trimming_options values --> | |
| 236 <section name="trimming_options"> | |
| 237 <param name="adapter_a" value="AGATCGGAAGAGCGGTTCAG"/> | |
| 238 <param name="adapter_b" value="AGATCGGAAGAGCGTCGTGT"/> | |
| 239 <param name="adapter_overlap" value="10"/> | |
| 240 <param name="max_mismatch_fraction" value="0.02"/> | |
| 241 <param name="min_match_fraction" value="0.87"/> | |
| 242 <param name="adapter_bandwidth" value="50"/> | |
| 243 <param name="gap_open" value="8"/> | |
| 244 <param name="gap_extend" value="2"/> | |
| 245 <param name="gap_end" value="2"/> | |
| 246 <param name="local_alignment_score" value="26"/> | |
| 247 <param name="read_alignment_bandwidth" value="50"/> | |
| 248 <param name="read_alignment_gap_open" value="26"/> | |
| 249 <param name="read_alignment_gap_extend" value="9"/> | |
| 250 <param name="read_alignment_gap_end" value="5"/> | |
| 251 <param name="read_alignment_max_gap_fraction" value="0.125"/> | |
| 252 </section> | |
| 253 <output name="output1" file="empty_output1.fq.gz"/> | |
| 254 <output name="output2" file="empty_output2.fq.gz"/> | |
| 255 <output name="merged" file="empty_merged_output.fq.gz"/> | |
| 256 </test> | |
| 257 | |
| 258 <!-- Advanced Functional Tests --> | |
| 259 <!-- General Arguments Test #4: non-default quality_cutoff/min_length, addressed through their general_options section like the other tests --> | |
| 260 <test expect_num_outputs="2"> | |
| 261 <param name="input1" value="input1.fq" /> | |
| 262 <param name="input2" value="input2.fq" /> | |
| 263 <param name="merge_reads" value="false" /> | |
| 264 <section name="general_options"> | |
| 265 <param name="quality_cutoff" value="15" /> | |
| 266 <param name="min_length" value="25" /> | |
| 267 </section> | |
| 268 <output name="output1" file="output1_general_args.fq.gz" /> | |
| 269 <output name="output2" file="output2_general_args.fq.gz" /> | |
| 270 </test> | |
| 271 <!-- Adapter/Primer Trimming Arguments Test #5: custom adapters and adapter-alignment values, merging disabled --> | |
| 272 <test expect_num_outputs="2"> | |
| 273 <param name="input1" value="input1.fq"/> | |
| 274 <param name="input2" value="input2.fq"/> | |
| 275 <param name="merge_reads" value="false"/> | |
| 276 <section name="trimming_options"> | |
| 277 <param name="adapter_a" value="ACTGACTG"/> | |
| 278 <param name="adapter_b" value="GTGACTGA"/> | |
| 279 <param name="adapter_overlap" value="12"/> | |
| 280 <param name="max_mismatch_fraction" value="0.03"/> | |
| 281 <param name="min_match_fraction" value="0.85"/> | |
| 282 <param name="adapter_bandwidth" value="55"/> | |
| 283 <param name="gap_open" value="10"/> | |
| 284 <param name="gap_extend" value="3"/> | |
| 285 <param name="gap_end" value="3"/> | |
| 286 <param name="local_alignment_score" value="28"/> | |
| 287 </section> | |
| 288 <output name="output1" file="output1_adapter_trim.fq.gz"/> | |
| 289 <output name="output2" file="output2_adapter_trim.fq.gz"/> | |
| 290 </test> | |
| 291 <!-- Test #6: gzipped FASTQ inputs with merging enabled and all other inputs left at their defaults --> | |
| 292 <test expect_num_outputs="3"> | |
| 293 <param name="input1" value="input1.fastq.gz"/> | |
| 294 <param name="input2" value="input2.fastq.gz"/> | |
| 295 <param name="merge_reads" value="true"/> | |
| 296 <output name="output1" file="output1_from_gzipped.fq.gz"/> | |
| 297 <output name="output2" file="output2_from_gzipped.fq.gz"/> | |
| 298 <output name="merged" file="merged_output_from_gzipped.fq.gz"/> | |
| 299 </test> | |
| 300 </tests> | |
| 301 <help><![CDATA[ | |
| 302 .. class:: warningmark | |
| 303 | |
| 304 **Caution** | |
| 305 ----------- | |
| 306 :: | |
| 307 | |
| 308 This is a modified version of the 1.2 release. Made for use with the MGnify pipeline. | |
| 309 | |
| 310 Difference in `utils.h`: | |
| 311 | |
| 312 :: | |
| 313 | |
| 314 -#define MAX_SEQ_LEN (256) | |
| 315 +#define MAX_SEQ_LEN (1024) | |
| 316 | |
| 317 **SeqPrep** | |
| 318 ----------- | |
| 319 :: | |
| 320 | |
| 321 SeqPrep is a versatile tool designed for merging overlapping paired-end Illumina reads into a single, longer read. | |
| 322 Additionally, it can trim adapter sequences from reads, making it a useful tool for preprocessing Illumina sequencing data. | |
| 323 | |
| 324 **Usage** | |
| 325 ========= | |
| 326 :: | |
| 327 | |
| 328 To utilize SeqPrep, start by selecting your input FASTQ files: one for the first set of reads and another for the second set. | |
| 329 SeqPrep provides several options to customize your data processing: | |
| 330 | |
| 331 - Adapter Sequences: You can provide specific sequences for adapter trimming if they are known. SeqPrep will remove these sequences from the reads. | |
| 332 - Quality Score Cutoff: Set the quality score cutoff for mismatches to be counted in the overlap between reads. | |
| 333 - Minimum Read Length: Define the minimum length for reads to be retained after trimming. Reads shorter than this length will be discarded. | |
| 334 | |
| 335 If the merging feature is enabled, SeqPrep will combine overlapping reads into longer sequences, thereby enhancing the data quality for downstream analysis. | |
| 336 | |
| 337 **Outputs** | |
| 338 =========== | |
| 339 :: | |
| 340 | |
| 341 SeqPrep generates outputs in gzipped FASTQ format. | |
| 342 | |
| 343 See more details on `SeqPrep GitHub repository <https://github.com/jstjohn/SeqPrep>`_. | |
| 344 | |
| 345 ]]></help> | |
| 346 <expand macro="citations"/> | |
| 347 </tool> |
