comparison pal_finder_wrapper.xml @ 8:4e625d3672ba draft

Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
author pjbriggs
date Wed, 16 May 2018 07:39:16 -0400
parents 5e133b7b79a6
children 52dbe2089d14
comparison
equal deleted inserted replaced
7:5e133b7b79a6 8:4e625d3672ba
1 <tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.6"> 1 <tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.7">
2 <description>Find microsatellite repeat elements from sequencing reads and design PCR primers to amplify them</description> 2 <description>Find microsatellite repeat elements from sequencing reads and design PCR primers to amplify them</description>
3 <macros> 3 <macros>
4 <import>pal_finder_macros.xml</import> 4 <import>pal_finder_macros.xml</import>
5 </macros> 5 </macros>
6 <requirements> 6 <requirements>
7 <requirement type="package" version="0.02.04">pal_finder</requirement> 7 <requirement type="package" version="0.02.04">pal_finder</requirement>
8 <requirement type="package" version="2.7">python</requirement> 8 <requirement type="package" version="2.7">python</requirement>
9 <requirement type="package" version="1.65">biopython</requirement> 9 <requirement type="package" version="1.65">biopython</requirement>
10 <requirement type="package" version="2.8.1">pandaseq</requirement> 10 <requirement type="package" version="2.8.1">pandaseq</requirement>
11 </requirements> 11 </requirements>
12 <command><![CDATA[ 12 <command detect_errors="exit_code"><![CDATA[
13 @CONDA_PAL_FINDER_SCRIPT_DIR@ && 13 @CONDA_PAL_FINDER_SCRIPT_DIR@ &&
14 @CONDA_PAL_FINDER_DATA_DIR@ && 14 @CONDA_PAL_FINDER_DATA_DIR@ &&
15 bash $__tool_directory__/pal_finder_wrapper.sh 15 bash $__tool_directory__/pal_finder_wrapper.sh
16 #if str( $platform.platform_type ) == "illumina" 16 #if str( $platform.platform_type ) == "illumina"
17 #set $paired_input_type = $platform.paired_input_type_conditional.paired_input_type 17 #set $paired_input_type = $platform.paired_input_type_conditional.paired_input_type
24 #end if 24 #end if
25 #else 25 #else
26 --454 "$platform.input_fasta" 26 --454 "$platform.input_fasta"
27 #end if 27 #end if
28 $output_microsat_summary $output_pal_summary 28 $output_microsat_summary $output_pal_summary
29 #if $report_bad_primer_ranges
30 --bad_primer_ranges "$output_bad_primer_read_ids"
31 #end if
29 #if $keep_config_file 32 #if $keep_config_file
30 --output_config_file "$output_config_file" 33 --output_config_file "$output_config_file"
31 #end if 34 #end if
32 --primer-prefix "$primer_prefix" 35 --primer-prefix "$primer_prefix"
33 --2merMinReps $min_2mer_repeats 36 --2merMinReps $min_2mer_repeats
58 --filter_microsats "$output_filtered_microsats" 61 --filter_microsats "$output_filtered_microsats"
59 #end for 62 #end for
60 #end if 63 #end if
61 #if str( $platform.assembly ) == '-assembly' 64 #if str( $platform.assembly ) == '-assembly'
62 $platform.assembly "$output_assembly" 65 $platform.assembly "$output_assembly"
66 #end if
67 #set $use_all_reads = $platform.subset_conditional.use_all_reads
68 #if str( $use_all_reads ) != "yes"
69 --subset "$platform.subset_conditional.subset"
63 #end if 70 #end if
64 #end if 71 #end if
65 ]]></command> 72 ]]></command>
66 <inputs> 73 <inputs>
67 <param name="primer_prefix" type="text" value="test" size="25" label="Primer prefix" help="This prefix will be added to the beginning of all primer names" /> 74 <param name="primer_prefix" type="text" value="test" size="25" label="Primer prefix" help="This prefix will be added to the beginning of all primer names" />
86 <param name="input_fastq_pair" format="fastqsanger" 93 <param name="input_fastq_pair" format="fastqsanger"
87 type="data_collection" collection_type="paired" 94 type="data_collection" collection_type="paired"
88 label="Select FASTQ dataset collection with R1/R2 pair" /> 95 label="Select FASTQ dataset collection with R1/R2 pair" />
89 </when> 96 </when>
90 </conditional> 97 </conditional>
98 <conditional name="subset_conditional">
99 <param name="use_all_reads" type="boolean" label="Use all reads for microsatellite detection?" checked="True" truevalue="yes" falsevalue="no" />
100 <when value="no">
101 <param name="subset" type="text" value="0.5" label="Number or fraction of reads to use" help="Either an integer number of reads or a decimal fraction (e.g. 0.5 to select 50% of reads)" />
102 </when>
103 <when value="yes" />
104 </conditional>
91 <param name="filters" type="select" display="checkboxes" 105 <param name="filters" type="select" display="checkboxes"
92 multiple="True" label="Filters to apply to the pal_finder results" 106 multiple="True" label="Filters to apply to the pal_finder results"
93 help="Apply none, one or more filters to refine results"> 107 help="Apply none, one or more filters to refine results">
94 <option value="-primers" selected="True">Only include loci with designed primers</option> 108 <option value="-primers" selected="True">Only include loci with designed primers</option>
95 <option value="-occurrences" selected="True">Exclude loci where the primer sequences occur more than once in the reads</option> 109 <option value="-occurrences" selected="True">Exclude loci where the primer sequences occur more than once in the reads</option>
101 </when> 115 </when>
102 <when value="454"> 116 <when value="454">
103 <param name="input_fasta" type="data" format="fasta" label="454 fasta file with raw reads" /> 117 <param name="input_fasta" type="data" format="fasta" label="454 fasta file with raw reads" />
104 </when> 118 </when>
105 </conditional> 119 </conditional>
106 <param name="min_2mer_repeats" type="integer" value="6" label="Minimum number of 2-mer repeat units to detect" help="Set to zero to ignore repeats of this n-mer unit" /> 120 <param name="min_2mer_repeats" type="integer" value="6" label="Minimum number of 2-mer repeat units to detect" min="1" help="Must detect at least one repeat of this n-mer unit" />
107 <param name="min_3mer_repeats" type="integer" value="0" label="Minimum number of 3-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> 121 <param name="min_3mer_repeats" type="integer" value="0" label="Minimum number of 3-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" />
108 <param name="min_4mer_repeats" type="integer" value="0" label="Minimum number of 4-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> 122 <param name="min_4mer_repeats" type="integer" value="0" label="Minimum number of 4-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" />
109 <param name="min_5mer_repeats" type="integer" value="0" label="Minimum number of 5-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> 123 <param name="min_5mer_repeats" type="integer" value="0" label="Minimum number of 5-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" />
110 <param name="min_6mer_repeats" type="integer" value="0" label="Minimum number of 6-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> 124 <param name="min_6mer_repeats" type="integer" value="0" label="Minimum number of 6-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" />
111 <conditional name="mispriming"> 125 <conditional name="mispriming">
153 help="Temperature should be in degrees Celsius" /> 167 help="Temperature should be in degrees Celsius" />
154 <param name="primer_pair_max_diff_tm" type="float" value="2.0" 168 <param name="primer_pair_max_diff_tm" type="float" value="2.0"
155 label="Maximum acceptable difference between melting temperatures of left and right primers (PRIMER_PAIR_MAX_DIFF_TM)" 169 label="Maximum acceptable difference between melting temperatures of left and right primers (PRIMER_PAIR_MAX_DIFF_TM)"
156 help="Temperature should be in degrees Celsius" /> 170 help="Temperature should be in degrees Celsius" />
157 </when> 171 </when>
172 <when value="default" />
158 </conditional> 173 </conditional>
174 <param name="report_bad_primer_ranges" type="boolean" truevalue="True" falsevalue="False" label="Output IDs for input reads which generate bad primer product size ranges" help="Can be used to screen reads in input Fastqs " />
159 <param name="keep_config_file" type="boolean" truevalue="True" falsevalue="False" 175 <param name="keep_config_file" type="boolean" truevalue="True" falsevalue="False"
160 label="Output the config file to the history" 176 label="Output the config file to the history"
161 help="Can be used to run pal_finder outside of Galaxy" /> 177 help="Can be used to run pal_finder outside of Galaxy" />
162 </inputs> 178 </inputs>
163 <outputs> 179 <outputs>
167 </data> 183 </data>
168 <data name="output_microsat_summary" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix}: summary of microsatellite types" /> 184 <data name="output_microsat_summary" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix}: summary of microsatellite types" />
169 <data name="output_assembly" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: assembly"> 185 <data name="output_assembly" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: assembly">
170 <filter>platform['assembly'] is True</filter> 186 <filter>platform['assembly'] is True</filter>
171 </data> 187 </data>
188 <data name="output_bad_primer_read_ids" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: read IDs generating bad primer ranges">
189 <filter>report_bad_primer_ranges is True</filter>
190 </data>
172 <data name="output_config_file" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix}: config file"> 191 <data name="output_config_file" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix}: config file">
173 <filter>keep_config_file is True</filter> 192 <filter>keep_config_file is True</filter>
174 </data> 193 </data>
175 </outputs> 194 </outputs>
176 <tests> 195 <tests>
245 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> 264 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" />
246 <expand macro="output_illumina_microsat_summary" /> 265 <expand macro="output_illumina_microsat_summary" />
247 <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats.out.re_match" /> 266 <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats.out.re_match" />
248 <output name="output_filtered_microsats" compare="re_match" file="illuminaPE_filtered_microsats_rankmotifs.out.re_match" /> 267 <output name="output_filtered_microsats" compare="re_match" file="illuminaPE_filtered_microsats_rankmotifs.out.re_match" />
249 </test> 268 </test>
269 <!-- Test with Illumina input using subset of reads -->
270 <test>
271 <param name="platform_type" value="illumina" />
272 <param name="filters" value="" />
273 <param name="assembly" value="false" />
274 <param name="use_all_reads" value="no" />
275 <param name="subset" value="0.5" />
276 <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" />
277 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" />
278 <expand macro="output_illumina_microsat_subset_summary" />
279 <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats_subset.out.re_match" />
280 </test>
281 <!-- Test with Illumina input for which pal_finder doesn't find any
282 microsatellites -->
283 <test expect_failure="true">
284 <param name="platform_type" value="illumina" />
285 <param name="filters" value="" />
286 <param name="assembly" value="false" />
287 <param name="min_2mer_repeats" value="8" />
288 <param name="input_fastq_r1" value="illuminaPE_r1_no_microsats.fq" ftype="fastqsanger" />
289 <param name="input_fastq_r2" value="illuminaPE_r2_no_microsats.fq" ftype="fastqsanger" />
290 <assert_stderr>
291 <has_text text="pal_finder failed to locate any microsatellites" />
292 </assert_stderr>
293 </test>
294 <!-- Test with Illumina input generating bad ranges -->
295 <test>
296 <param name="platform_type" value="illumina" />
297 <param name="filters" value="" />
298 <param name="assembly" value="false" />
299 <param name="min_2mer_repeats" value="8" />
300 <param name="input_fastq_r1" value="illuminaPE_r1_bad_ranges.fq" ftype="fastqsanger" />
301 <param name="input_fastq_r2" value="illuminaPE_r2_bad_ranges.fq" ftype="fastqsanger" />
302 <param name="min_2mer_repeats" value="8" />
303 <param name="min_3mer_repeats" value="8" />
304 <param name="min_4mer_repeats" value="8" />
305 <param name="min_5mer_repeats" value="8" />
306 <param name="min_6mer_repeats" value="8" />
307 <param name="primer_options" value="custom" />
308 <param name="primer_opt_size" value="25" />
309 <param name="primer_min_size" value="21" />
310 <param name="primer_max_size" value="30" />
311 <param name="primer_min_gc" value="40.0" />
312 <param name="primer_max_gc" value="60.0" />
313 <param name="primer_gc_clamp" value="3" />
314 <param name="primer_max_end_gc" value="5" />
315 <param name="primer_min_tm" value="60.0" />
316 <param name="primer_max_tm" value="80.0" />
317 <param name="primer_opt_tm" value="68.0" />
318 <param name="primer_pair_max_diff_tm" value="3.0" />
319 <param name="report_bad_primer_ranges" value="true" />
320 <expand macro="output_illumina_microsat_summary_bad_ranges" />
321 <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats_bad_ranges.out.re_match" />
322 <output name="output_bad_primer_read_ids" file="illuminaPE_bad_primer_read_ids.out" />
323 </test>
324 <!-- Test with bad n-mers specified -->
325 <test expect_failure="true">
326 <param name="platform_type" value="illumina" />
327 <param name="filters" value="" />
328 <param name="assembly" value="false" />
329 <param name="min_2mer_repeats" value="8" />
330 <param name="min_3mer_repeats" value="8" />
331 <param name="min_4mer_repeats" value="0" />
332 <param name="min_5mer_repeats" value="8" />
333 <param name="min_6mer_repeats" value="8" />
334 <param name="input_fastq_r1" value="illuminaPE_r1_no_microsats.fq" ftype="fastqsanger" />
335 <param name="input_fastq_r2" value="illuminaPE_r2_no_microsats.fq" ftype="fastqsanger" />
336 <assert_stderr>
337 <has_text text="Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero" />
338 </assert_stderr>
339 </test>
250 <!-- Test with 454 input --> 340 <!-- Test with 454 input -->
251 <test> 341 <test>
252 <param name="platform_type" value="454" /> 342 <param name="platform_type" value="454" />
253 <param name="input_fasta" value="454_in.fa" ftype="fasta" /> 343 <param name="input_fasta" value="454_in.fa" ftype="fasta" />
254 <expand macro="output_454_microsat_summary" /> 344 <expand macro="output_454_microsat_summary" />
275 present in high-quality assembly 365 present in high-quality assembly
276 366
277 Pal_finder runs the primer3_core program; information on the settings used in 367 Pal_finder runs the primer3_core program; information on the settings used in
278 primer3_core can be found in the Primer3 manual at 368 primer3_core can be found in the Primer3 manual at
279 http://primer3.sourceforge.net/primer3_manual.htm 369 http://primer3.sourceforge.net/primer3_manual.htm
370
371 -------------
372
373 .. class:: infomark
374
375 **Known issues**
376
377 .. class:: warning
378
379 **Low number of reads used for microsatellite detection/bad primer product size ranges**
380
381 For some datasets pal_finder may generate 'bad' product size ranges (where the
382 lower limit exceeds the upper limit) for one or more reads, for input into
383 primer3_core. In these cases primer3_core will terminate prematurely, which can
384 result in a substantially lower number of reads being used for microsatellite
385 detection and potentially sub-optimal primer design.
386
387 The number of reads generating the bad size ranges is reported in the
388 *Summary of microsat types* output dataset as 'readsWithBadRanges'. Ideally
389 the reported value should be zero.
390
391 The conditions which cause this issue within pal_finder are still unclear;
392 however, we believe it to be associated with short or low-quality reads. If this
393 problem affects your data then:
394
395 * Ensure that the input data are sufficiently trimmed and filtered (using
396 e.g. the Trimmomatic tool) before rerunning pal_finder.
397
398 * A list of read IDs for which pal_finder generates bad product size ranges can
399 be output by turning on *Output IDs for input reads which generate bad primer
400 product size ranges*. This outputs an additional dataset with a list of read IDs which can
401 be used to remove read pairs from the input Fastq files (using e.g. the *Filter
402 sequences by ID* tool) before rerunning pal_finder.
403
404 .. class:: warning
405
406 **Pal_finder takes a long time to run for large input datasets**
407
408 pal_finder was originally developed using MiSeq data, and is not optimised for
409 working with the larger Fastqs that are output from other platforms such as
410 HiSeq and NextSeq. As a consequence pal_finder may take a very long time to
411 complete when operating on larger datasets.
412
413 If this is a problem then the tool can be run using a subset of the input reads
414 by unchecking the *Use all reads...* option and entering either an integer number
415 of reads to use, or a decimal fraction (e.g. 0.5 will select 50% of the reads).
280 416
281 ------------- 417 -------------
282 418
283 .. class:: infomark 419 .. class:: infomark
284 420