Mercurial > repos > pjbriggs > pal_finder
annotate pal_finder_wrapper.xml @ 4:cb56cc1d5c39 draft
Updates to the palfilter.py utility.
author | pjbriggs |
---|---|
date | Mon, 21 Mar 2016 06:52:43 -0400 |
parents | e1a14ed7a9d6 |
children | a73c48890bde |
rev | line source |
---|---|
3
e1a14ed7a9d6
Updated to version 0.02.04.4 (new pal_filter script)
pjbriggs
parents:
2
diff
changeset
|
1 <tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.4"> |
2 | 2 <description>Find microsatellite repeat elements from sequencing reads and design PCR primers to amplify them</description> |
3 <requirements> | |
4 <requirement type="package" version="5.16.3">perl</requirement> | |
5 <requirement type="package" version="0.02.04">pal_finder</requirement> | |
6 <requirement type="package" version="2.0.0">primer3_core</requirement> | |
7 <requirement type="package" version="1.65">biopython</requirement> | |
8 <requirement type="package" version="2.8.1">pandaseq</requirement> | |
9 </requirements> | |
0 | 10 <command interpreter="bash">pal_finder_wrapper.sh |
11 #if str( $platform.platform_type ) == "illumina" | |
2 | 12 #set $paired_input_type = $platform.paired_input_type_conditional.paired_input_type |
13 #if $paired_input_type == "pair_of_files" | |
14 "$platform.paired_input_type_conditional.input_fastq_r1" | |
15 "$platform.paired_input_type_conditional.input_fastq_r2" | |
16 #else | |
17 "$platform.paired_input_type_conditional.input_fastq_pair.forward" | |
18 "$platform.paired_input_type_conditional.input_fastq_pair.reverse" | |
19 #end if | |
0 | 20 #else |
2 | 21 --454 "$platform.input_fasta" |
0 | 22 #end if |
23 $output_microsat_summary $output_pal_summary | |
24 #if $keep_config_file | |
2 | 25 --output_config_file "$output_config_file" |
0 | 26 #end if |
1
771ebe02636f
Uploaded version 0.02.04.2: fix bug that causes tool to fail when prefix includes spaces; add explicit dependency on Perl 5.16.3.
pjbriggs
parents:
0
diff
changeset
|
27 --primer-prefix "$primer_prefix" |
0 | 28 --2merMinReps $min_2mer_repeats |
29 --3merMinReps $min_3mer_repeats | |
30 --4merMinReps $min_4mer_repeats | |
31 --5merMinReps $min_5mer_repeats | |
32 --6merMinReps $min_6mer_repeats | |
33 #if str( $primer.primer_options ) == "custom" | |
34 --primer-opt-size $primer.primer_opt_size | |
35 --primer-min-size $primer.primer_min_size | |
36 --primer-max-size $primer.primer_max_size | |
37 --primer-min-gc $primer.primer_min_gc | |
38 --primer-max-gc $primer.primer_max_gc | |
39 --primer-gc-clamp $primer.primer_gc_clamp | |
40 --primer-max-end-gc $primer.primer_max_end_gc | |
41 --primer-min-tm $primer.primer_min_tm | |
42 --primer-max-tm $primer.primer_max_tm | |
43 --primer-opt-tm $primer.primer_opt_tm | |
44 --primer-pair-max-diff-tm $primer.primer_pair_max_diff_tm | |
45 #end if | |
46 #if str( $mispriming.mispriming_options ) == "custom" | |
47 --primer-mispriming-library $mispriming.mispriming_library | |
48 #end if | |
2 | 49 #if str( $platform.platform_type ) == "illumina" |
50 #if $platform.filters | |
51 #for $filter in str($platform.filters).split(',') | |
52 $filter | |
53 --filter_microsats "$output_filtered_microsats" | |
54 #end for | |
55 #end if | |
56 #if str( $platform.assembly ) == '-assembly' | |
57 $platform.assembly "$output_assembly" | |
58 #end if | |
59 #end if | |
0 | 60 </command> |
61 <inputs> | |
62 <param name="primer_prefix" type="text" value="test" size="25" label="Primer prefix" help="This prefix will be added to the beginning of all primer names" /> | |
63 <conditional name="platform"> | |
64 <param name="platform_type" type="select" label="Sequencing platform used to generate data" help="Currently pal_finder only handles Illumina paired-end reads and 454 single-end reads" > | |
65 <option value="illumina" selected="true">Illumina</option> | |
66 <option value="454">454</option> | |
67 </param> | |
68 <when value="illumina"> | |
2 | 69 <conditional name="paired_input_type_conditional"> |
70 <param name="paired_input_type" type="select" label="Input Type"> | |
71 <option value="pair_of_files" selected="true">Pair of datasets</option> | |
72 <option value="collection">Dataset collection pair</option> | |
73 </param> | |
74 <when value="pair_of_files"> | |
75 <param name="input_fastq_r1" type="data" format="fastqsanger" | |
76 label="Illumina fastq file (read 1)" /> | |
77 <param name="input_fastq_r2" type="data" format="fastqsanger" | |
78 label="Illumina fastq file (read 2)" /> | |
79 </when> | |
80 <when value="collection"> | |
81 <param name="input_fastq_pair" format="fastqsanger" | |
82 type="data_collection" collection_type="paired" | |
83 label="Select FASTQ dataset collection with R1/R2 pair" /> | |
84 </when> | |
85 </conditional> | |
86 <param name="filters" type="select" display="checkboxes" | |
87 multiple="True" label="Filters to apply to the pal_finder results" | |
88 help="Apply none, one or more filters to refine results"> | |
89 <option value="-primers" selected="True">Only include loci with designed primers</option> | |
90 <option value="-occurrences" selected="True">Exclude loci where the primer sequences occur more than once in the reads</option> | |
91 <option value="-rankmotifs" selected="True">Only include loci with 'perfect' motifs, and rank by motif size</option> | |
92 </param> | |
93 <param name="assembly" type="boolean" | |
94 checked="True" truevalue="-assembly" falsevalue="" | |
95 label="Use PANDAseq to assemble paired-end reads and confirm primer sequences are present in high-quality assembly" /> | |
0 | 96 </when> |
97 <when value="454"> | |
98 <param name="input_fasta" type="data" format="fasta" label="454 fasta file with raw reads" /> | |
99 </when> | |
100 </conditional> | |
101 <param name="min_2mer_repeats" type="integer" value="6" label="Minimum number of 2-mer repeat units to detect" help="Set to zero to ignore repeats of this n-mer unit" /> | |
102 <param name="min_3mer_repeats" type="integer" value="0" label="Minimum number of 3-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> | |
103 <param name="min_4mer_repeats" type="integer" value="0" label="Minimum number of 4-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> | |
104 <param name="min_5mer_repeats" type="integer" value="0" label="Minimum number of 5-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> | |
105 <param name="min_6mer_repeats" type="integer" value="0" label="Minimum number of 6-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> | |
106 <conditional name="mispriming"> | |
107 <param name="mispriming_options" type="select" label="Mispriming library to use" help="Specify file of nucleotide sequences to avoid amplifying (PRIMER_MISPRIMING_LIBRARY)"> | |
108 <option value="default">Default from pal_finder</option> | |
109 <option value="custom">Custom sequences from history</option> | |
110 </param> | |
111 <when value="default"> | |
112 </when> | |
113 <when value="custom"> | |
114 <param name="mispriming_library" type="data" format="fasta" label="Select mispriming library from history" help="Fasta file containing sequences to avoid amplifying" /> | |
115 </when> | |
116 </conditional> | |
117 <conditional name="primer"> | |
118 <param name="primer_options" type="select" label="Primer settings to use" help="Advanced users can customise the settings for primer3 for more control"> | |
119 <option value="default">Defaults for pal_finder</option> | |
120 <option value="custom">Custom</option> | |
121 </param> | |
122 <when value="custom"> | |
123 <param name="primer_opt_size" type="integer" value="20" | |
124 label="Optimum length (in bases) of a primer (PRIMER_OPT_SIZE)" | |
125 help="Primer3 will attempt to pick primers close to this length" /> | |
126 <param name="primer_min_size" type="integer" value="18" | |
127 label="Minimum acceptable length (in bases) of a primer (PRIMER_MIN_SIZE)" | |
128 help="Must be greater than 0 and less than or equal to PRIMER_MAX_SIZE" /> | |
129 <param name="primer_max_size" type="integer" value="30" | |
130 label="Maximum acceptable length (in bases) of a primer (PRIMER_MAX_SIZE)" | |
131 help="Currently this parameter cannot be larger than 35. This limit is governed by maximum oligo size for which primer3's melting-temperature is valid" /> | |
132 <param name="primer_min_gc" type="float" value="30.0" | |
133 label="Minimum allowable percentage of Gs and Cs in any primer (PRIMER_MIN_GC)" /> | |
134 <param name="primer_max_gc" type="float" value="80.0" | |
135 label="Maximum allowable percentage of Gs and Cs in any primer (PRIMER_MAX_GC)" /> | |
136 <param name="primer_gc_clamp" type="integer" value="2" | |
137 label="Specify number of consecutive Gs and Cs at 3' end of both the left and right primer (PRIMER_GC_CLAMP)" /> | |
138 <param name="primer_max_end_gc" type="integer" value="5" | |
139 label="Maximum number of Gs or Cs allowed in last five 3' bases of a left or right primer (PRIMER_MAX_END_GC)" /> | |
140 <param name="primer_min_tm" type="float" value="58.0" | |
141 label="Minimum acceptable melting temperature for a primer oligo (PRIMER_MIN_TM)" | |
142 help="Temperature should be in degrees Celsius" /> | |
143 <param name="primer_max_tm" type="float" value="65.0" | |
144 label="Maximum acceptable melting temperature for a primer oligo (PRIMER_MAX_TM)" | |
145 help="Temperature should be in degrees Celsius" /> | |
146 <param name="primer_opt_tm" type="float" value="62.0" | |
147 label="Optimum melting temperature for a primer (PRIMER_OPT_TM)" | |
148 help="Temperature should be in degrees Celsius" /> | |
149 <param name="primer_pair_max_diff_tm" type="float" value="2.0" | |
150 label="Maximum acceptable difference between melting temperatures of left and right primers (PRIMER_PAIR_MAX_DIFF_TM)" | |
151 help="Temperature should be in degrees Celsius" /> | |
152 </when> | |
153 </conditional> | |
154 <param name="keep_config_file" type="boolean" truevalue="True" falsevalue="False" | |
155 label="Output the config file to the history" | |
156 help="Can be used to run pal_finder outside of Galaxy" /> | |
157 </inputs> | |
158 <outputs> | |
2 | 159 <data name="output_pal_summary" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: all microsatellites (full details)" /> |
160 <data name="output_filtered_microsats" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: filtered microsatellites (full details)"> | |
161 <filter>platform['platform_type'] == 'illumina' and platform['filters'] is not None</filter> | |
0 | 162 </data> |
2 | 163 <data name="output_microsat_summary" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix}: summary of microsatellite types" /> |
164 <data name="output_assembly" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: assembly"> | |
165 <!-- 'assembly' is only declared in the Illumina branch of the 'platform' conditional, so check the platform type before reading it (mirrors the output_filtered_microsats filter) --> <filter>platform['platform_type'] == 'illumina' and platform['assembly'] is True</filter> | |
166 </data> | |
167 <data name="output_config_file" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix}: config file"> | |
0 | 168 <filter>keep_config_file is True</filter> |
169 </data> | |
170 </outputs> | |
171 <tests> | |
172 <test> | |
173 <!-- Test with Illumina input --> | |
174 <param name="platform_type" value="illumina" /> | |
175 <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> | |
176 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> | |
2 | 177 <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" /> |
178 <output name="output_pal_summary" file="illuminaPE_microsats.out" /> | |
179 <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats.out" /> | |
180 <output name="output_assembly" file="illuminaPE_assembly_after_filters.out" /> | |
181 </test> | |
182 <test> | |
183 <!-- Test with Illumina input as a paired dataset collection --> | |
184 <param name="platform_type" value="illumina" /> | |
185 <param name="paired_input_type" value="collection" /> | |
186 <param name="input_fastq_pair"> | |
187 <collection type="paired"> | |
188 <element name="forward" value="illuminaPE_r1.fq" ftype="fastqsanger" /> | |
189 <element name="reverse" value="illuminaPE_r2.fq" ftype="fastqsanger" /> | |
190 </collection> | |
191 </param> | |
0 | 192 <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" /> |
193 <output name="output_pal_summary" file="illuminaPE_microsats.out" /> | |
194 <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats.out" /> | |
2 | 195 <output name="output_assembly" file="illuminaPE_assembly_after_filters.out" /> |
196 </test> | |
197 <test> | |
198 <!-- Test with Illumina input, no filters selected, PANDAseq assembly enabled | |
199 ('-assembly' option) --> | |
200 <param name="platform_type" value="illumina" /> | |
201 <param name="filters" value="" /> | |
202 <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> | |
203 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> | |
204 <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" /> | |
205 <output name="output_pal_summary" file="illuminaPE_microsats.out" /> | |
206 <output name="output_assembly" file="illuminaPE_assembly.out" /> | |
207 </test> | |
208 <test> | |
209 <!-- Test with Illumina input filter to loci with primers | |
210 ('-primers' option) --> | |
211 <param name="platform_type" value="illumina" /> | |
212 <param name="filters" value="-primers" /> | |
213 <param name="assembly" value="false" /> | |
214 <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> | |
215 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> | |
216 <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" /> | |
217 <output name="output_pal_summary" file="illuminaPE_microsats.out" /> | |
218 <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats_primers.out" /> | |
219 </test> | |
220 <test> | |
221 <!-- Test with Illumina input filter to loci which appear only once | |
222 ('-occurrences' option) --> | |
223 <param name="platform_type" value="illumina" /> | |
224 <param name="filters" value="-occurrences" /> | |
225 <param name="assembly" value="false" /> | |
226 <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> | |
227 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> | |
228 <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" /> | |
229 <output name="output_pal_summary" file="illuminaPE_microsats.out" /> | |
230 <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats_occurrences.out" /> | |
231 </test> | |
232 <test> | |
233 <!-- Test with Illumina input filter and rank loci with perfect motifs | |
234 ('-rankmotifs' option) --> | |
235 <param name="platform_type" value="illumina" /> | |
236 <param name="filters" value="-rankmotifs" /> | |
237 <param name="assembly" value="false" /> | |
238 <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> | |
239 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> | |
240 <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" /> | |
241 <output name="output_pal_summary" file="illuminaPE_microsats.out" /> | |
242 <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats_rankmotifs.out" /> | |
0 | 243 </test> |
244 <test> | |
245 <!-- Test with 454 input --> | |
246 <param name="platform_type" value="454" /> | |
247 <param name="input_fasta" value="454_in.fa" ftype="fasta" /> | |
248 <output name="output_microsat_summary" file="454_microsat_types.out" /> | |
249 <output name="output_pal_summary" file="454_microsats.out" /> | |
250 </test> | |
251 </tests> | |
252 <help> | |
253 .. class:: infomark | |
254 | |
255 **What it does** | |
256 | |
257 This tool runs the pal_finder program, which finds microsatellite repeat elements | |
258 directly from raw 454 or Illumina paired-end sequencing reads. It then designs PCR | |
259 primers to amplify these repeat loci (Potentially Amplifiable Loci: PAL). | |
260 | |
2 | 261 Optionally for Illumina data, one or more filters can be applied to the output from |
262 pal_finder to: | |
263 | |
264 * Only include loci with designed primers | |
265 * Exclude loci where the primer sequences occur more than once in the reads | |
266 * Only include loci with 'perfect' motifs (and rank by motif size, largest to | |
267 smallest) | |
268 * Use PANDAseq to assemble paired-end reads and confirm primer sequences are | |
269 present in high-quality assembly | |
0 | 270 |
271 Pal_finder runs the primer3_core program; information on the settings used in | |
272 primer3_core can be found in the Primer3 manual at | |
273 http://primer3.sourceforge.net/primer3_manual.htm | |
274 | |
275 ------------- | |
276 | |
277 .. class:: infomark | |
278 | |
279 **Credits** | |
280 | |
281 This Galaxy tool has been developed by Peter Briggs within the Bioinformatics Core | |
282 Facility at the University of Manchester. It runs the pal_finder package which can be | |
283 obtained from http://sourceforge.net/projects/palfinder/: | |
284 | |
285 * PLoS One. 2012; 7(2): e30953 "Rapid Microsatellite Identification from Illumina Paired-End | |
286 Genomic Sequencing in Two Birds and a Snake" Todd A. Castoe, Alexander W. Poole, A. P. | |
287 Jason de Koning, Kenneth L. Jones, Diana F. Tomback, Sara J. Oyler-McCance, Jennifer A. | |
288 Fike, Stacey L. Lance, Jeffrey W. Streicher, Eric N. Smith, and David D. Pollock | |
289 | |
290 The paper is available at http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3279355/ | |
291 | |
292 This tool is compatible with pal_finder version 0.02.04, which in turn runs the | |
293 primer3_core program (version 2.0.0-alpha is required, available from | |
294 http://primer3.sourceforge.net/releases.php): | |
295 | |
296 * Steve Rozen and Helen J. Skaletsky (2000) "Primer3 on the WWW for general users and for | |
297 biologist programmers". In: Krawetz S, Misener S (eds) Bioinformatics Methods and | |
298 Protocols: Methods in Molecular Biology. Humana Press, Totowa, NJ, pp 365-386 | |
299 | |
300 The paper is available at | |
301 http://purl.com/STEVEROZEN/papers/rozen-and-skaletsky-2000-primer3.pdf | |
302 | |
2 | 303 The filtering and assembly of the pal_finder output for Illumina data is performed |
304 using a Python utility written by Graeme Fox at the University of Manchester, and which | |
305 is included with this tool; this utility uses the BioPython and PANDAseq packages. | |
0 | 306 |
307 Please kindly acknowledge this Galaxy tool, the pal_finder and primer3 packages, and | |
2 | 308 the utility script and its dependencies if you use it in your work. |
0 | 309 </help> |
310 <citations> | |
311 <!-- | |
312 See https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set | |
313 Can be either DOI or Bibtex | |
314 Use http://www.bioinformatics.org/texmed/ to convert PubMed to Bibtex | |
315 --> | |
316 <citation type="doi">10.1371/journal.pone.0030953</citation> | |
317 <citation type="bibtex">@Article{pmid10547847, | |
2 | 318 Author="Rozen, S. and Skaletsky, H. ", |
0 | 319 Title="{{P}rimer3 on the {W}{W}{W} for general users and for biologist programmers}", |
320 Journal="Methods Mol. Biol.", | |
321 Year="2000", | |
322 Volume="132", | |
323 Pages="365--386", | |
324 URL="{http://purl.com/STEVEROZEN/papers/rozen-and-skaletsky-2000-primer3.pdf}" | |
325 }</citation> | |
2 | 326 <citation type="doi">10.1093/bioinformatics/btp163</citation> |
327 <citation type="doi">10.1186/1471-2105-13-31</citation> | |
0 | 328 </citations> |
329 </tool> |