Mercurial > repos > pjbriggs > pal_finder
diff pal_finder_wrapper.xml @ 2:b6ccc7dd7b02 draft
Version 0.02.04.3.
author | pjbriggs |
---|---|
date | Fri, 04 Dec 2015 07:43:30 -0500 |
parents | 771ebe02636f |
children | e1a14ed7a9d6 |
line wrap: on
line diff
--- a/pal_finder_wrapper.xml Mon Mar 23 07:01:37 2015 -0400 +++ b/pal_finder_wrapper.xml Fri Dec 04 07:43:30 2015 -0500 @@ -1,17 +1,28 @@ -<tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.2"> - <description>Find microsatellite repeat elements sequencing reads and design PCR primers to amplify them</description> +<tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.3"> + <description>Find microsatellite repeat elements from sequencing reads and design PCR primers to amplify them</description> + <requirements> + <requirement type="package" version="5.16.3">perl</requirement> + <requirement type="package" version="0.02.04">pal_finder</requirement> + <requirement type="package" version="2.0.0">primer3_core</requirement> + <requirement type="package" version="1.65">biopython</requirement> + <requirement type="package" version="2.8.1">pandaseq</requirement> + </requirements> <command interpreter="bash">pal_finder_wrapper.sh #if str( $platform.platform_type ) == "illumina" - $platform.input_fastq_r1 $platform.input_fastq_r2 + #set $paired_input_type = $platform.paired_input_type_conditional.paired_input_type + #if $paired_input_type == "pair_of_files" + "$platform.paired_input_type_conditional.input_fastq_r1" + "$platform.paired_input_type_conditional.input_fastq_r2" + #else + "$platform.paired_input_type_conditional.input_fastq_pair.forward" + "$platform.paired_input_type_conditional.input_fastq_pair.reverse" + #end if #else - --454 $platform.input_fasta + --454 "$platform.input_fasta" #end if $output_microsat_summary $output_pal_summary - #if str( $platform.platform_type ) == "illumina" and $platform.filter_microsats - --filter_microsats $output_filtered_microsats - #end if #if $keep_config_file - --output_config_file $output_config_file + --output_config_file "$output_config_file" #end if --primer-prefix "$primer_prefix" --2merMinReps $min_2mer_repeats @@ -35,12 +46,18 @@ #if str( $mispriming.mispriming_options ) == "custom" --primer-mispriming-library $mispriming.mispriming_library #end if + #if str( $platform.platform_type ) == "illumina" + #if $platform.filters + #for $filter in str($platform.filters).split(',') + $filter + --filter_microsats "$output_filtered_microsats" + #end for + #end if + #if str( $platform.assembly ) == '-assembly' + $platform.assembly "$output_assembly" + #end if + #end if </command> - <requirements> - <requirement type="package" version="5.16.3">perl</requirement> - <requirement type="package" version="0.02.04">pal_finder</requirement> - <requirement type="package" version="2.0.0">primer3_core</requirement> - </requirements> <inputs> <param name="primer_prefix" type="text" value="test" size="25" label="Primer prefix" help="This prefix will be added to the beginning of all primer names" /> <conditional name="platform"> @@ -49,11 +66,33 @@ <option value="454">454</option> </param> <when value="illumina"> - <param name="input_fastq_r1" type="data" format="fastqsanger" label="Illumina fastq file (read 1)" /> - <param name="input_fastq_r2" type="data" format="fastqsanger" label="Illumina fastq file (read 2)" /> - <param name="filter_microsats" type="boolean" truevalue="True" falsevalue="False" - label="Filter and sort the microsatellites" checked="True" - help="Filter pal_finder results to only include lines with primer sequences and remove non-perfect repeats" /> + <conditional name="paired_input_type_conditional"> + <param name="paired_input_type" type="select" label="Input Type"> + <option value="pair_of_files" selected="true">Pair of datasets</option> + <option value="collection">Dataset collection pair</option> + </param> + <when value="pair_of_files"> + <param name="input_fastq_r1" type="data" format="fastqsanger" + label="Illumina fastq file (read 1)" /> + <param name="input_fastq_r2" type="data" format="fastqsanger" + label="Illumina fastq file (read 2)" /> + </when> + <when value="collection"> + <param name="input_fastq_pair" format="fastqsanger" + type="data_collection" collection_type="paired" + label="Select FASTQ dataset collection with R1/R2 pair" /> + </when> + </conditional> + <param name="filters" type="select" display="checkboxes" + multiple="True" label="Filters to apply to the pal_finder results" + help="Apply none, one or more filters to refine results"> + <option value="-primers" selected="True">Only include loci with designed primers</option> + <option value="-occurrences" selected="True">Exclude loci where the primer sequences occur more than once in the reads</option> + <option value="-rankmotifs" selected="True">Only include loci with 'perfect' motifs, and rank by motif size</option> + </param> + <param name="assembly" type="boolean" + checked="True" truevalue="-assembly" falsevalue="" + label="Use PANDAseq to assemble paired-end reads and confirm primer sequences are present in high-quality assembly" /> </when> <when value="454"> <param name="input_fasta" type="data" format="fasta" label="454 fasta file with raw reads" /> @@ -117,12 +156,15 @@ help="Can be used to run pal_finder outside of Galaxy" /> </inputs> <outputs> - <data name="output_microsat_summary" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix} (microsatellite types)" /> - <data name="output_pal_summary" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix} (microsatellites with read IDs and primer pairs)" /> - <data name="output_filtered_microsats" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix} (filtered and sorted microsatellites)"> - <filter>platform['platform_type'] == 'illumina' and platform['filter_microsats']</filter> + <data name="output_pal_summary" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: all microsatellites (full details)" /> + <data name="output_filtered_microsats" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: filtered microsatellites (full details)"> + <filter>platform['platform_type'] == 'illumina' and platform['filters'] is not None</filter> </data> - <data name="output_config_file" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix} (config file)"> + <data name="output_microsat_summary" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix}: summary of microsatellite types" /> + <data name="output_assembly" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: assembly"> + <filter>platform['assembly'] is True</filter> + </data> + <data name="output_config_file" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix}: config file"> <filter>keep_config_file is True</filter> </data> </outputs> @@ -132,24 +174,77 @@ <param name="platform_type" value="illumina" /> <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> - <!-- - **NB** outputs have to be specified in order that they appear in the - tool (which is the order they will be written to the history) - the - test framework seems to use the order and ignores the "name" attribute - --> + <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" /> + <output name="output_pal_summary" file="illuminaPE_microsats.out" /> + <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats.out" /> + <output name="output_assembly" file="illuminaPE_assembly_after_filters.out" /> + </test> + <test> + <!-- Test with Illumina input as dataset pair --> + <param name="platform_type" value="illumina" /> + <param name="paired_input_type" value="collection" /> + <param name="input_fastq_pair"> + <collection type="paired"> + <element name="forward" value="illuminaPE_r1.fq" ftype="fastqsanger" /> + <element name="reverse" value="illuminaPE_r2.fq" ftype="fastqsanger" /> + </collection> + </param> <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" /> <output name="output_pal_summary" file="illuminaPE_microsats.out" /> <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats.out" /> + <output name="output_assembly" file="illuminaPE_assembly_after_filters.out" /> + </test> + <test> + <!-- Test with Illumina input filter to loci with PandaSEQ assembly + ('-assembly' option) --> + <param name="platform_type" value="illumina" /> + <param name="filters" value="" /> + <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> + <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> + <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" /> + <output name="output_pal_summary" file="illuminaPE_microsats.out" /> + <output name="output_assembly" file="illuminaPE_assembly.out" /> + </test> + <test> + <!-- Test with Illumina input filter to loci with primers + ('-primers' option) --> + <param name="platform_type" value="illumina" /> + <param name="filters" value="-primers" /> + <param name="assembly" value="false" /> + <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> + <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> + <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" /> + <output name="output_pal_summary" file="illuminaPE_microsats.out" /> + <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats_primers.out" /> + </test> + <test> + <!-- Test with Illumina input filter to loci which appear only once + ('-occurrences' option) --> + <param name="platform_type" value="illumina" /> + <param name="filters" value="-occurrences" /> + <param name="assembly" value="false" /> + <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> + <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> + <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" /> + <output name="output_pal_summary" file="illuminaPE_microsats.out" /> + <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats_occurrences.out" /> + </test> + <test> + <!-- Test with Illumina input filter and rank loci with perfect motifs + ('-rankmotifs' option) --> + <param name="platform_type" value="illumina" /> + <param name="filters" value="-rankmotifs" /> + <param name="assembly" value="false" /> + <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> + <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> + <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" /> + <output name="output_pal_summary" file="illuminaPE_microsats.out" /> + <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats_rankmotifs.out" /> </test> <test> <!-- Test with 454 input --> <param name="platform_type" value="454" /> <param name="input_fasta" value="454_in.fa" ftype="fasta" /> - <!-- - **NB** outputs have to be specified in order that they appear in the - tool (which is the order they will be written to the history) - the - test framework seems to use the order and ignores the "name" attribute - --> <output name="output_microsat_summary" file="454_microsat_types.out" /> <output name="output_pal_summary" file="454_microsats.out" /> </test> @@ -163,9 +258,15 @@ directly from raw 454 or Illumina paired-end sequencing reads. It then designs PCR primers to amplify these repeat loci (Potentially Amplifiable Loci: PAL). -Optionally for Illumina data, the output from pal_finder can also be filtered to -remove any motifs without primer sequences, and with non-perfect microsatellites. -The microsatellites are then ranked by motif size (largest to smallest). +Optionally for Illumina data, one or more filters can be applied to the output from +pal_finder to: + + * Only include loci with designed primers + * Exclude loci where the primer sequences occur more than once in the reads + * Only include loci with 'perfect' motifs (and rank by motif size,largest to + smallest) + * Use PANDAseq to assemble paired-end reads and confirm primer sequences are + present in high-quality assembly Pal_finder runs the primer3_core program; information on the settings used in primer3_core can be found in the Primer3 manual at @@ -199,12 +300,12 @@ The paper is available at http://purl.com/STEVEROZEN/papers/rozen-and-skaletsky-2000-primer3.pdf -The filtering and sorting of the pal_finder output for Illumina data is performed -using a Perl script written by Graeme Fox at the University of Manchester, and which -is included with this tool. +The filtering and assembly of the pal_finder output for Illumina data is performed +using a Python utility written by Graeme Fox at the University of Manchester, and which +is included with this tool; this utility uses the BioPython and PANDAseq packages. Please kindly acknowledge both this Galaxy tool, the pal_finder and primer3 packages, and -the utility script if you use it in your work. +the utility script and its dependencies if you use it in your work. </help> <citations> <!-- @@ -214,7 +315,7 @@ --> <citation type="doi">10.1371/journal.pone.0030953</citation> <citation type="bibtex">@Article{pmid10547847, - Author="Rozen, S. and Skaletsky, H. ", + Author="Rozen, S. and Skaletsky, H. ", Title="{{P}rimer3 on the {W}{W}{W} for general users and for biologist programmers}", Journal="Methods Mol. Biol.", Year="2000", @@ -222,5 +323,7 @@ Pages="365--386", URL="{http://purl.com/STEVEROZEN/papers/rozen-and-skaletsky-2000-primer3.pdf}" }</citation> + <citation type="doi">10.1093/bioinformatics/btp163</citation> + <citation type="doi">10.1186/1471-2105-13-31</citation> </citations> </tool>