Mercurial > repos > iuc > snp_sites
changeset 1:5804f786060d draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snp-sites commit e4222f6179041846870721bfcb4d8e587ead8d8f"
author | iuc |
---|---|
date | Sat, 02 Nov 2019 11:12:17 -0400 |
parents | efe8f0e53d3e |
children | |
files | snp-sites.xml test-data/output_actg.fasta test-data/output_counts.tabular test-data/output_monomorphic.fasta test-data/output_monomorphic_actg.fasta |
diffstat | 5 files changed, 157 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/snp-sites.xml Fri Jun 30 16:43:26 2017 -0400 +++ b/snp-sites.xml Sat Nov 02 11:12:17 2019 -0400 @@ -1,30 +1,140 @@ <?xml version='1.0' encoding='UTF-8'?> -<tool id='snp_sites' name='Finds SNP sites' version='0.1.0'> +<tool id='snp_sites' name='Finds SNP sites' version='@TOOL_VERSION@+galaxy0'> <description>from a multi-FASTA alignment file</description> + <macros> + <token name="@TOOL_VERSION@">2.5.1</token> + </macros> <requirements> - <requirement type='package' version='2.3.2'>snp-sites</requirement> + <requirement type='package' version='@TOOL_VERSION@'>snp-sites</requirement> </requirements> <version_command>snp-sites -V</version_command> <command detect_errors='aggressive'><![CDATA[ +#if str($out_type.ot_select) == "alignment" + #if str($out_type.formats) == 'None' + echo "Error, no output format specified" >&2 && exit 1 ; + #set outputs="" + #else + #set outputs=" ".join(str($out_type.formats).split(",")) + #end if +#end if + snp-sites --mvp --o output -'$input_fasta' +#if str($out_type.ot_select) == "alignment" + $outputs + $out_type.b + $out_type.c +#else if str($out_type.ot_select) == "counts" + -C +#end if + -o output + '$input_fasta' + +#if str($out_type.ot_select) == "counts" + && mv output $output_counts +#else if ',' in str($out_type.formats) + #if '-m' in str($out_type.formats) + && mv 'output.snp_sites.aln' '$output_fasta' + #end if + #if '-v' in str($out_type.formats) + && mv 'output.vcf' '$output_vcf' + #end if + #if '-p' in str($out_type.formats) + && mv 'output.phylip' '$output_phylip' + #end if +#else + #if str($out_type.formats) == '-m' + && mv 'output' '$output_fasta' + #else if str($out_type.formats) == '-v' + && mv 'output' '$output_vcf' + #else if str($out_type.formats) == '-p' + && mv 'output' '$output_phylip' + #end if +#end if ]]></command> <inputs> + <param name='input_fasta' format='fasta' type='data' label='FASTA file' /> + <conditional name="out_type" label="Output type"> + <param type="select" name="ot_select" label="Output"> + <option value="alignment" selected="true">Sequence alignment / VCF</option> + <option value="counts">Constant site count (suitable for IQ-TREE -fconst)</option> + </param> + <when value="alignment"> + <param type='select' multiple='true' name='formats' label='Output formats' help='Output format to use if not counting constant sites'> + <option value="-m" selected="true">Multi-FASTA alignment file</option> + <option value="-v">VCF</option> + <option value="-p">Phylip</option> + </param> + <param argument="-b" label="Output constant sites (suitable for use in BEAST)" truevalue="-b" falsevalue="" + type="boolean" help="Output monomorphic (constant) as opposed to polymorphic (variable) sites"/> + <param argument="-c" label="Only output columns containing exclusively ACTG" truevalue="-c" falsevalue="" + type="boolean" help="Ignore columns containing ambiguous characters and gaps"/> + </when> + <when value="counts"> + </when> + </conditional> </inputs> <outputs> - <data name='output_fasta' format='fasta' label='${tool.name} on ${on_string}: FASTA' from_work_dir='output.snp_sites.aln'/> - <data name='output_vcf' format='vcf' label='${tool.name} on ${on_string}: VCF' from_work_dir='output.vcf'/> - <data name='output_phylip' format='phylip' label='${tool.name} on ${on_string}: phylip' from_work_dir='output.phylip'/> + <data name='output_fasta' format='fasta' label='${tool.name} on ${on_string}: FASTA'> + <!-- this becomes a catch-all for when no format is specified. if no format is specified the job ends with an error but shows 1 red dataset output --> + <filter>(out_type["ot_select"] == "alignment" and out_type["formats"] is not None and '-m' in out_type["formats"]) or (out_type["ot_select"] == "alignment" and out_type.formats is None)</filter> + </data> + <data name='output_vcf' format='vcf' label='${tool.name} on ${on_string}: VCF' from_work_dir='output.vcf'> + <filter>out_type["ot_select"] == "alignment" and out_type["formats"] is not None and '-v' in out_type["formats"]</filter> + </data> + <data name='output_phylip' format='phylip' label='${tool.name} on ${on_string}: phylip'> + <filter>out_type["ot_select"] == "alignment" and out_type["formats"] is not None and '-p' in out_type["formats"]</filter> + </data> + <data name='output_counts' format='tabular' label='${tool.name} on ${on_string}: constant site counts'> + <filter>out_type["ot_select"] == "counts"</filter> + </data> </outputs> <tests> - <test> + <test expect_num_outputs="3"> + <param name='input_fasta' value='input.fasta' ftype='fasta' /> + <conditional name="out_type"> + <param name="ot_select" value="alignment" /> + <param name='formats' value='-m,-p,-v' /> + </conditional> + <output name='output_fasta' value='output.fasta' ftype='fasta' /> + <output name='output_vcf' value='output.vcf' ftype='vcf' /> + <output name='output_phylip' value='output.phylip' ftype='phylip' /> + </test> + <test expect_num_outputs="1"> + <param name='input_fasta' value='input.fasta' ftype='fasta' /> + <conditional name="out_type"> + <param name="ot_select" value="alignment" /> + <param name='formats' value='-m' /> + <param name='b' value='True' /> + </conditional> + <output name='output_fasta' value='output_monomorphic.fasta' ftype='fasta' /> + </test> + <test expect_num_outputs="1"> <param name='input_fasta' value='input.fasta' ftype='fasta' /> - <output name='output_fasta' value='output.fasta' ftype='fasta' /> - <output name='output_vcf' value='output.vcf' ftype='vcf' /> - <output name='output_phylip' value='output.phylip' ftype='phylip' /> + <conditional name="out_type"> + <param name="ot_select" value="alignment" /> + <param name='formats' value='-m' /> + <param name='c' value='True' /> + </conditional> + <output name='output_fasta' value='output_actg.fasta' ftype='fasta' /> + </test> + <test expect_num_outputs="1"> + <param name='input_fasta' value='input.fasta' ftype='fasta' /> + <conditional name="out_type"> + <param name="ot_select" value="alignment" /> + <param name='formats' value='-m' /> + <param name='b' value='True' /> + <param name="c" value="True" /> + </conditional> + <output name='output_fasta' value='output_monomorphic_actg.fasta' ftype='fasta' /> + </test> + <test expect_num_outputs="1"> + <param name='input_fasta' value='input.fasta' ftype='fasta' /> + <conditional name="out_type"> + <param name="ot_select" value="counts" /> + <param name='C' value='True' /> + </conditional> + <output name='output_counts' value='output_counts.tabular' ftype='tabular' /> </test> </tests> <help> @@ -32,7 +142,11 @@ **SNP-sites** -This tool can rapidly extract SNPs from a multi-FASTA alignment using modest resources and can output results in multiple formats for downstream analysis. SNPs can be extracted from a 8.3 GB alignment file (1,842 taxa, 22,618 sites) in 267 seconds using 59 MB of RAM and 1 CPU core, making it feasible to run on modest computers. SNP-sites is implemented in C and is available under the open source license GNU GPL version 3. +This tool can rapidly extract SNPs from a multi-FASTA alignment using modest resources and can output results in multiple formats for downstream analysis. SNPs can be extracted from a 8.3 GB alignment file (1,842 taxa, 22,618 sites) in 267 seconds using 59 MB of RAM and 1 CPU core, making it feasible to run on modest computers. +SNP-sites is implemented in C and is available under the open source license GNU GPL version 3. + +Alternatively it can report on the number of constant sites in an alignment in a format (A count, C count, G count, T count) suitable for passing +to IQ-TREE's `-fconst` option. **Input FASTA format:** @@ -49,6 +163,16 @@ >sample1 AAACGCATTCAN +**Options** + +In addition to the default output mode, this tool supports three other options: + +1. Output only constant (monomorphic) (i.e. not polymorphic) sites (useful for input to BEAST) + +2. Output only columns that have purely ACTGs in them (i.e. no gaps or ambiguous nucleotides). This can be used together with the previous option. + +3. Output only a count of constant sites, in a format suitable for use with IQ-TREE `-fconst`. This option cannot be used together with the previously mentioned options. + **Output files:** The output of the tool are three different files in following format: @@ -56,6 +180,7 @@ - a multi fasta alignment, - relaxed phylip format and, - VCF. +- alternatively constant site counts (As, Cs, Gs and Ts). The VCF file for the above specified input is
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_actg.fasta Sat Nov 02 11:12:17 2019 -0400 @@ -0,0 +1,6 @@ +>sample1 +GA +>sample1 +GA +>sample1 +AG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_counts.tabular Sat Nov 02 11:12:17 2019 -0400 @@ -0,0 +1,1 @@ +4,4,0,1