Mercurial > repos > gene > raptor_seqan3
changeset 0:bbdc3fdf5298 draft default tip
"planemo upload commit 79f62a93f1c45ae643ce01ff3dcf9662c304c11b-dirty"
author | gene |
---|---|
date | Mon, 04 Oct 2021 09:54:45 +0000 |
parents | |
children | |
files | README.md raptor-build.xml raptor-search.xml test-data/expected_query_results.txt test-data/genome0.fasta test-data/genome1.fasta test-data/genome_files.txt test-data/query.fasta test-data/test_expected.index |
diffstat | 9 files changed, 310 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Mon Oct 04 09:54:45 2021 +0000
@@ -0,0 +1,40 @@
+# raptor-galaxy
+
+This readme explains how to add a tool to bioconda and galaxy (by using planemo).
+
+## Bioconda
+Bioconda has so-called recipes for every available tool, which are stored in its git repo https://github.com/bioconda/bioconda-recipes .
+To get your tool into bioconda you have to create a PR with a recipe for your tool.
+You can use the recipe of raptor as a template: https://github.com/bioconda/bioconda-recipes/tree/master/recipes/raptor
+
+## Install and setup of planemo
+The next 3 steps will install planemo into the directory `.venv-planemo`.
+```
+$ virtualenv .venv-planemo; source .venv-planemo/bin/activate
+$ pip install "pip>=7"
+$ pip install planemo
+```
+Each time you open a new console and want to use planemo, you need to activate the planemo environment:
+```
+$ source .venv-planemo/bin/activate
+```
+For more information on how to install planemo, check out the project at https://github.com/galaxyproject/planemo .
+
+
+## Account on the toolshed
+There is the main toolshed at https://toolshed.g2.bx.psu.edu/ and the test toolshed at https://testtoolshed.g2.bx.psu.edu/.
+The steps for both are the same. We will show here how to use the testtoolshed.
+- create an account on https://testtoolshed.g2.bx.psu.edu
+- run `$ planemo config_init` to create a planemo config
+- insert your `shed_username` and API key into `~/.planemo.yml`
+
+## Creating a new description for a tool
+Let's assume you want to add a new subcommand to raptor called `newsubcommand`.
+- copy raptor-build.xml to raptor-newsubcommand.xml
+- adjust the XML file to your own needs
+- run `$ planemo test raptor-newsubcommand.xml` to check tests
+- run `$ planemo lint raptor-newsubcommand.xml` to lint
+
+## Publishing your tool
+Make sure to bump the version number of the tool for every new release (see the XML tag `<tool version="...">`).
+- run `$ planemo shed_update --shed_target testtoolshed path/to/this/repo` to publish the tools of this repository
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/raptor-build.xml Mon Oct 04 09:54:45 2021 +0000
@@ -0,0 +1,159 @@
+<tool id="raptor-build" name="raptor build" version="2.0.0+8" python_template_version="3.5">
+    <!-- Galaxy wrapper for `raptor build`: turns the selected FASTA datasets (one bin each) into a Raptor index. -->
+    <description>
+        Builds an index to be used by `raptor search`.
+    </description>
+    <requirements>
+        <requirement type="package" version="2.0.0">raptor</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Symlink each input dataset to a name ending in .fasta (raptor accepts
+        ## only certain file extensions) and list the links in files.txt, one bin per line.
+        echo "" > files.txt;
+        #for $i, $s in enumerate($inputs)
+            ln -s '${s}' genome${i}.fasta;
+            echo genome${i}.fasta >> files.txt;
+        #end for
+
+        ## The positional argument has to be the list file written above (files.txt).
+        raptor build files.txt
+            ## --window is optional and only passed when the user supplied a value.
+            #if $window# --window ${window} #end if#
+            --threads ${threads}
+            --parts ${parts}
+            --kmer ${kmer}
+            ## size is free text, so it is quoted before it reaches the shell.
+            --size '${size}'
+            --hash ${hash}
+            ${compressed}
+            ${compute_minimiser}
+            ${disable_cutoffs}
+            --output output.index;
+    ]]></command>
+    <inputs>
+        <param name="inputs" label="Files representing a bin each" type="data" format="fasta" multiple="true" />
+        <param name="threads" label="Threads" type="integer" value="1" min="1" help="The number of threads to use. Default: 1. Value must be a positive integer." />
+        <param name="parts" label="Parts" type="integer" value="1" min="1" help="Splits the index in this many parts. Default: 1. Value must be a power of two." />
+        <param name="window" label="Window size" type="integer" optional="true" min="1" help="The window size. Default: kmer size. Value must be a positive integer." />
+        <param name="kmer" label="Kmer size" type="integer" value="20" min="1" max="32" help="The k-mer size. Default: 20. Value must be in range [1,32]." />
+        <param name="size" label="Index size" type="text" value="1k" help="The size in bytes of the resulting index. Default: 1k. Must be an integer followed by [k,m,g,t] (case insensitive).">
+            <option value="1k">1k</option>
+            <option value="1m">1m</option>
+            <option value="1g">1g</option>
+            <option value="1t">1t</option>
+            <validator type="regex" message="Must be an integer followed by one of k, m, g, t (case insensitive).">^[0-9]+[kKmMgGtT]$</validator>
+        </param>
+        <param name="hash" label="Hash functions" type="integer" value="2" min="1" max="5" help="The number of hash functions to use. Default: 2. Value must be in range [1,5]." />
+        <param name="compressed" label="Index compression" type="boolean" truevalue="--compressed" falsevalue="" help="Build a compressed index." />
+        <param name="compute_minimiser" label="Compute Minimiser" type="boolean" truevalue="--compute-minimiser" falsevalue="" help="Computes minimisers using cutoffs from Mantis (Pandey et al.). Does not create the index." />
+        <param name="disable_cutoffs" label="Disable cutoffs" type="boolean" truevalue="--disable-cutoffs" falsevalue="" help="Do not apply cutoffs when using --compute-minimiser." />
+    </inputs>
+    <outputs>
+        <data name="index" format="binary" from_work_dir="output.index" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="inputs" value="genome0.fasta,genome1.fasta" />
+            <param name="kmer" value="2" />
+            <param name="size" value="1k" />
+            <output name="index" file="test_expected.index" />
+        </test>
+    </tests>
+
+    <help><![CDATA[Raptor - A fast and space-efficient pre-filter for querying very large collections of nucleotide sequences.
+===========================================================================================================
+
+POSITIONAL ARGUMENTS
+    ARGUMENT-1 (std::filesystem::path)
+          File containing file names. The file must contain at least one file
+          path per line, with multiple paths being separated by a whitespace.
+          Each line in the file corresponds to one bin. Valid extensions for
+          the paths in the file are [minimiser] when preprocessing, and
+          [embl,fasta,fa,fna,ffn,faa,frn,fas,fastq,fq,genbank,gb,gbk,sam],
+          possibly followed by [bz2,gz,bgzf] otherwise. The input file must
+          exist and read permissions must be granted.
+
+OPTIONS
+
+  Basic options:
+    --threads (unsigned 8 bit integer)
+          The numer of threads to use. Default: 1. Value must be a positive
+          integer.
+    --parts (unsigned 8 bit integer)
+          Splits the index in this many parts. Default: 1. Value must be a
+          power of two.
+    --window (unsigned 32 bit integer)
+          The window size. Default: 20. Value must be a positive integer.
+    --kmer (unsigned 8 bit integer)
+          The k-mer size. Default: 20. Value must be in range [1,32].
+    --output (std::filesystem::path)
+          Provide an output filepath or an output directory if
+          --compute-minimiser is used.
+    --size (std::string)
+          The size in bytes of the resulting index. Default: 1k. Must be an
+          integer followed by [k,m,g,t] (case insensitive).
+    --hash (unsigned 64 bit integer)
+          The number of hash functions to use. Default: 2. Value must be in
+          range [1,5].
+    --compressed
+          Build a compressed index.
+    --compute-minimiser
+          Computes minimisers using cutoffs from Mantis (Pandey et al.). Does
+          not create the index.
+    --disable-cutoffs
+          Do not apply cutoffs when using --compute-minimiser.
+
+EXAMPLES
+    raptor build --kmer 19 --window 23 --size 8m --output raptor.index
+    all_bin_paths.txt
+
+    raptor build --kmer 19 --window 23 --compute-minimiser --output
+    precomputed_minimisers all_bin_paths.txt
+
+    raptor build --size 8m --output minimiser_raptor.index
+    all_minimiser_paths.txt
+
+VERSION
+    Last update: 2021-08-26
+    Raptor version: 2.0.0 (raptor-v2.0.0)
+    SeqAn version: 3.1.0-rc.2
+
+URL
+    https://github.com/seqan/raptor
+
+LEGAL
+    Raptor Copyright: BSD 3-Clause License
+    Author: Enrico Seiler
+    Contact: enrico.seiler@fu-berlin.de
+    SeqAn Copyright: 2006-2021 Knut Reinert, FU-Berlin; released under the
+    3-clause BSDL.
+    In your academic works please cite: Raptor: A fast and space-efficient
+    pre-filter for querying very large collections of nucleotide sequences;
+    Enrico Seiler, Svenja Mehringer, Mitra Darvish, Etienne Turc, and Knut
+    Reinert; iScience 2021 24 (7): 102782. doi:
+    https://doi.org/10.1016/j.isci.2021.102782
+    For full copyright and/or warranty information see --copyright.
+    ]]></help>
+    <citations>
+        <citation type="bibtex">
+@Article{Seiler2021,
+author={Seiler, Enrico
+and Mehringer, Svenja
+and Darvish, Mitra
+and Turc, Etienne
+and Reinert, Knut},
+title={Raptor: A fast and space-efficient pre-filter for querying very large collections of nucleotide sequences},
+journal={iScience},
+year={2021},
+month={Jul},
+day={23},
+publisher={Elsevier},
+volume={24},
+number={7},
+issn={2589-0042},
+doi={10.1016/j.isci.2021.102782},
+url={https://doi.org/10.1016/j.isci.2021.102782}
+}
+</citation>
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/raptor-search.xml Mon Oct 04 09:54:45 2021 +0000
@@ -0,0 +1,108 @@
+<tool id="raptor-search" name="raptor search" version="2.0.0+8" python_template_version="3.5">
+    <!-- Galaxy wrapper for `raptor search`: queries an index with a FASTA file and writes the hits to a text dataset. -->
+    <description>
+        Queries an index to determine which fasta file the query can be found in.
+    </description>
+    <requirements>
+        <requirement type="package" version="2.0.0">raptor</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Give the query dataset a file name ending in .fasta before handing it to raptor.
+        ln -s '${query}' query.fasta;
+        raptor search
+            --index '${index}'
+            --query query.fasta
+            --output results.txt
+            --threads ${threads}
+            --error ${error}
+            ## Each remaining option is emitted only if its parameter evaluates as true in the template.
+            #if $threshold# --threshold ${threshold} #end if#
+            #if $tau# --tau ${tau} #end if#
+            #if $pattern# --pattern ${pattern} #end if#
+    ]]></command>
+    <inputs>
+        <param name="threads" label="Threads" type="integer" value="1" min="1" help="The number of threads to use. Default: 1. Value must be a positive integer." />
+        <param name="index" label="Index" type="data" format="binary" help="Provide a valid path to an index. Parts: Without suffix _0" />
+        <param name="query" label="Query" type="data" format="fasta" help="Provide a path to the query file. The input file must exist and read permissions must be granted." />
+        <param name="error" label="Allowed errors" type="integer" value="0" min="0" help="The number of errors. Default: 0. Value must be a positive integer or 0." />
+        <param name="tau" label="Probabilistic threshold τ" type="float" value="0.99" min="0" max="1" help="Threshold for probabilistic models. Default: 0.99. Value must be in range [0,1]." />
+        <param name="threshold" label="Non-Probabilistic threshold" type="float" value="0" min="0" max="1" optional="true" help="If set, this threshold is used instead of the probabilistic models. Default: 0. Value must be in range [0,1]." />
+        <param name="pattern" label="Pattern size" type="integer" optional="true" help="The pattern size. Default: Use median of sequence lengths in query file. Default: 0." />
+    </inputs>
+    <outputs>
+        <data name="results" format="txt" from_work_dir="results.txt" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="index" value="test_expected.index" />
+            <param name="query" value="query.fasta" />
+            <output name="results" file="expected_query_results.txt" />
+        </test>
+    </tests>
+    <help><![CDATA[Raptor - A fast and space-efficient pre-filter for querying very large collections of nucleotide sequences.
+===========================================================================================================
+
+OPTIONS
+
+  Basic options:
+    --threads (unsigned 8 bit integer)
+          The numer of threads to use. Default: 1. Value must be a positive integer.
+    --index (std::filesystem::path)
+          Provide a valid path to an index. Parts: Without suffix _0
+    --query (std::filesystem::path)
+          Provide a path to the query file. The input file must exist and read permissions must be granted.
+    --output (std::filesystem::path)
+          Provide a path to the output.
+    --error (unsigned 8 bit integer)
+          The number of errors Default: 0. Value must be a positive integer or 0.
+    --tau (double)
+          Threshold for probabilistic models. Default: 0.99. Value must be in range [0,1].
+    --threshold (double)
+          If set, this threshold is used instead of the probabilistic models. Default: 0. Value must be in range
+          [0,1].
+    --pattern (unsigned 64 bit integer)
+          The pattern size. Default: Use median of sequence lengths in query file. Default: 0.
+
+EXAMPLES
+    raptor search --error 2 --index raptor.index --query queries.fastq --output search.output
+
+VERSION
+    Last update: 2021-08-26
+    Raptor version: 2.0.0 (raptor-v2.0.0)
+    SeqAn version: 3.1.0-rc.2
+
+URL
+    https://github.com/seqan/raptor
+
+LEGAL
+    Raptor Copyright: BSD 3-Clause License
+    Author: Enrico Seiler
+    Contact: enrico.seiler@fu-berlin.de
+    SeqAn Copyright: 2006-2021 Knut Reinert, FU-Berlin; released under the 3-clause BSDL.
+    In your academic works please cite: Raptor: A fast and space-efficient pre-filter for querying very large
+    collections of nucleotide sequences; Enrico Seiler, Svenja Mehringer, Mitra Darvish, Etienne Turc, and Knut
+    Reinert; iScience 2021 24 (7): 102782. doi: https://doi.org/10.1016/j.isci.2021.102782
+    For full copyright and/or warranty information see --copyright.
+    ]]></help>
+    <citations>
+        <citation type="bibtex">
+@Article{Seiler2021,
+author={Seiler, Enrico
+and Mehringer, Svenja
+and Darvish, Mitra
+and Turc, Etienne
+and Reinert, Knut},
+title={Raptor: A fast and space-efficient pre-filter for querying very large collections of nucleotide sequences},
+journal={iScience},
+year={2021},
+month={Jul},
+day={23},
+publisher={Elsevier},
+volume={24},
+number={7},
+issn={2589-0042},
+doi={10.1016/j.isci.2021.102782},
+url={https://doi.org/10.1016/j.isci.2021.102782}
+}</citation>
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/expected_query_results.txt Mon Oct 04 09:54:45 2021 +0000 @@ -0,0 +1,5 @@ +#0 genome0.fasta +#1 genome1.fasta +#QUERY_NAME USER_BINS +query1 +query2 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome0.fasta Mon Oct 04 09:54:45 2021 +0000 @@ -0,0 +1,2 @@ +>Charmander +ACGTACGT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome1.fasta Mon Oct 04 09:54:45 2021 +0000 @@ -0,0 +1,2 @@ +>Caterpie +AACCGGTT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome_files.txt Mon Oct 04 09:54:45 2021 +0000 @@ -0,0 +1,2 @@ +genome0.fasta +genome1.fasta