Mercurial > repos > iuc > mash_sketch
changeset 2:91ee99b4f05a draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mash commit 57309b08e7a08b6f72982f0d78cb5574616c7b67"
author | iuc |
---|---|
date | Sat, 24 Apr 2021 11:29:14 +0000 |
parents | 77289957d47b |
children | 43f9ca23c132 |
files | macros.xml mash_sketch.xml test-data/ERR024951_seqtk_sample_1000_1.sketch.msh test-data/test_01_mash_sketch.msh test-data/test_02_mash_sketch.msh test-data/test_03_mash_sketch.msh test-data/test_04_mash_sketch.msh test-data/test_05_mash_sketch.msh test-data/test_06_mash_sketch.msh test-data/test_07_mash_sketch.msh test-data/test_assembly.sketch.msh |
diffstat | 11 files changed, 111 insertions(+), 33 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Wed Feb 26 15:48:46 2020 -0500 +++ b/macros.xml Sat Apr 24 11:29:14 2021 +0000 @@ -1,23 +1,19 @@ <macros> - <token name="@TOOL_VERSION@">2.1</token> + <token name="@TOOL_VERSION@">2.3</token> <token name="@INTYPES@"> fasta,fasta.gz,fastq,fastq.gz,fastqsanger,fastqsanger.gz </token> - + <xml name="requirements"> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">mash</requirement> + </requirements> + </xml> <xml name="citations"> <citations> <citation type="doi">10.1186/s13059-016-0997-x</citation> </citations> </xml> - - <xml name="requirements"> - <requirements> - <requirement type="package" version="@TOOL_VERSION@">mash</requirement> - </requirements> - </xml> - <xml name="version_command"> <version_command>mash --version</version_command> </xml> - </macros>
--- a/mash_sketch.xml Wed Feb 26 15:48:46 2020 -0500 +++ b/mash_sketch.xml Sat Apr 24 11:29:14 2021 +0000 @@ -11,22 +11,29 @@ mash sketch -s '${sketch_size}' -k '${kmer_size}' + -w '${prob_threshold}' #if str ( $reads_assembly.reads_assembly_selector ) == "reads" - -m '${reads_assembly.minimum_kmer_copies}' - -r - #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired" - '$reads_assembly.reads_input.reads_1' '$reads_assembly.reads_input.reads_2' - #end if - #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection" - '$reads_assembly.reads_input.reads.forward' '$reads_assembly.reads_input.reads.reverse' - #end if - #if str( $reads_assembly.reads_input.reads_input_selector ) == "single" - '$reads_assembly.reads_input.reads' - #end if + -m '${reads_assembly.minimum_kmer_copies}' + -r + #if $reads_assembly.target_coverage + -c '${reads_assembly.target_coverage}' + #end if + #if $reads_assembly.genome_size + -g '${reads_assembly.genome_size}' + #end if + #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired" + '$reads_assembly.reads_input.reads_1' '$reads_assembly.reads_input.reads_2' + #end if + #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection" + '$reads_assembly.reads_input.reads.forward' '$reads_assembly.reads_input.reads.reverse' + #end if + #if str( $reads_assembly.reads_input.reads_input_selector ) == "single" + '$reads_assembly.reads_input.reads' + #end if #elif str ( $reads_assembly.reads_assembly_selector ) == "assembly" - -p \${GALAXY_SLOTS:-1} - '${assembly}' - ${reads_assembly.individual_sequences} + -p \${GALAXY_SLOTS:-1} + '${assembly}' + ${reads_assembly.individual_sequences} #end if -o 'sketch' ]]></command> @@ -55,29 +62,104 @@ </when> </conditional> <param type="integer" name="minimum_kmer_copies" argument="-m" value="1" min="1" max="1000" label="Minimum copies of each k-mer required to pass noise filter"/> + <param type="integer" name="target_coverage" 
argument="-c" value="" min="0" max="500" optional="true" label="Target coverage" help="If specified, sketching will conclude if this coverage is reached before the end of the input file (estimated by average k-mer multiplicity)"/> + <param type="integer" name="genome_size" argument="-g" value="" min="1000" max="100000000000" optional="true" label="Genome size" help="If specified, will be used for p-value calculation instead of an estimated size from k-mer content"/> </when> <when value="assembly"> <param name="assembly" type="data" format="fasta,fasta.gz" label="Assembly"/> - <param type="boolean" name="individual_sequences" truevalue="-i" falsevalue="" label="Sketch individual Sequences"/> + <param type="boolean" name="individual_sequences" truevalue="-i" falsevalue="" label="Sketch individual Sequences" help="Sketch individual sequences, rather than whole files, e.g. for multi-fastas of single-chromosome genomes or pair-wise gene comparisons"/> </when> </conditional> - <param type="integer" name="sketch_size" argument="-s" value="1000" min="10" max="1000000" label="Sketch size" /> + <param type="integer" name="sketch_size" argument="-s" value="1000" min="10" max="1000000" label="Sketch size" help="Each sketch will have at most this many non-redundant min-hashes"/> <param type="integer" name="kmer_size" argument="-k" value="21" min="1" max="32" label="kmer size" /> + <param type="float" name="prob_threshold" argument="-w" value="0.01" min="0" max="1" label="Probability threshold for warning about low k-mer size" /> </inputs> <outputs> <data name="sketch" format="msh" from_work_dir="sketch.msh"/> </outputs> <tests> <test> - <param name="reads_assembly_selector" value="reads" /> - <param name="reads_input_selector" value="single"/> - <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/> - <output name="sketch" file="ERR024951_seqtk_sample_1000_1.sketch.msh" compare="sim_size" /> + <conditional name="reads_assembly"> + <param 
name="reads_assembly_selector" value="reads"/> + <conditional name="reads_input"> + <param name="reads_input_selector" value="single"/> + <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/> + </conditional> + </conditional> + <output name="sketch" file="test_01_mash_sketch.msh" compare="sim_size" /> + </test> + <test> + <conditional name="reads_assembly"> + <param name="reads_assembly_selector" value="reads"/> + <conditional name="reads_input"> + <param name="reads_input_selector" value="single"/> + <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/> + <param name="minimum_kmer_copies" value="10"/> + </conditional> + </conditional> + <output name="sketch" file="test_02_mash_sketch.msh" compare="sim_size" /> + </test> + <test> + <conditional name="reads_assembly"> + <param name="reads_assembly_selector" value="reads"/> + <conditional name="reads_input"> + <param name="reads_input_selector" value="single"/> + <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/> + <param name="target_coverage" value="1"/> + </conditional> + </conditional> + <output name="sketch" file="test_03_mash_sketch.msh" compare="sim_size" /> </test> <test> - <param name="reads_assembly_selector" value="assembly" /> - <param name="assembly" value="test_assembly.fasta"/> - <output name="sketch" file="test_assembly.sketch.msh" compare="sim_size" /> + <conditional name="reads_assembly"> + <param name="reads_assembly_selector" value="reads"/> + <conditional name="reads_input"> + <param name="reads_input_selector" value="single"/> + <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/> + <param name="genome_size" value="1000"/> + </conditional> + </conditional> + <output name="sketch" file="test_04_mash_sketch.msh" compare="sim_size" /> + </test> + <test> + <conditional name="reads_assembly"> + <param name="reads_assembly_selector" value="reads"/> + <conditional name="reads_input"> + <param name="reads_input_selector" value="single"/> + <param 
name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/> + </conditional> + </conditional> + <param name="sketch_size" value="500"/> + <output name="sketch" file="test_05_mash_sketch.msh" compare="sim_size" /> + </test> + <test> + <conditional name="reads_assembly"> + <param name="reads_assembly_selector" value="reads"/> + <conditional name="reads_input"> + <param name="reads_input_selector" value="single"/> + <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/> + </conditional> + </conditional> + <param name="kmer_size" value="17"/> + <output name="sketch" file="test_06_mash_sketch.msh" compare="sim_size" /> + </test> + <test> + <conditional name="reads_assembly"> + <param name="reads_assembly_selector" value="reads"/> + <conditional name="reads_input"> + <param name="reads_input_selector" value="single"/> + <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/> + </conditional> + </conditional> + <param name="prob_threshold" value="0.1"/> + <output name="sketch" file="test_06_mash_sketch.msh" compare="sim_size" /> + </test> + <test> + <conditional name="reads_assembly"> + <param name="reads_assembly_selector" value="assembly"/> + <param name="assembly" value="test_assembly.fasta"/> + </conditional> + <output name="sketch" file="test_07_mash_sketch.msh" compare="sim_size" /> </test> </tests> <help><![CDATA[