diff mash_sketch.xml @ 2:91ee99b4f05a draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mash commit 57309b08e7a08b6f72982f0d78cb5574616c7b67"
author iuc
date Sat, 24 Apr 2021 11:29:14 +0000
parents 68084b06ae83
children 43f9ca23c132
line wrap: on
line diff
--- a/mash_sketch.xml	Wed Feb 26 15:48:46 2020 -0500
+++ b/mash_sketch.xml	Sat Apr 24 11:29:14 2021 +0000
@@ -11,22 +11,29 @@
         mash sketch
             -s '${sketch_size}'
             -k '${kmer_size}'
+            -w '${prob_threshold}'
             #if str ( $reads_assembly.reads_assembly_selector ) == "reads"
-              -m '${reads_assembly.minimum_kmer_copies}'
-              -r
-              #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired"
-                '$reads_assembly.reads_input.reads_1' '$reads_assembly.reads_input.reads_2'
-              #end if
-              #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection"
-                '$reads_assembly.reads_input.reads.forward' '$reads_assembly.reads_input.reads.reverse'
-              #end if
-              #if str( $reads_assembly.reads_input.reads_input_selector ) == "single"
-                '$reads_assembly.reads_input.reads'
-              #end if
+                -m '${reads_assembly.minimum_kmer_copies}'
+                -r
+                #if $reads_assembly.target_coverage
+                    -c '${reads_assembly.target_coverage}'
+                #end if
+                #if $reads_assembly.genome_size
+                    -g '${reads_assembly.genome_size}'
+                #end if
+                #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired"
+                    '$reads_assembly.reads_input.reads_1' '$reads_assembly.reads_input.reads_2'
+                #end if
+                #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection"
+                    '$reads_assembly.reads_input.reads.forward' '$reads_assembly.reads_input.reads.reverse'
+                #end if
+                #if str( $reads_assembly.reads_input.reads_input_selector ) == "single"
+                    '$reads_assembly.reads_input.reads'
+                #end if
             #elif str ( $reads_assembly.reads_assembly_selector ) == "assembly"
-              -p \${GALAXY_SLOTS:-1}
-              '${assembly}'
-              ${reads_assembly.individual_sequences}
+                -p \${GALAXY_SLOTS:-1}
+                '${assembly}'
+                ${reads_assembly.individual_sequences}
             #end if
             -o 'sketch'
     ]]></command>
@@ -55,29 +62,104 @@
                     </when>
                 </conditional>
                 <param type="integer" name="minimum_kmer_copies" argument="-m" value="1" min="1" max="1000" label="Minimum copies of each k-mer required to pass noise filter"/>
+                <param type="integer" name="target_coverage" argument="-c" value="" min="0" max="500" optional="true" label="Target coverage" help="If specified, sketching will conclude if this coverage is reached before the end of the input file (estimated by average k-mer multiplicity)"/>
+                <param type="integer" name="genome_size" argument="-g" value="" min="1000" max="10  0000000000" optional="true" label="Genome size" help="If specified, will be used for p-value calculation instead of an estimated size from k-mer content"/>
             </when>
             <when value="assembly">
                 <param name="assembly" type="data" format="fasta,fasta.gz" label="Assembly"/>
-                <param type="boolean" name="individual_sequences" truevalue="-i" falsevalue="" label="Sketch individual Sequences"/>
+                <param type="boolean" name="individual_sequences" truevalue="-i" falsevalue="" label="Sketch individual Sequences" help="Sketch individual sequences, rather than whole files, e.g. for multi-fastas of single-chromosome genomes or pair-wise gene comparisons"/>
             </when>
         </conditional>
-        <param type="integer" name="sketch_size" argument="-s" value="1000" min="10" max="1000000" label="Sketch size" />
+        <param type="integer" name="sketch_size" argument="-s" value="1000" min="10" max="1000000" label="Sketch size" help="Each sketch will have at most this many non-redundant min-hashes"/>
         <param type="integer" name="kmer_size" argument="-k" value="21" min="1" max="32" label="kmer size" />
+        <param type="float" name="prob_threshold" argument="-w" value="0.01" min="0" max="1" label="Probability threshold for warning about low k-mer size" />   
     </inputs>
     <outputs>
         <data name="sketch" format="msh" from_work_dir="sketch.msh"/>
     </outputs>
     <tests>
         <test>
-            <param name="reads_assembly_selector" value="reads" />
-            <param name="reads_input_selector" value="single"/>
-            <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
-            <output name="sketch" file="ERR024951_seqtk_sample_1000_1.sketch.msh" compare="sim_size" />
+            <conditional name="reads_assembly">
+                <param name="reads_assembly_selector" value="reads"/>
+                <conditional name="reads_input">
+                    <param name="reads_input_selector" value="single"/>
+                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
+                </conditional>
+            </conditional>
+            <output name="sketch" file="test_01_mash_sketch.msh" compare="sim_size" />
+        </test>
+        <test>
+            <conditional name="reads_assembly">
+                <param name="reads_assembly_selector" value="reads"/>
+                <conditional name="reads_input">
+                    <param name="reads_input_selector" value="single"/>
+                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
+                    <param name="minimum_kmer_copies" value="10"/>
+                </conditional>
+            </conditional>
+            <output name="sketch" file="test_02_mash_sketch.msh" compare="sim_size" />
+        </test>
+        <test>
+            <conditional name="reads_assembly">
+                <param name="reads_assembly_selector" value="reads"/>
+                <conditional name="reads_input">
+                    <param name="reads_input_selector" value="single"/>
+                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
+                    <param name="target_coverage" value="1"/>
+                </conditional>
+            </conditional>
+            <output name="sketch" file="test_03_mash_sketch.msh" compare="sim_size" />
         </test>
         <test>
-            <param name="reads_assembly_selector" value="assembly" />
-            <param name="assembly" value="test_assembly.fasta"/>
-            <output name="sketch" file="test_assembly.sketch.msh" compare="sim_size" />
+            <conditional name="reads_assembly">
+                <param name="reads_assembly_selector" value="reads"/>
+                <conditional name="reads_input">
+                    <param name="reads_input_selector" value="single"/>
+                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
+                    <param name="genome_size" value="1000"/>
+                </conditional>
+            </conditional>
+            <output name="sketch" file="test_04_mash_sketch.msh" compare="sim_size" />
+        </test>
+        <test>
+            <conditional name="reads_assembly">
+                <param name="reads_assembly_selector" value="reads"/>
+                <conditional name="reads_input">
+                    <param name="reads_input_selector" value="single"/>
+                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
+                </conditional>
+            </conditional>
+            <param name="sketch_size" value="500"/>
+            <output name="sketch" file="test_05_mash_sketch.msh" compare="sim_size" />
+        </test>
+        <test>
+            <conditional name="reads_assembly">
+                <param name="reads_assembly_selector" value="reads"/>
+                <conditional name="reads_input">
+                    <param name="reads_input_selector" value="single"/>
+                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
+                </conditional>
+            </conditional>
+            <param name="kmer_size" value="17"/>
+            <output name="sketch" file="test_06_mash_sketch.msh" compare="sim_size" />
+        </test>
+        <test>
+            <conditional name="reads_assembly">
+                <param name="reads_assembly_selector" value="reads"/>
+                <conditional name="reads_input">
+                    <param name="reads_input_selector" value="single"/>
+                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
+                </conditional>
+            </conditional>
+            <param name="prob_threshold" value="0.1"/>
+            <output name="sketch" file="test_06_mash_sketch.msh" compare="sim_size" />
+        </test>
+        <test>
+            <conditional name="reads_assembly">
+                <param name="reads_assembly_selector" value="assembly"/>
+                <param name="assembly" value="test_assembly.fasta"/>
+            </conditional>
+            <output name="sketch" file="test_07_mash_sketch.msh" compare="sim_size" />
         </test>
     </tests>
     <help><![CDATA[