view mash_sketch.xml @ 3:43f9ca23c132 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mash commit 0412195e7380e34250a231f6ea4371d309284ff9"
author iuc
date Sat, 24 Apr 2021 13:28:53 +0000
parents 91ee99b4f05a
children
line wrap: on
line source

<tool id="mash_sketch" name="mash sketch" version="@TOOL_VERSION@+galaxy0" profile="19.01">
    <description>
        Create a reduced representation of a sequence or set of sequences, based on min-hashes
    </description>
    <!--
        Shared definitions are imported from macros.xml. The "requirements" and
        "version_command" macros expanded below, and the @TOOL_VERSION@ and
        @INTYPES@ tokens used in this file, are expected to be defined there
        (macros.xml is not visible from this file; verify when changing them).
    -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="version_command" />
    <command detect_errors="exit_code"><![CDATA[
        ## Assemble a single mash sketch call: the sketching options shared by
        ## both input modes come first, then the mode specific options and the
        ## input file(s). All parameters nested in a conditional are referenced
        ## through their fully qualified path.
        mash sketch
            -s '${sketch_size}'
            -k '${kmer_size}'
            -w '${prob_threshold}'
            #if str ( $reads_assembly.reads_assembly_selector ) == "reads"
                ## Read mode: -r marks the input as a read set, -m is the noise filter.
                -m '${reads_assembly.minimum_kmer_copies}'
                -r
                ## -c and -g are optional and only emitted when a value was entered.
                #if $reads_assembly.target_coverage
                    -c '${reads_assembly.target_coverage}'
                #end if
                #if $reads_assembly.genome_size
                    -g '${reads_assembly.genome_size}'
                #end if
                ## Exactly one of the three read layouts is selected.
                #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired"
                    '$reads_assembly.reads_input.reads_1' '$reads_assembly.reads_input.reads_2'
                #elif str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection"
                    '$reads_assembly.reads_input.reads.forward' '$reads_assembly.reads_input.reads.reverse'
                #elif str( $reads_assembly.reads_input.reads_input_selector ) == "single"
                    '$reads_assembly.reads_input.reads'
                #end if
            #elif str ( $reads_assembly.reads_assembly_selector ) == "assembly"
                ## Assembly mode: use the job's allotted slots, defaulting to one thread.
                -p \${GALAXY_SLOTS:-1}
                '${reads_assembly.assembly}'
                ${reads_assembly.individual_sequences}
            #end if
            ## Fixed output prefix; the outputs section collects sketch.msh from the work dir.
            -o 'sketch'
    ]]></command>
    <inputs>
        <!-- Top-level switch: sketch sequencing reads or an assembled genome. -->
        <conditional name="reads_assembly">
            <param name="reads_assembly_selector" type="select" label="Input: Reads or Assemblies">
                <option value="reads" selected="True">Reads</option>
                <option value="assembly">Assembly</option>
            </param>
            <when value="reads">
                <!-- Layout of the read data: two datasets, one dataset, or a paired collection. -->
                <conditional name="reads_input">
                    <param name="reads_input_selector"
                           type="select"
                           label="Single or Paired-end reads"
                           help="Select between paired and single end data">
                        <option value="paired">Paired</option>
                        <option value="single">Single</option>
                        <option value="paired_collection">Paired Collection</option>
                    </param>
                    <when value="paired">
                        <param name="reads_1"
                               type="data"
                               format="@INTYPES@"
                               label="Select first set of reads"
                               help="Specify dataset with forward reads"/>
                        <param name="reads_2"
                               type="data"
                               format="@INTYPES@"
                               label="Select second set of reads"
                               help="Specify dataset with reverse reads"/>
                    </when>
                    <when value="single">
                        <param name="reads"
                               type="data"
                               format="@INTYPES@"
                               label="Select fastq dataset"
                               help="Specify dataset with single reads"/>
                    </when>
                    <when value="paired_collection">
                        <param name="reads"
                               type="data_collection"
                               collection_type="paired"
                               format="@INTYPES@"
                               label="Select a paired collection"
                               help="See help section for an explanation of dataset collections"/>
                    </when>
                </conditional>
                <!-- Read-mode options; the command template passes them as -m, -c and -g. -->
                <param name="minimum_kmer_copies"
                       argument="-m"
                       type="integer"
                       value="1"
                       min="1"
                       max="1000"
                       label="Minimum copies of each k-mer required to pass noise filter"/>
                <param name="target_coverage"
                       argument="-c"
                       type="integer"
                       optional="true"
                       value=""
                       min="0"
                       max="500"
                       label="Target coverage"
                       help="If specified, sketching will conclude if this coverage is reached before the end of the input file (estimated by average k-mer multiplicity)"/>
                <param name="genome_size"
                       argument="-g"
                       type="integer"
                       optional="true"
                       value=""
                       min="1000"
                       max="100000000000"
                       label="Genome size"
                       help="If specified, will be used for p-value calculation instead of an estimated size from k-mer content"/>
            </when>
            <when value="assembly">
                <param name="assembly" type="data" format="fasta,fasta.gz" label="Assembly"/>
                <!-- Expands to -i on the command line when checked, to nothing otherwise. -->
                <param name="individual_sequences"
                       type="boolean"
                       truevalue="-i"
                       falsevalue=""
                       label="Sketch individual Sequences"
                       help="Sketch individual sequences, rather than whole files, e.g. for multi-fastas of single-chromosome genomes or pair-wise gene comparisons"/>
            </when>
        </conditional>
        <!-- Sketching options that apply to both input modes. -->
        <param name="sketch_size"
               argument="-s"
               type="integer"
               value="1000"
               min="10"
               max="1000000"
               label="Sketch size"
               help="Each sketch will have at most this many non-redundant min-hashes"/>
        <param name="kmer_size" argument="-k" type="integer" value="21" min="1" max="32" label="kmer size"/>
        <param name="prob_threshold"
               argument="-w"
               type="float"
               value="0.01"
               min="0"
               max="1"
               label="Probability threshold for warning about low k-mer size"/>
    </inputs>
    <outputs>
        <!-- Picks up sketch.msh from the job working directory; the command template runs mash with the output prefix 'sketch'. -->
        <data format="msh" name="sketch" from_work_dir="sketch.msh"/>
    </outputs>
    <tests>
        <!-- 1: single-end reads, all options at their defaults. -->
        <test>
            <conditional name="reads_assembly">
                <param name="reads_assembly_selector" value="reads"/>
                <conditional name="reads_input">
                    <param name="reads_input_selector" value="single"/>
                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
                </conditional>
            </conditional>
            <output name="sketch" file="test_01_mash_sketch.msh" compare="sim_size" />
        </test>
        <!--
            2: single-end reads with a custom noise filter (-m).
            minimum_kmer_copies is declared directly under the "reads" case of
            reads_assembly, so it is set at that level and not inside reads_input.
            NOTE(review): if the expected file was produced while this value was
            not being applied, it may need to be regenerated.
        -->
        <test>
            <conditional name="reads_assembly">
                <param name="reads_assembly_selector" value="reads"/>
                <conditional name="reads_input">
                    <param name="reads_input_selector" value="single"/>
                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
                </conditional>
                <param name="minimum_kmer_copies" value="10"/>
            </conditional>
            <output name="sketch" file="test_02_mash_sketch.msh" compare="sim_size" />
        </test>
        <!-- 3: single-end reads with a target coverage (-c), set at the reads_assembly level where it is declared. -->
        <test>
            <conditional name="reads_assembly">
                <param name="reads_assembly_selector" value="reads"/>
                <conditional name="reads_input">
                    <param name="reads_input_selector" value="single"/>
                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
                </conditional>
                <param name="target_coverage" value="1"/>
            </conditional>
            <output name="sketch" file="test_03_mash_sketch.msh" compare="sim_size" />
        </test>
        <!-- 4: single-end reads with an explicit genome size (-g), set at the reads_assembly level where it is declared. -->
        <test>
            <conditional name="reads_assembly">
                <param name="reads_assembly_selector" value="reads"/>
                <conditional name="reads_input">
                    <param name="reads_input_selector" value="single"/>
                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
                </conditional>
                <param name="genome_size" value="1000"/>
            </conditional>
            <output name="sketch" file="test_04_mash_sketch.msh" compare="sim_size" />
        </test>
        <!-- 5: single-end reads with a smaller sketch size (-s). -->
        <test>
            <conditional name="reads_assembly">
                <param name="reads_assembly_selector" value="reads"/>
                <conditional name="reads_input">
                    <param name="reads_input_selector" value="single"/>
                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
                </conditional>
            </conditional>
            <param name="sketch_size" value="500"/>
            <output name="sketch" file="test_05_mash_sketch.msh" compare="sim_size" />
        </test>
        <!-- 6: single-end reads with a shorter k-mer size (-k). -->
        <test>
            <conditional name="reads_assembly">
                <param name="reads_assembly_selector" value="reads"/>
                <conditional name="reads_input">
                    <param name="reads_input_selector" value="single"/>
                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
                </conditional>
            </conditional>
            <param name="kmer_size" value="17"/>
            <output name="sketch" file="test_06_mash_sketch.msh" compare="sim_size" />
        </test>
        <!--
            7: single-end reads with a different warning threshold (-w).
            NOTE(review): this test compares against test_06_mash_sketch.msh, the
            same expected file as the kmer_size test above; confirm whether the
            reuse is intentional or a dedicated expected file is missing.
        -->
        <test>
            <conditional name="reads_assembly">
                <param name="reads_assembly_selector" value="reads"/>
                <conditional name="reads_input">
                    <param name="reads_input_selector" value="single"/>
                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
                </conditional>
            </conditional>
            <param name="prob_threshold" value="0.1"/>
            <output name="sketch" file="test_06_mash_sketch.msh" compare="sim_size" />
        </test>
        <!-- 8: assembly input with default options. -->
        <test>
            <conditional name="reads_assembly">
                <param name="reads_assembly_selector" value="assembly"/>
                <param name="assembly" value="test_assembly.fasta"/>
            </conditional>
            <output name="sketch" file="test_07_mash_sketch.msh" compare="sim_size" />
        </test>
    </tests>
    <help><![CDATA[

**What it does**

  Create a sketch file, which is a reduced representation of a sequence or set
  of sequences (based on min-hashes) that can be used for fast distance
  estimations. Inputs can be fasta or fastq files (gzipped or not). On the
  command line, "-" can be given to read from standard input, input files can
  also be files of file names (see -l), and by default the output file name is
  the first input file with a '.msh' extension, or 'stdin.msh' if standard
  input is used (see -o). This wrapper does not offer standard input or files
  of file names, and it always sets the output name itself. One sketch file
  will be generated, but it can have multiple sketches within it, divided by
  sequences or files (see -i, offered here as "Sketch individual Sequences").
  ]]></help>
    <expand macro="citations"/>
</tool>