view snippy.xml @ 14:08d22d220254 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snippy commit bc5bbb26c53bf2455043cf4b8f552571e6f74530
author iuc
date Mon, 20 Jan 2025 10:10:04 +0000
parents fa016f1eea1a
children
line wrap: on
line source

<tool id="snippy" name="snippy" version="@WRAPPER_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>
        Snippy finds SNPs between a haploid reference genome and your NGS sequence reads.
    </description>
    <!-- Shared tokens and macros are imported from macros.xml; their definitions are not visible in this file. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Expands the "requirements" and "version_command" macros (presumably the tool dependencies and the version query; confirm in macros.xml). -->
    <expand macro="requirements" />
    <expand macro="version_command" />

    <!--
        Command template (Cheetah).
        1. Derives the snippy output directory name from the identifier of the selected input.
        2. Runs snippy with the reference, the advanced options and the selected input.
        3. Optionally rewrites the consensus FASTA headers to the output directory name.
        4. Copies the result directory to "out" (collected by the outputs via from_work_dir)
           and archives it as out.tgz.
    -->
    <command detect_errors="exit_code"><![CDATA[

        @REFERENCE_SOURCE_FILE@

        ## Name the output directory after the input identifier, replacing every
        ## character that is not a word character with an underscore.
        #import re
        #if str( $fastq_input.fastq_input_selector ) == "paired"
            #set $dir_name = re.sub('[^\w_]', '_', $fastq_input.fastq_input1.element_identifier)
        #elif str( $fastq_input.fastq_input_selector ) == "paired_collection"
            #set $dir_name = re.sub('[^\w_]', '_', $fastq_input.fastq_input.name)
        #elif str( $fastq_input.fastq_input_selector ) == "single"
            #set $dir_name = re.sub('[^\w_]', '_', $fastq_input.fastq_input_single.element_identifier)
        #elif str( $fastq_input.fastq_input_selector ) == "paired_iv"
            #set $dir_name = re.sub('[^\w_]', '_', $fastq_input.fastq_input_interleaved.element_identifier)
        #elif str( $fastq_input.fastq_input_selector ) == "contigs"
            #set $dir_name = re.sub('[^\w_]', '_', $fastq_input.fasta_input.element_identifier)
        #end if

        snippy
            --outdir '${dir_name}'
            --cpus \${GALAXY_SLOTS:-1}
            --ram \$((\${GALAXY_MEMORY_MB:-4096}/1024))
            @REFERENCE_COMMAND@
            --mapqual $adv.mapqual
            --mincov $adv.mincov
            --minfrac $adv.minfrac
            --minqual $adv.minqual
            #if $adv.rgid
                --rgid '$adv.rgid'
            #end if
            #if $adv.bwaopt
                --bwaopt '$adv.bwaopt'
            #end if

            #if str( $fastq_input.fastq_input_selector ) == "paired"
                --R1 '$fastq_input.fastq_input1'
                --R2 '$fastq_input.fastq_input2'
            #elif str( $fastq_input.fastq_input_selector ) == "paired_collection"
                --R1 '$fastq_input.fastq_input.forward'
                --R2 '$fastq_input.fastq_input.reverse'
            #elif str( $fastq_input.fastq_input_selector ) == "single"
                --se '$fastq_input.fastq_input_single'
            #elif str( $fastq_input.fastq_input_selector ) == "paired_iv"
                --peil '$fastq_input.fastq_input_interleaved'
            #elif str( $fastq_input.fastq_input_selector ) == "contigs"
                --ctgs '$fastq_input.fasta_input'
            #end if

        ## Only touch the consensus file when that output was requested and renaming is enabled.
        #if "outcon" in str($outputs) and $adv.rename_cons
          && sed -i 's/>.*/>${dir_name}/' '${dir_name}/snps.consensus.fa'
        #end if

        ## Outputs are collected from the fixed directory "out". When the sanitised
        ## identifier is itself "out", snippy has already written there and copying the
        ## directory into itself would make cp fail, so the copy is skipped in that case.
        #if $dir_name != "out"
            && cp -r '${dir_name}' out
        #end if
        && tar -czf out.tgz '${dir_name}'
    ]]></command>

    <!--
        Tool inputs: the reference (expanded from the "reference_selector" macro, defined
        outside this file), the reads/contigs input, advanced snippy options and the
        selection of outputs to produce.
    -->
    <inputs>
        <expand macro="reference_selector" />
        <!-- The selected input type decides which snippy input option the command uses. -->
        <conditional name="fastq_input">
            <param name="fastq_input_selector" type="select" label="Input type" help="Select paired/single end raw read data or assembled contigs">
                <option value="paired">Paired end reads</option>
                <option value="single">Single end reads</option>
                <option value="paired_collection">Paired end reads in a collection</option>
                <option value="paired_iv">Interleaved paired end reads</option>
                <option value="contigs">Assembled Contigs</option>
            </param>
            <when value="paired">
                <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta,fasta.gz" label="Select first set of reads" help="Specify dataset with forward reads"/>
                <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz,fasta,fasta.gz" label="Select second set of reads" help="Specify dataset with reverse reads"/>
            </when>
            <when value="single">
                <param name="fastq_input_single" type="data" format="fastqsanger,fastqsanger.gz,fasta,fasta.gz" label="Select fastq dataset" help="Specify dataset with single reads"/>
            </when>
            <when value="paired_collection">
                <param name="fastq_input" format="fastqsanger,fastqsanger.gz,fasta,fasta.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
            </when>
            <when value="paired_iv">
                <param name="fastq_input_interleaved" type="data" format="fastqsanger,fastqsanger.gz" label="Select fastq dataset" help="Specify dataset with interleaved reads"/>
            </when>
            <when value="contigs">
                <param name="fasta_input" type="data" format="fasta" label="Select fasta dataset" help="Specify dataset with assembled contigs"/>
            </when>
        </conditional>

        <!-- Values are passed straight through to the matching snippy command line options. -->
        <section name="adv" title="Advanced parameters" expanded="false">
            <param name="mapqual" type="integer" value="60" label="Minimum mapping quality" help="Minimum mapping quality to allow" />
            <param name="mincov" type="integer" value="10" label="Minimum coverage" help="Minimum coverage to call a snp" />
            <param name="minfrac" type="float" value="0.9" label="Minimum proportion for variant evidence" help="Minimum proportion for variant evidence" />
            <param name="minqual" type="float" value="100.0" label="Minimum QUALITY in VCF column 6" help="Minimum QUALITY in VCF column 6" />
            <param name="rgid" type="text" value="" label="Bam header @RG ID" help="Use this @RG ID: in the BAM header" />
            <param name="bwaopt" type="text" value="" label="Extra BWA MEM options" help="Extra BWA MEM options, eg. -x pacbio" />
            <!-- Only takes effect when the consensus output ("outcon") is selected; the header is set to the name derived from the input dataset identifier. -->
            <param name="rename_cons" type="boolean" truevalue="rename_cons" falsevalue="" checked="false" label="Rename consensus sequence header" help="When producing an output of the reference genome with variants instantiated, edit the header so that it is named after the input dataset" />
        </section>

        <!-- Each ticked key enables the output dataset whose filter tests for it. -->
        <param name="outputs" type="select" multiple="true" display="checkboxes" label="Output selection">
            <option value="outvcf" selected="true">The final annotated variants in VCF format</option>
            <option value="outgff" selected="false">The variants in GFF3 format</option>
            <option value="outtab" selected="true">A simple tab-separated summary of all the variants</option>
            <option value="outsum" selected="false">A summary of the samples and mapping</option>
            <option value="outlog" selected="false">A log file with the commands run and their outputs</option>
            <option value="outaln" selected="false">A version of the reference but with - at position with depth=0 and N for 0 to depth to --mincov (does not have variants)</option>
            <option value="outcon" selected="false">A version of the reference genome with all variants instantiated</option>
            <option value="outbam" selected="false">The alignments in BAM format. Note that multi-mapping and unmapped reads are not present.</option>
            <option value="outzip" selected="true">Zipped files needed for input into snippy-core</option>
        </param>

    </inputs>

    <!--
        Output datasets. Every dataset is created only when its key is selected in the
        "outputs" parameter, and is collected from the job working directory via
        from_work_dir (the "out" directory, or the out.tgz archive).
    -->
    <outputs>

        <!-- Variant calls -->
        <data name="snpvcf" format="vcf" from_work_dir="out/snps.vcf" label="${tool.name} on ${on_string} snps vcf file">
            <filter>outputs and 'outvcf' in outputs</filter>
        </data>
        <data name="snpgff" format="gff3" from_work_dir="out/snps.gff" label="${tool.name} on ${on_string} snps gff file">
            <filter>outputs and 'outgff' in outputs</filter>
        </data>
        <data name="snptab" format="tabular" from_work_dir="out/snps.tab" label="${tool.name} on ${on_string} snps table">
            <filter>outputs and 'outtab' in outputs</filter>
        </data>

        <!-- Run summary and log -->
        <data name="snpsum" format="tabular" from_work_dir="out/snps.txt" label="${tool.name} on ${on_string} snps summary">
            <filter>outputs and 'outsum' in outputs</filter>
        </data>
        <data name="snplog" format="txt" from_work_dir="out/snps.log" label="${tool.name} on ${on_string} log file">
            <filter>outputs and 'outlog' in outputs</filter>
        </data>

        <!-- Sequences and alignments -->
        <data name="snpalign" format="fasta" from_work_dir="out/snps.aligned.fa" label="${tool.name} on ${on_string} aligned fasta">
            <filter>outputs and 'outaln' in outputs</filter>
        </data>
        <data name="snpconsensus" format="fasta" from_work_dir="out/snps.consensus.fa" label="${tool.name} on ${on_string} consensus fasta">
            <filter>outputs and 'outcon' in outputs</filter>
        </data>
        <data name="snpsbam" format="bam" from_work_dir="out/snps.bam" label="${tool.name} on ${on_string} mapped reads (bam)">
            <filter>outputs and 'outbam' in outputs</filter>
        </data>

        <!-- NOTE(review): out.tgz is written by "tar -czf" in the command but is declared with format "zip"; confirm that consumers of this dataset expect that. -->
        <data name="outdir" format="zip" from_work_dir="out.tgz" label="${tool.name} on ${on_string} dir for snippy core">
            <filter>outputs and 'outzip' in outputs</filter>
        </data>

    </outputs>

    <!--
        Functional tests. Parameters are nested in the same conditional/section
        structure as in the inputs ("reference_source", "fastq_input", "adv") so that
        every test addresses them unambiguously and consistently.
    -->
    <tests>

        <!-- test 0 - fasta ref no snps -->
        <test>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history"/>
                <param name="ref_file" value="reference.fasta" ftype="fasta"/>
            </conditional>
            <conditional name="fastq_input">
                <param name="fastq_input_selector" value="paired" />
                <param name="fastq_input1" ftype="fastqsanger.gz" value="a_1.fastq.gz" />
                <param name="fastq_input2" ftype="fastqsanger.gz" value="a_2.fastq.gz" />
            </conditional>
            <section name="adv">
                <param name="mincov" value="2" />
                <param name="minqual" value="60" />
            </section>
            <param name="outputs" value="outgff,outsum" />
            <output name="snpsum" ftype="tabular" file="a_fna_ref_mincov_2_minqual_60.snps.txt" lines_diff="8" />
            <output name="snpgff" ftype="gff3" file="a_fna_ref_mincov_2_minqual_60.snps.gff" lines_diff="2"/>
        </test>

        <!-- test 1 - fasta ref one snp -->
        <test>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history"/>
                <param name="ref_file" value="reference.fasta" ftype="fasta"/>
            </conditional>
            <conditional name="fastq_input">
                <param name="fastq_input_selector" value="paired" />
                <param name="fastq_input1" ftype="fastqsanger" value="b_1.fastq" />
                <param name="fastq_input2" ftype="fastqsanger" value="b_2.fastq" />
            </conditional>
            <section name="adv">
                <param name="mincov" value="2" />
                <param name="minqual" value="60" />
            </section>
            <param name="outputs" value="outgff,outsum" />
            <output name="snpsum" ftype="tabular" file="b_fna_ref_mincov_2_minqual_60.snps.txt" lines_diff="8" />
            <output name="snpgff" ftype="gff3" file="b_fna_ref_mincov_2_minqual_60.snps.gff" lines_diff="2"/>
        </test>

        <!-- test 2 - fasta ref one snp paired_collection -->
        <test>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history"/>
                <param name="ref_file" value="reference.fasta" ftype="fasta"/>
            </conditional>
            <conditional name="fastq_input">
                <param name="fastq_input_selector" value="paired_collection" />
                <param name="fastq_input">
                    <collection type="paired">
                        <element name="forward" ftype="fastqsanger" value="b_1.fastq" />
                        <element name="reverse" ftype="fastqsanger" value="b_2.fastq" />
                    </collection>
                </param>
            </conditional>
            <section name="adv">
                <param name="mincov" value="2" />
                <param name="minqual" value="60" />
            </section>
            <param name="outputs" value="outgff,outsum" />
            <output name="snpsum" ftype="tabular" file="b_fna_ref_mincov_2_minqual_60.snps.txt" lines_diff="8" />
            <output name="snpgff" ftype="gff3" file="b_fna_ref_mincov_2_minqual_60.snps.gff" lines_diff="2"/>
        </test>

        <!-- test 3 - fasta ref one snp single -->
        <test>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history"/>
                <param name="ref_file" value="reference.fasta" ftype="fasta"/>
            </conditional>
            <conditional name="fastq_input">
                <param name="fastq_input_selector" value="single" />
                <param name="fastq_input_single" value="b_2.fastq" ftype="fastqsanger" />
            </conditional>
            <section name="adv">
                <param name="mincov" value="2" />
                <param name="minqual" value="60" />
            </section>
            <param name="outputs" value="outgff,outsum" />
            <output name="snpsum" ftype="tabular" file="b_fna_ref_mincov_2_minqual_60.snps.txt" lines_diff="8" />
            <output name="snpgff" ftype="gff3" file="b_2_fna_ref_mincov_2_minqual_60.snps.gff" lines_diff="2"/>
        </test>

        <!-- test 4 - reference source as cached -->
        <test>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="cached"/>
                <param name="ref_file" value="test_id"/>
            </conditional>
            <conditional name="fastq_input">
                <param name="fastq_input_selector" value="single" />
                <param name="fastq_input_single" value="b_2.fastq" ftype="fastqsanger" />
            </conditional>
            <section name="adv">
                <param name="mincov" value="2" />
                <param name="minqual" value="60" />
            </section>
            <param name="outputs" value="outgff,outsum" />
            <output name="snpsum" ftype="tabular" file="b_fna_ref_mincov_2_minqual_60.snps.txt" lines_diff="8" />
            <output name="snpgff" ftype="gff3" file="b_2_fna_ref_mincov_2_minqual_60.snps.gff" lines_diff="2"/>
        </test>

        <!-- test 5 - fasta ref, assembled contigs as input -->
        <test>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history"/>
                <param name="ref_file" value="reference.fasta" ftype="fasta"/>
            </conditional>
            <conditional name="fastq_input">
                <param name="fastq_input_selector" value="contigs" />
                <param name="fasta_input" value="contigs.fasta" ftype="fasta" />
            </conditional>
            <section name="adv">
                <param name="mincov" value="2" />
                <param name="minqual" value="60" />
            </section>
            <param name="outputs" value="outgff,outsum" />
            <output name="snpsum" ftype="tabular" file="d_fna_ref_mincov_1_minqual_60.snps.txt" lines_diff="6" />
            <output name="snpgff" ftype="gff3" file="d_fna_ref_mincov_1_minqual_60.snps.gff" lines_diff="2"/>
        </test>
    </tests>


    <help><![CDATA[

**Snippy @VERSION@**

    Snippy finds SNPs between a haploid reference genome and your NGS sequence reads. It will find both substitutions (snps) and insertions/deletions (indels).

**Author**

    Torsten Seemann

**Inputs**

    - NGS reads in fastq format (single end, paired end or interleaved), or assembled contigs in fasta format
    - Reference file in either fasta or genbank format

If the reference file is supplied in genbank format, snpeff will be called to determine the effect of any snps found.

**Advanced options**

    - mapping quality - Integer - Minimum mapping quality to allow (default '60')

    - minimum coverage - Integer - Minimum coverage of variant site (default '10')

    - minimum fraction - Float - Minimum proportion for variant evidence (default '0.9')

    - minimum quality - Float - Minimum QUALITY in VCF column 6 (default '100.0')

    - rgid - String - Use this @RG ID: in the BAM header (default '')

    - bwaopt - String - Extra BWA MEM options, e.g. -x pacbio (default '')

**Further information**

    For a much more in depth description of snippy and how it works, see https://github.com/tseemann/snippy

    ]]>    </help>
    <expand macro="citations"/>

</tool>