Mercurial > repos > iuc > samtools_markdup

<macros>
    <!-- Package requirement on samtools at the version given by the
         TOOL_VERSION token. Child elements of the expanding element are
         placed at the yield position, so a tool can add more requirements. -->
    <xml name="requirements">
        <requirements>
            <requirement version="@TOOL_VERSION@" type="package">samtools</requirement>
            <yield />
        </requirements>
    </xml>
    <!-- samtools version; substituted into the package requirement of the
         "requirements" macro. -->
    <token name="@TOOL_VERSION@">1.13</token>
    <!-- NOTE(review): presumably used as the profile attribute of the tools
         importing this file; it is not referenced within this file. -->
    <token name="@PROFILE@">20.05</token>
    <token name="@FLAGS@"><![CDATA[
        ## Sum the comma-separated integer values held by the filter parameter
        ## into the Cheetah variable flags; flags is 0 when filter is empty.
        #if $filter
            #set $flags = sum(map(int, str($filter).split(',')))
        #else
            #set $flags = 0
        #end if
    ]]></token>
    <token name="@PREPARE_IDX@"><![CDATA[
        ## Link the input dataset into the working directory as "infile" and
        ## put a matching index next to it: the index recorded in the dataset
        ## metadata is linked when present, otherwise samtools builds one.
        ln -s '$input' infile &&
        #if $input.is_of_type('bam')
            #if str($input.metadata.bam_index) == "None"
                samtools index infile infile.bai &&
            #else
                ln -s '${input.metadata.bam_index}' infile.bai &&
            #end if
        #elif $input.is_of_type('cram')
            #if str($input.metadata.cram_index) == "None"
                samtools index infile infile.crai &&
            #else
                ln -s '${input.metadata.cram_index}' infile.crai &&
            #end if
        #end if
    ]]></token>
    <token name="@PREPARE_IDX_MULTIPLE@"><![CDATA[
        ## Link every dataset of input_bams into the working directory under
        ## its position in the list (0, 1, ...) and put a matching index next
        ## to it: the index recorded in the dataset metadata is linked when
        ## present, otherwise samtools builds one.
        #for $i, $bam in enumerate($input_bams)
            ln -s '$bam' '${i}' &&
            #if $bam.is_of_type('bam')
                #if str($bam.metadata.bam_index) == "None"
                    samtools index '${i}' '${i}.bai' &&
                #else
                    ln -s '${bam.metadata.bam_index}' '${i}.bai' &&
                #end if
            #elif $bam.is_of_type('cram')
                #if str($bam.metadata.cram_index) == "None"
                    samtools index '${i}' '${i}.crai' &&
                #else
                    ln -s '${bam.metadata.cram_index}' '${i}.crai' &&
                #end if
            #end if
        #end for
    ]]></token>
    <token name="@PREPARE_FASTA_IDX@"><![CDATA[
        ## Make the user-selected reference genome, if any, accessible through
        ## a shell variable $reffa, index the reference if necessary, and make
        ## the fai-index file available through a shell variable $reffai.

        ## For a cached genome this simply sets the shell variables to point to
        ## the genome file and its precalculated index.
        ## For a genome from the user's history, if that genome is a plain
        ## fasta file, the code creates a symlink in the pwd, creates the fai
        ## index file next to it, then sets the shell variables to point to the
        ## symlink and its index.
        ## For a fasta.gz dataset from the user's history, it tries the same,
        ## but this will only succeed if the file got compressed with bgzip.
        ## For a regular gzipped file samtools faidx will fail, in which case
        ## the code falls back to decompressing to plain fasta before
        ## reattempting the indexing.
        ## Indexing of a bgzipped file produces a regular fai index file *and*
        ## a compressed gzi file. The former is identical to the fai index of
        ## the uncompressed fasta.

        ## If the user has not selected a reference (it's an optional parameter
        ## in some samtools wrappers), a cheetah boolean use_ref is set to
        ## False to encode that fact.

        ## This token reads the conditional addref_cond (addref_select and ref).
        ## Every emitted shell line ends in "&&", so the tool command has to
        ## continue the chain after this token.

        #set use_ref=True
        #if $addref_cond.addref_select == "history":
            #if $addref_cond.ref.is_of_type('fasta'):
                reffa="reference.fa" &&
                ln -s '${addref_cond.ref}' \$reffa &&
                samtools faidx \$reffa &&
            #else:
                ## Not plain fasta: first try to index the linked file as is;
                ## on failure decompress it to reference.fa, repoint reffa to
                ## the decompressed copy and index that instead.
                reffa="reference.fa.gz" &&
                ln -s '${addref_cond.ref}' \$reffa &&
                {
                    samtools faidx \$reffa ||
                    {
                        echo "Failed to index compressed reference. Trying decompressed ..." 1>&2 &&
                        gzip -dc \$reffa > reference.fa &&
                        reffa="reference.fa" &&
                        samtools faidx \$reffa;
                    }
                } &&
            #end if
            reffai=\$reffa.fai &&
        #elif $addref_cond.addref_select == "cached":
            ## For a cached genome the absolute path is used, which allows
            ## reading a CRAM file without specifying the reference.
            reffa='${addref_cond.ref.fields.path}' &&
            reffai=\$reffa.fai &&
        #else
            #set use_ref=False
        #end if
    ]]></token>

    <!-- Conditional "addref_cond" offering an optional reference sequence:
         none, a fasta / fasta.gz dataset from the history, or an entry of the
         fasta_indexes data table filtered by the dbkey of the "input" param.
         The help and argument tokens default to empty strings and can be
         overridden through help="..." / argument="..." on the expand element;
         without these declarations the placeholders are never substituted. -->
    <xml name="optional_reference" token_help="" token_argument="">
        <conditional name="addref_cond">
            <param name="addref_select" type="select" label="Use a reference sequence">
                <help>@HELP@</help>
                <option value="no">No</option>
                <option value="history">Use a genome/index from the history</option>
                <option value="cached">Use a built-in genome</option>
            </param>
            <when value="no"/>
            <when value="history">
                <param name="ref" argument="@ARGUMENT@" type="data" format="fasta,fasta.gz" label="Reference"/>
            </when>
            <when value="cached">
                <param name="ref" argument="@ARGUMENT@" type="select" label="Reference">
                    <options from_data_table="fasta_indexes">
                        <filter type="data_meta" ref="input" key="dbkey" column="dbkey"/>
                    </options>
                    <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset"/>
                </param>
            </when>
        </conditional>
    </xml>
    <!-- Conditional "addref_cond" requiring a reference sequence: either a
         fasta / fasta.gz dataset from the history or an entry of the
         fasta_indexes data table filtered by the dbkey of the "input" param.
         The help and argument tokens default to empty strings and can be
         overridden through help="..." / argument="..." on the expand element.
         The no_options validator is a direct child of the select param, the
         same placement as in the optional_reference macro. -->
    <xml name="mandatory_reference" token_help="" token_argument="">
        <conditional name="addref_cond">
            <param name="addref_select" type="select" label="Use a reference sequence">
                <help>@HELP@</help>
                <option value="history">Use a genome/index from the history</option>
                <option value="cached">Use a built-in genome</option>
            </param>
            <when value="history">
                <param name="ref" argument="@ARGUMENT@" type="data" format="fasta,fasta.gz" label="Reference"/>
            </when>
            <when value="cached">
                <param name="ref" argument="@ARGUMENT@" type="select" label="Reference">
                    <options from_data_table="fasta_indexes">
                        <filter type="data_meta" ref="input" key="dbkey" column="dbkey"/>
                    </options>
                    <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset"/>
                </param>
            </when>
        </conditional>
    </xml>


    <token name="@ADDTHREADS@"><![CDATA[
        ##compute the number of ADDITIONAL threads to be used by samtools (-@)
        ## addthreads is GALAXY_SLOTS (1 when unset) minus one.
        ## The decrement is a post-decrement on purpose: a bash (( )) command
        ## fails when its expression evaluates to 0, and the post-decrement
        ## evaluates to the value before the decrement, so a slot count of 1
        ## still lets the && chain continue with addthreads=0.
        ## NOTE(review): a slot count of 0 would stop the chain here.
        addthreads=\${GALAXY_SLOTS:-1} && (( addthreads-- )) &&
    ]]></token>
    <token name="@ADDMEMORY@"><![CDATA[
        ##compute the amount of memory available to samtools sort (-m)
        ##use only 75% of available: https://github.com/samtools/samtools/issues/831
        ## addmemory starts from GALAXY_MEMORY_MB_PER_SLOT (768 when unset) and
        ## is scaled with integer arithmetic.
        addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} &&
        ((addmemory=addmemory*75/100)) &&
    ]]></token>
    <!-- Optional integer parameter "seed" for tools that use a random number
         generator. -->
    <xml name="seed_input">
        <param name="seed" type="integer" optional="True" help="If empty a random seed is used." label="Seed for random number generator"/>
    </xml>
    <!-- Select options for the SAM flag values 1 to 2048. Each option is
         unselected by default; an expand element can preselect one by
         overriding the matching token, e.g. s4="true". -->
    <xml name="flag_options"
         token_s1="false" token_s2="false" token_s4="false" token_s8="false"
         token_s16="false" token_s32="false" token_s64="false" token_s128="false"
         token_s256="false" token_s512="false" token_s1024="false" token_s2048="false">
        <option selected="@S1@" value="1">Read is paired</option>
        <option selected="@S2@" value="2">Read is mapped in a proper pair</option>
        <option selected="@S4@" value="4">Read is unmapped</option>
        <option selected="@S8@" value="8">Mate is unmapped</option>
        <option selected="@S16@" value="16">Read is mapped to the reverse strand of the reference</option>
        <option selected="@S32@" value="32">Mate is mapped to the reverse strand of the reference</option>
        <option selected="@S64@" value="64">Read is the first in a pair</option>
        <option selected="@S128@" value="128">Read is the second in a pair</option>
        <option selected="@S256@" value="256">Alignment of the read is not primary</option>
        <option selected="@S512@" value="512">Read fails platform/vendor quality checks</option>
        <option selected="@S1024@" value="1024">Read is a PCR or optical duplicate</option>
        <option selected="@S2048@" value="2048">Alignment is supplementary</option>
    </xml>

    <!-- Region specification macros and tokens for tools that accept regions
         either from a tabular file or as a list of manually entered regions. -->
    <token name="@REGIONS_FILE@"><![CDATA[
        ## Emit the -t option with the target regions dataset; emits nothing
        ## unless the "tab" mode of cond_region is selected.
        #if $cond_region.select_region == 'tab'
            -t '${cond_region.targetregions}'
        #end if
    ]]></token>
    <token name="@REGIONS_MANUAL@"><![CDATA[
        ## Emit every manually entered region as a single-quoted argument;
        ## emits nothing unless the "text" mode of cond_region is selected.
        ## The repeat is iterated directly because no index is needed.
        #if $cond_region.select_region == 'text':
            #for $x in $cond_region.regions_repeat:
               '${x.region}'
            #end for
        #end if
    ]]></token>
    <!-- Conditional "cond_region" that lets the user restrict processing to
         regions: none, one or more manually entered regions (validated as
         CHR[:FROM[-TO]]), or a tabular file of target regions. -->
    <xml name="regions_macro">
        <conditional name="cond_region">
            <param name="select_region" type="select" label="Filter by regions" help="restricts output to only those alignments which overlap the specified region(s)">
                <option value="no" selected="True">No</option>
                <option value="text">Manually specify regions</option>
                <option value="tab">Regions from tabular file</option>
            </param>
            <when value="no"/>
            <when value="text">
                <repeat name="regions_repeat" min="1" default="1" title="Regions">
                    <param name="region" type="text" label="region" help="format chr:from-to">
                        <validator type="regex" message="Required format: CHR[:FROM[-TO]]; where CHR: string containing any character except quotes, whitespace and colon; FROM and TO: any integer">^[^\s'\":]+(:\d+(-\d+){0,1}){0,1}$</validator>
                    </param>
                </repeat>
            </when>
            <when value="tab">
                <param name="targetregions" argument="-t/--target-regions" type="data" format="tabular" label="Target regions file" help="Do stats in these regions only. Tab-delimited file chr,from,to (1-based, inclusive)" />
            </when>
        </conditional>
    </xml>

    <!-- Shared citation list: the SAM/BAM format definition, three
         publications referenced by DOI, two bcftools notes, the notes on
         SAMtools algorithms and the SAMtools GitHub page. -->
    <xml name="citations">
        <citations>
            <citation type="bibtex">
                @misc{SAM_def,
                title={Definition of SAM/BAM format},
                url = {https://samtools.github.io/hts-specs/},}
            </citation>
            <citation type="doi">10.1093/bioinformatics/btp352</citation>
            <citation type="doi">10.1093/bioinformatics/btr076</citation>
            <citation type="doi">10.1093/bioinformatics/btr509</citation>
            <citation type="bibtex">
                @misc{Danecek_et_al,
                Author={Danecek, P., Schiffels, S., Durbin, R.},
                title={Multiallelic calling model in bcftools (-m)},
                url = {http://samtools.github.io/bcftools/call-m.pdf},}
            </citation>
            <citation type="bibtex">
                @misc{Durbin_VCQC,
                Author={Durbin, R.},
                title={Segregation based metric for variant call QC},
                url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},}
            </citation>
            <citation type="bibtex">
                @misc{Li_SamMath,
                Author={Li, H.},
                title={Mathematical Notes on SAMtools Algorithms},
                url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},}
            </citation>
            <citation type="bibtex">
                @misc{SamTools_github,
                title={SAMTools GitHub page},
                url = {https://github.com/samtools/samtools},}
            </citation>
        </citations>
    </xml>
    <!-- Runs samtools without arguments, merges its stderr into stdout and
         keeps only the line(s) containing "Version". -->
    <xml name="version_command">
        <version_command><![CDATA[samtools 2>&1 | grep Version]]></version_command>
    </xml>
    <!-- Treat every exit code of 1 or higher as a fatal error. -->
    <xml name="stdio">
        <stdio>
            <exit_code level="fatal" range="1:" description="Error"/>
        </stdio>
    </xml>
</macros>
author	iuc
date	Wed, 22 Jun 2022 07:48:23 +0000
parents	8c440c3002bc
children	b5527cc104ab