Mercurial > repos > iuc > nugen_nudup

<tool id="nugen_nudup" name="NuDUP" version="2.3.3" profile="17.01">
    <!-- Short summary displayed alongside the tool name. -->
    <description>mark/remove PCR duplicates based on molecular tags</description>
    <!-- Cross-reference to the matching bio.tools entry. -->
    <xrefs>
        <xref type="bio.tools">nudup</xref>
    </xrefs>
    <!-- Job dependency, pinned to the same version as the tool itself. -->
    <requirements>
        <requirement version="2.3.3" type="package">nudup</requirement>
    </requirements>
    <!-- Exit codes of 1 or higher are treated as an error. -->
    <stdio>
        <exit_code range="1:"/>
    </stdio>
    <!-- Command used to report the underlying program version. -->
    <version_command>nudup.py --version</version_command>
    <command><![CDATA[
        ## Stage the alignment under a name whose extension matches its real
        ## datatype: the input accepts both SAM and BAM, and only BAM datasets
        ## carry a bam_index that can be linked next to the file.
        #if $input.is_of_type('bam'):
            #set input_file = 'input.bam'
            ln -f -s '$input.metadata.bam_index' 'input.bai' &&
        #else:
            #set input_file = 'input.sam'
        #end if
        ln -f -s '$input' '$input_file' &&
        mkdir -p 'tmp' &&
        ## Keep the UMI link's extension in line with its compression state.
        #if $umi_fastq.is_of_type('fastq.gz','fastqsanger.gz'):
            #set umi_file = 'umi.fastq.gz'
        #else:
            #set umi_file = 'umi.fastq'
        #end if
        ln -f -s '$umi_fastq' '$umi_file' &&
        ## The output prefix is passed explicitly because the outputs section
        ## collects prefix.sorted.*.bam and prefix_dup_log.txt from the work dir.
        nudup.py
        -T 'tmp'
        -o 'prefix'
        $paired_end
        -f '$umi_file'
        --start $start
        --length $length
        $rmdup_only
        '$input_file'
        ]]>
    </command>
    <inputs>
        <!-- Alignment file to be de-duplicated. -->
        <param type="data" name="input" label="Input SAM/BAM file"
            format="sam,bam"
            help="Input SAM/BAM containing only unique alignments" />
        <!-- Reads carrying the molecular tag, plain or gzip-compressed FASTQ. -->
        <param type="data" name="umi_fastq"
            label="Fastq file containing molecular tag sequence"
            format="fastq,fastq.gz,fastqsanger,fastqsanger.gz"
            help="FASTQ file containing the molecular tag sequence for each read name in the corresponding SAM/BAM file" />
        <!-- Emits the flag only when checked; otherwise contributes nothing to the command line. -->
        <param type="boolean" argument="--paired-end" label="Paired-end deduping"
            truevalue="--paired-end" falsevalue="" checked="false"
            help="use paired end deduping with template. SAM/BAM alignment must contain paired end reads. Degenerate read pairs (alignments for one read of pair) will be discarded." />
        <!-- The position is documented as 1-based, so values below 1 are rejected up front. -->
        <param type="integer" argument="--start" min="1" value="6"
            label="Tag sequence start position from 3' end"
            help="position in index read where molecular tag sequence begins. This should be a 1-based value that counts in from the 3' END of the read." />
        <!-- A molecular tag needs at least one base. -->
        <param type="integer" argument="--length" min="1" value="6"
            label="Tag sequence length"
            help="length of molecular tag sequence" />
        <!-- When checked, the duplicates-marked BAM output is not produced. -->
        <param type="boolean" argument="--rmdup-only"
            label="Only output BAM with duplicates removed"
            truevalue="--rmdup-only" falsevalue="" checked="false"
            help="Do not output BAM with duplicates marked. Default is to output both marked duplicates and removed duplicates BAM files." />
    </inputs>
    <outputs>
        <!-- BAM with duplicates marked; dropped from the outputs when rmdup_only is set. -->
        <data name="markdup" format="bam" metadata_source="input"
              from_work_dir="prefix.sorted.markdup.bam"
              label="${tool.name} on ${on_string}: MarkDup">
            <filter>not rmdup_only</filter>
        </data>
        <!-- BAM with duplicates removed; always collected. -->
        <data name="dedup" format="bam" metadata_source="input"
              from_work_dir="prefix.sorted.dedup.bam"
              label="${tool.name} on ${on_string}: DeDup" />
        <!-- Plain-text duplication log written next to the BAM files. -->
        <data name="log" format="txt"
              from_work_dir="prefix_dup_log.txt"
              label="${tool.name} on ${on_string}: Log" />
    </outputs>
    <tests>
        <!-- Uncompressed UMI FASTQ; tag start and length both set to 8. -->
        <test expect_num_outputs="3">
            <param name="input" ftype="bam" value="nudup_test_1.bam" />
            <param name="umi_fastq" ftype="fastqsanger" value="nudup_umis.fastq" />
            <param name="start" value="8" />
            <param name="length" value="8" />
            <output name="markdup" ftype="bam" file="nudup_markdup_1.bam" />
            <output name="dedup" ftype="bam" file="nudup_dedup_1.bam" />
            <output name="log" ftype="txt" file="nudup_log_1.txt" />
        </test>
        <!-- Same run with the gzip-compressed UMI FASTQ; the same expected files are compared. -->
        <test expect_num_outputs="3">
            <param name="input" ftype="bam" value="nudup_test_1.bam" />
            <param name="umi_fastq" ftype="fastqsanger.gz" value="nudup_umis.fastq.gz" />
            <param name="start" value="8" />
            <param name="length" value="8" />
            <output name="markdup" ftype="bam" file="nudup_markdup_1.bam" />
            <output name="dedup" ftype="bam" file="nudup_dedup_1.bam" />
            <output name="log" ftype="txt" file="nudup_log_1.txt" />
        </test>
    </tests>
    <help><![CDATA[
Marks/removes PCR-introduced duplicate molecules based on the molecular tagging
technology used in NuGEN products.

For SINGLE END reads, duplicates are marked if they fulfill the following
criteria: a) start at the same genomic coordinate b) have the same strand
orientation c) have the same molecular tag sequence. The read with the
highest mapping quality is kept as the non-duplicate read.

For PAIRED END reads, duplicates are marked if they fulfill the following
criteria: a) start at the same genomic coordinate b) have the same template
length c) have the same molecular tag sequence. The read pair with the highest
mapping quality is kept as the non-duplicate read.

Author: Anand Patel

Contact: NuGEN Technologies Inc., techserv@nugen.com

::

    Input:
      IN.sam|IN.bam         input sorted/unsorted SAM/BAM containing only unique
                            alignments (sorted required for case 2 detailed above)

    Options:
      -2, --paired-end      use paired end deduping with template. SAM/BAM
                            alignment must contain paired end reads. Degenerate
                            read pairs (alignments for one read of pair) will be
                            discarded.
      -f INDEX.fq|READ.fq   FASTQ file containing the molecular tag sequence for
                            each read name in the corresponding SAM/BAM file
                            (required only for CASE 1 detailed above)
      -o OUT_PREFIX, --out OUT_PREFIX
                            prefix of output file paths for sorted BAMs (default
                            will create prefix.sorted.markdup.bam,
                            prefix.sorted.dedup.bam, prefix_dup_log.txt)
      -s START, --start START
                            position in index read where molecular tag sequence
                            begins. This should be a 1-based value that counts in
                            from the 3' END of the read. (default = 6)
      -l LENGTH, --length LENGTH
                            length of molecular tag sequence (default = 6)
      -T TEMP_DIR           directory for reading and writing to temporary files
                            and named pipes (default: /tmp)
      --old-samtools        required for compatibility with samtools sort style in
                            samtools versions <=0.1.19
      --rmdup-only          required for only outputting duplicates removed file
      -v, --version         show program's version number and exit
      -h, --help            show this help message and exit
        ]]></help>
    <citations>
        <!-- BibTeX entry pointing at the NuDUP GitHub repository.
             NOTE(review): the entry cites version 2.3.2 while this wrapper
             declares tool and package version 2.3.3; confirm whether the
             version and commit fields should be updated together. -->
        <citation type="bibtex">@misc{Patel2017,
  author = {Patel, Anand},
  title = {NuDUP},
  version = {2.3.2},
  year = {2017},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/nugentechnologies/nudup}},
  commit = {7a126eb5a4ccc2bacb426c7cf58b351962798093}
}
        </citation>
    </citations>
</tool>
author	iuc
date	Fri, 15 Mar 2024 12:40:38 +0000
parents	2bad02c1cb0d
children