Mercurial > repos > iuc > nugen_nudup
view nugen_nudup.xml @ 0:0ad51e73587e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/nugen_nudup commit 9f2d2e8d94050274a4eaae7fa1e48887fed657d4
author | iuc |
---|---|
date | Fri, 02 Dec 2016 18:03:47 -0500 |
parents | |
children | 24693e595caf |
line wrap: on
line source
<tool id="nugen_nudup" name="NuDUP" version="2.2_post2016104">
    <description>mark/remove PCR duplicates based on molecular tags</description>
    <requirements>
        <requirement type="package" version="2.2_post2016104">nudup</requirement>
    </requirements>
    <!-- Any non-zero exit code of the command is reported as a job error. -->
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <version_command>nudup.py --version</version_command>
    <!--
        The input dataset is symlinked into the working directory under a
        name whose extension matches its datatype. Only BAM datasets carry
        bam_index metadata, so the index link is created for BAM input only;
        a SAM dataset is linked as input.sam with no index.
        NOTE(review): presumably nudup.py tells SAM from BAM by the file
        extension; confirm against the packaged NuDUP release.
        nudup.py is called without -o, so it writes to the default "prefix"
        file names that the outputs section below collects.
    -->
    <command><![CDATA[
        #if $input.is_of_type('bam')
            ln -f -s '$input' 'input.bam' &&
            ln -f -s '$input.metadata.bam_index' 'input.bai' &&
            #set $input_file = 'input.bam'
        #else
            ln -f -s '$input' 'input.sam' &&
            #set $input_file = 'input.sam'
        #end if
        nudup.py
            $paired_end
            -f '$umi_fastq'
            --start $start
            --length $length
            '$input_file'
    ]]></command>
    <inputs>
        <param type="data" name="input" label="Input SAM/BAM file" format="sam,bam"
               help="Input SAM/BAM containing only unique alignments" />
        <param type="data" name="umi_fastq" label="Fastq file containing molecular tag sequence"
               format="fastq,fastqsanger"
               help="FASTQ file containing the molecular tag sequence for each read name in the corresponding SAM/BAM file" />
        <param type="boolean" argument="--paired-end" label="Paired-end deduping" name="paired_end"
               truevalue="--paired-end" falsevalue="" checked="false"
               help="use paired end deduping with template. SAM/BAM alignment must contain paired end reads. Degenerate read pairs (alignments for one read of pair) will be discarded." />
        <!-- The start position is documented as 1-based, so values below 1 are rejected. -->
        <param type="integer" argument="--start" label="Tag sequence start position from 3' end"
               value="6" min="1"
               help="position in index read where molecular tag sequence begins. This should be a 1-based value that counts in from the 3' END of the read." />
        <!-- A molecular tag must span at least one base. -->
        <param type="integer" argument="--length" label="Tag sequence length"
               value="6" min="1"
               help="length of molecular tag sequence" />
    </inputs>
    <!-- The from_work_dir names are the nudup.py defaults listed under the -o option in the help. -->
    <outputs>
        <data format="bam" name="markdup" from_work_dir="prefix.sorted.markdup.bam"
              label="${tool.name} on ${on_string}: duplicates marked" />
        <data format="bam" name="dedup" from_work_dir="prefix.sorted.dedup.bam"
              label="${tool.name} on ${on_string}: duplicates removed" />
        <data format="txt" name="log" from_work_dir="prefix_dup_log.txt"
              label="${tool.name} on ${on_string}: duplication log" />
    </outputs>
    <tests>
        <test>
            <param name="input" value="nudup_test_1.bam" ftype="bam" />
            <param name="umi_fastq" value="nudup_umis.fastq" ftype="fastqsanger" />
            <param name="start" value="8" />
            <param name="length" value="8" />
            <output name="markdup" file="nudup_markdup_1.bam" ftype="bam" />
            <output name="dedup" file="nudup_dedup_1.bam" ftype="bam" />
            <output name="log" file="nudup_log_1.txt" ftype="txt" />
        </test>
    </tests>
    <help><![CDATA[
Marks/removes PCR introduced duplicate molecules based on the molecular tagging
technology used in NuGEN products.

For SINGLE END reads, duplicates are marked if they fulfill the following
criteria: a) start at the same genomic coordinate b) have the same strand
orientation c) have the same molecular tag sequence. The read with the
highest mapping quality is kept as the non-duplicate read.

For PAIRED END reads, duplicates are marked if they fulfill the following
criteria: a) start at the same genomic coordinate b) have the same template
length c) have the same molecular tag sequence. The read pair with the highest
mapping quality is kept as the non-duplicate read.

Author: Anand Patel
Contact: NuGEN Technologies Inc., techserv@nugen.com

::

    Input:
      IN.sam|IN.bam         input sorted/unsorted SAM/BAM containing only
                            unique alignments (sorted required for case 2
                            detailed above)

    Options:
      -2, --paired-end      use paired end deduping with template. SAM/BAM
                            alignment must contain paired end reads.
                            Degenerate read pairs (alignments for one read of
                            pair) will be discarded.
      -f INDEX.fq|READ.fq   FASTQ file containing the molecular tag sequence
                            for each read name in the corresponding SAM/BAM
                            file (required only for CASE 1 detailed above)
      -o OUT_PREFIX, --out OUT_PREFIX
                            prefix of output file paths for sorted BAMs
                            (default will create prefix.sorted.markdup.bam,
                            prefix.sorted.dedup.bam, prefix_dup_log.txt)
      -s START, --start START
                            position in index read where molecular tag
                            sequence begins. This should be a 1-based value
                            that counts in from the 3' END of the read.
                            (default = 6)
      -l LENGTH, --length LENGTH
                            length of molecular tag sequence (default = 6)
      -v, --version         show program's version number and exit
      -h, --help            show this help message and exit
    ]]></help>
    <citations>
        <citation type="bibtex">@misc{Patel2016,
            author = {Patel, Anand},
            title = {NuDUP},
            year = {2016},
            publisher = {GitHub},
            journal = {GitHub repository},
            howpublished = {\url{https://github.com/nugentechnologies/nudup}},
            commit = {740d9fe439dd8917605a56483a8796b377eb24c6}
        }
        </citation>
    </citations>
</tool>