Mercurial > repos > iuc > nugen_nudup
view nugen_nudup.xml @ 2:57a00c4e43ec draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/nugen_nudup commit c3b2bf5ee64db2e334711d6f012190f9f7b4ea28
author | iuc |
---|---|
date | Fri, 03 Mar 2017 19:35:28 -0500 |
parents | 24693e595caf |
children | 2bad02c1cb0d |
line wrap: on
line source
<!--
    Galaxy wrapper for NuGEN NuDUP (nudup.py): marks and/or removes PCR
    duplicates using the molecular tag (UMI) carried in a companion FASTQ file.

    No "-o" prefix is passed to nudup.py, so its default output prefix
    ("prefix") is used; the from_work_dir attributes in <outputs> rely on it.
-->
<tool id="nugen_nudup" name="NuDUP" version="2.3.2" profile="17.01">
    <description>mark/remove PCR duplicates based on molecular tags</description>
    <requirements>
        <requirement type="package" version="2.3.2">nudup</requirement>
    </requirements>
    <stdio>
        <!-- Any non-zero exit code is treated as a job failure. -->
        <exit_code range="1:" />
    </stdio>
    <version_command>nudup.py --version</version_command>
    <command><![CDATA[
        ## The input may be SAM or BAM. Only BAM datasets carry bam_index
        ## metadata, so the index is linked for BAM only and the link name
        ## keeps the extension matching the actual file type.
        ## NOTE(review): assumes nudup.py accepts a ".sam" path directly, as its
        ## usage text (IN.sam|IN.bam) suggests - confirm against nudup.py.
        #if $input.is_of_type('bam'):
            #set input_file = 'input.bam'
            ln -f -s '$input.metadata.bam_index' 'input.bai' &&
        #else:
            #set input_file = 'input.sam'
        #end if
        ln -f -s '$input' '$input_file' &&
        mkdir 'tmp' &&
        ## Keep the ".gz" suffix on the link name for compressed FASTQ input.
        #if $umi_fastq.is_of_type('fastq.gz','fastqsanger.gz'):
            #set umi_file = 'umi.fastq.gz'
        #else:
            #set umi_file = 'umi.fastq'
        #end if
        ln -f -s '$umi_fastq' '$umi_file' &&
        nudup.py
            -T 'tmp'
            $paired_end
            -f '$umi_file'
            --start $start
            --length $length
            $rmdup_only
            '$input_file'
    ]]></command>
    <inputs>
        <param type="data"
               name="input"
               label="Input SAM/BAM file"
               format="sam,bam"
               help="Input SAM/BAM containing only unique alignments" />
        <param type="data"
               name="umi_fastq"
               label="Fastq file containing molecular tag sequence"
               format="fastq,fastq.gz,fastqsanger,fastqsanger.gz"
               help="FASTQ file containing the molecular tag sequence for each read name in the corresponding SAM/BAM file" />
        <param type="boolean"
               argument="--paired-end"
               label="Paired-end deduping"
               name="paired_end"
               truevalue="--paired-end"
               falsevalue=""
               checked="false"
               help="use paired end deduping with template. SAM/BAM alignment must contain paired end reads. Degenerate read pairs (alignments for one read of pair) will be discarded." />
        <param type="integer"
               argument="--start"
               label="Tag sequence start position from 3' end"
               value="6"
               help="position in index read where molecular tag sequence begins. This should be a 1-based value that counts in from the 3' END of the read." />
        <param type="integer"
               argument="--length"
               label="Tag sequence length"
               value="6"
               help="length of molecular tag sequence" />
        <param type="boolean"
               argument="--rmdup-only"
               name="rmdup_only"
               label="Only output BAM with duplicates removed"
               truevalue="--rmdup-only"
               falsevalue=""
               checked="false"
               help="Do not output BAM with duplicates marked. Default is to output both marked duplicates and removed duplicates BAM files." />
    </inputs>
    <outputs>
        <!-- The duplicates-marked BAM is only collected when rmdup-only is off. -->
        <data format="bam"
              name="markdup"
              metadata_source="input"
              label="${tool.name} on ${on_string}: MarkDup"
              from_work_dir="prefix.sorted.markdup.bam">
            <filter>not rmdup_only</filter>
        </data>
        <data format="bam"
              name="dedup"
              metadata_source="input"
              label="${tool.name} on ${on_string}: DeDup"
              from_work_dir="prefix.sorted.dedup.bam" />
        <data format="txt"
              name="log"
              label="${tool.name} on ${on_string}: Log"
              from_work_dir="prefix_dup_log.txt" />
    </outputs>
    <tests>
        <!-- Uncompressed UMI FASTQ. -->
        <test>
            <param name="input" value="nudup_test_1.bam" ftype="bam" />
            <param name="umi_fastq" value="nudup_umis.fastq" ftype="fastqsanger" />
            <param name="start" value="8" />
            <param name="length" value="8" />
            <output name="markdup" file="nudup_markdup_1.bam" ftype="bam" />
            <output name="dedup" file="nudup_dedup_1.bam" ftype="bam" />
            <output name="log" file="nudup_log_1.txt" ftype="txt" />
        </test>
        <!-- Gzip-compressed UMI FASTQ; expected outputs are the same files. -->
        <test>
            <param name="input" value="nudup_test_1.bam" ftype="bam" />
            <param name="umi_fastq" value="nudup_umis.fastq.gz" ftype="fastqsanger.gz" />
            <param name="start" value="8" />
            <param name="length" value="8" />
            <output name="markdup" file="nudup_markdup_1.bam" ftype="bam" />
            <output name="dedup" file="nudup_dedup_1.bam" ftype="bam" />
            <output name="log" file="nudup_log_1.txt" ftype="txt" />
        </test>
    </tests>
    <help><![CDATA[
Marks/removes PCR introduced duplicate molecules based on the molecular tagging
technology used in NuGEN products.

For SINGLE END reads, duplicates are marked if they fulfill the following
criteria: a) start at the same genomic coordinate b) have the same strand
orientation c) have the same molecular tag sequence. The read with the
highest mapping quality is kept as the non-duplicate read.

For PAIRED END reads, duplicates are marked if they fulfill the following
criteria: a) start at the same genomic coordinate b) have the same template
length c) have the same molecular tag sequence. The read pair with the highest
mapping quality is kept as the non-duplicate read.

Author: Anand Patel

Contact: NuGEN Technologies Inc., techserv@nugen.com

::

    Input:
      IN.sam|IN.bam         input sorted/unsorted SAM/BAM containing only
                            unique alignments (sorted required for case 2
                            detailed above)

    Options:
      -2, --paired-end      use paired end deduping with template. SAM/BAM
                            alignment must contain paired end reads. Degenerate
                            read pairs (alignments for one read of pair) will be
                            discarded.
      -f INDEX.fq|READ.fq   FASTQ file containing the molecular tag sequence for
                            each read name in the corresponding SAM/BAM file
                            (required only for CASE 1 detailed above)
      -o OUT_PREFIX, --out OUT_PREFIX
                            prefix of output file paths for sorted BAMs (default
                            will create prefix.sorted.markdup.bam,
                            prefix.sorted.dedup.bam, prefix_dup_log.txt)
      -s START, --start START
                            position in index read where molecular tag sequence
                            begins. This should be a 1-based value that counts
                            in from the 3' END of the read. (default = 6)
      -l LENGTH, --length LENGTH
                            length of molecular tag sequence (default = 6)
      -T TEMP_DIR           directory for reading and writing to temporary files
                            and named pipes (default: /tmp)
      --old-samtools        required for compatibility with samtools sort style
                            in samtools versions <=0.1.19
      --rmdup-only          required for only outputting duplicates removed file
      -v, --version         show program's version number and exit
      -h, --help            show this help message and exit
    ]]></help>
    <citations>
        <citation type="bibtex">@misc{Patel2017,
            author = {Patel, Anand},
            title = {NuDUP},
            version = {2.3.2},
            year = {2017},
            publisher = {GitHub},
            journal = {GitHub repository},
            howpublished = {\url{https://github.com/nugentechnologies/nudup}},
            commit = {7a126eb5a4ccc2bacb426c7cf58b351962798093}
        }
        </citation>
    </citations>
</tool>