Mercurial > repos > iuc > nugen_nudup
view nugen_nudup.xml @ 4:0d24a02c555f draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/nugen_nudup commit 26045b518c15c3f18996f6dc2af7064a9c1941dd
author | iuc |
---|---|
date | Fri, 15 Mar 2024 12:40:38 +0000 |
parents | 2bad02c1cb0d |
children |
line wrap: on
line source
<tool id="nugen_nudup" name="NuDUP" version="2.3.3" profile="17.01">
    <!-- Galaxy wrapper around nudup.py: marks and/or removes PCR duplicates
         using the molecular tag (UMI) read supplied as a separate FASTQ. -->
    <description>
        mark/remove PCR duplicates based on molecular tags
    </description>
    <xrefs>
        <xref type="bio.tools">nudup</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="2.3.3">nudup</requirement>
    </requirements>
    <!-- Any non-zero exit code is treated as a job failure. -->
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <version_command>nudup.py --version</version_command>
    <command><![CDATA[
        ## Link the alignment under a name whose extension matches its datatype.
        ## Only BAM datasets carry bam_index metadata, so the index link is
        ## created for BAM inputs only; SAM inputs are linked as input.sam.
        #if $input.is_of_type('bam'):
            #set input_file = 'input.bam'
            ln -f -s '$input' '$input_file' &&
            ln -f -s '$input.metadata.bam_index' 'input.bai' &&
        #else:
            #set input_file = 'input.sam'
            ln -f -s '$input' '$input_file' &&
        #end if

        ## Scratch directory passed to nudup.py via -T.
        mkdir 'tmp' &&

        ## Keep the .gz suffix on the link name for compressed UMI FASTQ files.
        #if $umi_fastq.is_of_type('fastq.gz','fastqsanger.gz'):
            #set umi_file = 'umi.fastq.gz'
        #else:
            #set umi_file = 'umi.fastq'
        #end if
        ln -f -s '$umi_fastq' '$umi_file' &&

        ## No -o is given, so outputs use the default 'prefix' basename that
        ## the from_work_dir attributes in the outputs section refer to.
        nudup.py
            -T 'tmp'
            $paired_end
            -f '$umi_file'
            --start $start
            --length $length
            $rmdup_only
            '$input_file'
    ]]></command>
    <inputs>
        <param type="data" name="input" label="Input SAM/BAM file" format="sam,bam"
               help="Input SAM/BAM containing only unique alignments" />
        <param type="data" name="umi_fastq" label="Fastq file containing molecular tag sequence"
               format="fastq,fastq.gz,fastqsanger,fastqsanger.gz"
               help="FASTQ file containing the molecular tag sequence for each read name in the corresponding SAM/BAM file" />
        <param type="boolean" argument="--paired-end" label="Paired-end deduping"
               truevalue="--paired-end" falsevalue="" checked="false"
               help="use paired end deduping with template. SAM/BAM alignment must contain paired end reads. Degenerate read pairs (alignments for one read of pair) will be discarded." />
        <param type="integer" argument="--start" label="Tag sequence start position from 3' end" value="6"
               help="position in index read where molecular tag sequence begins. This should be a 1-based value that counts in from the 3' END of the read." />
        <param type="integer" argument="--length" label="Tag sequence length" value="6"
               help="length of molecular tag sequence" />
        <param type="boolean" argument="--rmdup-only" label="Only output BAM with duplicates removed"
               truevalue="--rmdup-only" falsevalue="" checked="false"
               help="Do not output BAM with duplicates marked. Default is to output both marked duplicates and removed duplicates BAM files." />
    </inputs>
    <outputs>
        <!-- The duplicate-marked BAM is only collected when the
             "rmdup_only" option is left unchecked. -->
        <data format="bam" name="markdup" metadata_source="input"
              label="${tool.name} on ${on_string}: MarkDup"
              from_work_dir="prefix.sorted.markdup.bam">
            <filter>not rmdup_only</filter>
        </data>
        <data format="bam" name="dedup" metadata_source="input"
              label="${tool.name} on ${on_string}: DeDup"
              from_work_dir="prefix.sorted.dedup.bam" />
        <data format="txt" name="log"
              label="${tool.name} on ${on_string}: Log"
              from_work_dir="prefix_dup_log.txt" />
    </outputs>
    <tests>
        <!-- Uncompressed UMI FASTQ. -->
        <test expect_num_outputs="3">
            <param name="input" value="nudup_test_1.bam" ftype="bam" />
            <param name="umi_fastq" value="nudup_umis.fastq" ftype="fastqsanger" />
            <param name="start" value="8" />
            <param name="length" value="8" />
            <output name="markdup" file="nudup_markdup_1.bam" ftype="bam" />
            <output name="dedup" file="nudup_dedup_1.bam" ftype="bam" />
            <output name="log" file="nudup_log_1.txt" ftype="txt" />
        </test>
        <!-- Gzip-compressed UMI FASTQ; expected outputs match the test above. -->
        <test expect_num_outputs="3">
            <param name="input" value="nudup_test_1.bam" ftype="bam" />
            <param name="umi_fastq" value="nudup_umis.fastq.gz" ftype="fastqsanger.gz" />
            <param name="start" value="8" />
            <param name="length" value="8" />
            <output name="markdup" file="nudup_markdup_1.bam" ftype="bam" />
            <output name="dedup" file="nudup_dedup_1.bam" ftype="bam" />
            <output name="log" file="nudup_log_1.txt" ftype="txt" />
        </test>
    </tests>
    <help><![CDATA[
Marks/removes PCR introduced duplicate molecules based on the molecular tagging
technology used in NuGEN products.

For SINGLE END reads, duplicates are marked if they fulfill the following
criteria: a) start at the same genomic coordinate b) have the same strand
orientation c) have the same molecular tag sequence. The read with the highest
mapping quality is kept as the non-duplicate read.

For PAIRED END reads, duplicates are marked if they fulfill the following
criteria: a) start at the same genomic coordinate b) have the same template
length c) have the same molecular tag sequence. The read pair with the highest
mapping quality is kept as the non-duplicate read.

Author: Anand Patel

Contact: NuGEN Technologies Inc., techserv@nugen.com

::

    Input:
      IN.sam|IN.bam         input sorted/unsorted SAM/BAM containing only unique
                            alignments (sorted required for case 2 detailed above)

    Options:
      -2, --paired-end      use paired end deduping with template. SAM/BAM
                            alignment must contain paired end reads. Degenerate
                            read pairs (alignments for one read of pair) will be
                            discarded.
      -f INDEX.fq|READ.fq   FASTQ file containing the molecular tag sequence for
                            each read name in the corresponding SAM/BAM file
                            (required only for CASE 1 detailed above)
      -o OUT_PREFIX, --out OUT_PREFIX
                            prefix of output file paths for sorted BAMs (default
                            will create prefix.sorted.markdup.bam,
                            prefix.sorted.dedup.bam, prefix_dup_log.txt)
      -s START, --start START
                            position in index read where molecular tag sequence
                            begins. This should be a 1-based value that counts in
                            from the 3' END of the read. (default = 6)
      -l LENGTH, --length LENGTH
                            length of molecular tag sequence (default = 6)
      -T TEMP_DIR           directory for reading and writing to temporary files
                            and named pipes (default: /tmp)
      --old-samtools        required for compatibility with samtools sort style in
                            samtools versions <=0.1.19
      --rmdup-only          required for only outputting duplicates removed file
      -v, --version         show program's version number and exit
      -h, --help            show this help message and exit
    ]]></help>
    <citations>
        <citation type="bibtex">@misc{Patel2017,
            author = {Patel, Anand},
            title = {NuDUP},
            version = {2.3.2},
            year = {2017},
            publisher = {GitHub},
            journal = {GitHub repository},
            howpublished = {\url{https://github.com/nugentechnologies/nudup}},
            commit = {7a126eb5a4ccc2bacb426c7cf58b351962798093}
            }
        </citation>
    </citations>
</tool>