Mercurial > repos > iuc > nugen_nudup
changeset 0:0ad51e73587e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/nugen_nudup commit 9f2d2e8d94050274a4eaae7fa1e48887fed657d4
author | iuc |
---|---|
date | Fri, 02 Dec 2016 18:03:47 -0500 |
parents | |
children | 24693e595caf |
files | nugen_nudup.xml test-data/nudup_dedup_1.bam test-data/nudup_log_1.txt test-data/nudup_markdup_1.bam test-data/nudup_test_1.bam test-data/nudup_umis.fastq |
diffstat | 6 files changed, 133 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nugen_nudup.xml Fri Dec 02 18:03:47 2016 -0500
@@ -0,0 +1,115 @@
+<tool id="nugen_nudup" name="NuDUP" version="2.2_post2016104">
+    <description>mark/remove PCR duplicates based on molecular tags</description>
+    <requirements>
+        <requirement type="package" version="2.2_post2016104">nudup</requirement>
+    </requirements>
+    <stdio>
+        <!-- Any non-zero exit code from the command marks the job as failed. -->
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+    <version_command>nudup.py --version</version_command>
+    <command><![CDATA[
+        ## Link the dataset under an extension matching its datatype; only BAM datasets have bam_index metadata.
+        #if $input.is_of_type('bam')
+            #set $alignments = 'input.bam'
+            ln -f -s '$input.metadata.bam_index' 'input.bai' &&
+        #else
+            #set $alignments = 'input.sam'
+        #end if
+        ln -f -s '$input' '$alignments' &&
+        nudup.py $paired_end
+            -f '$umi_fastq'
+            --start $start
+            --length $length
+            '$alignments'
+    ]]></command>
+    <inputs>
+        <param type="data" name="input" format="sam,bam" label="Input SAM/BAM file"
+               help="Input SAM/BAM containing only unique alignments" />
+        <param type="data" name="umi_fastq" format="fastq,fastqsanger"
+               label="Fastq file containing molecular tag sequence"
+               help="FASTQ file containing the molecular tag sequence for each read name in the corresponding SAM/BAM file" />
+        <param type="boolean" name="paired_end" argument="--paired-end" label="Paired-end deduping"
+               truevalue="--paired-end" falsevalue="" checked="false"
+               help="use paired end deduping with template. SAM/BAM alignment must contain paired end reads. Degenerate read pairs (alignments for one read of pair) will be discarded." />
+        <param type="integer" argument="--start" value="6" min="1"
+               label="Tag sequence start position from 3' end"
+               help="position in index read where molecular tag sequence begins. This should be a 1-based value that counts in from the 3' END of the read." />
+        <param type="integer" argument="--length" value="6" min="1"
+               label="Tag sequence length" help="length of molecular tag sequence" />
+    </inputs>
+    <outputs>
+        <!-- nudup.py is run without -o, so it writes under its default "prefix" names (see help below). -->
+        <data format="bam" name="markdup" from_work_dir="prefix.sorted.markdup.bam" />
+        <data format="bam" name="dedup" from_work_dir="prefix.sorted.dedup.bam" />
+        <data format="txt" name="log" from_work_dir="prefix_dup_log.txt" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="nudup_test_1.bam" ftype="bam" />
+            <param name="umi_fastq" value="nudup_umis.fastq" ftype="fastqsanger" />
+            <param name="start" value="8" />
+            <param name="length" value="8" />
+            <output name="markdup" file="nudup_markdup_1.bam" ftype="bam" />
+            <output name="dedup" file="nudup_dedup_1.bam" ftype="bam" />
+            <output name="log" file="nudup_log_1.txt" ftype="txt" />
+        </test>
+    </tests>
+    <help><![CDATA[
+Marks/removes PCR introduced duplicate molecules based on the molecular tagging
+technology used in NuGEN products.
+
+For SINGLE END reads, duplicates are marked if they fulfill the following
+criteria: a) start at the same genomic coordinate b) have the same strand
+orientation c) have the same molecular tag sequence. The read with the
+highest mapping quality is kept as the non-duplicate read.
+
+For PAIRED END reads, duplicates are marked if they fulfill the following
+criteria: a) start at the same genomic coordinate b) have the same template
+length c) have the same molecular tag sequence. The read pair with the highest
+mapping quality is kept as the non-duplicate read.
+
+Author: Anand Patel
+
+Contact: NuGEN Technologies Inc., techserv@nugen.com
+
+::
+
+    Input:
+      IN.sam|IN.bam         input sorted/unsorted SAM/BAM containing only unique
+                            alignments (sorted required for case 2 detailed above)
+
+    Options:
+      -2, --paired-end      use paired end deduping with template. SAM/BAM
+                            alignment must contain paired end reads. Degenerate
+                            read pairs (alignments for one read of pair) will be
+                            discarded.
+      -f INDEX.fq|READ.fq   FASTQ file containing the molecular tag sequence for
+                            each read name in the corresponding SAM/BAM file
+                            (required only for CASE 1 detailed above)
+      -o OUT_PREFIX, --out OUT_PREFIX
+                            prefix of output file paths for sorted BAMs (default
+                            will create prefix.sorted.markdup.bam,
+                            prefix.sorted.dedup.bam, prefix_dup_log.txt)
+      -s START, --start START
+                            position in index read where molecular tag sequence
+                            begins. This should be a 1-based value that counts in
+                            from the 3' END of the read. (default = 6)
+      -l LENGTH, --length LENGTH
+                            length of molecular tag sequence (default = 6)
+      -v, --version         show program's version number and exit
+      -h, --help            show this help message and exit
+    ]]></help>
+    <citations>
+        <citation type="bibtex">@misc{Patel2016,
+  author = {Patel, Anand},
+  title = {NuDUP},
+  year = {2016},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  howpublished = {\url{https://github.com/nugentechnologies/nudup}},
+  commit = {740d9fe439dd8917605a56483a8796b377eb24c6}
+}
+        </citation>
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/nudup_log_1.txt Fri Dec 02 18:03:47 2016 -0500 @@ -0,0 +1,2 @@ +aligned_count unaligned_count position_dup_count frac_position_dup moltag_dup_count frac_moltag_dup +4 0 3 0.7500 1 0.2500
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/nudup_umis.fastq Fri Dec 02 18:03:47 2016 -0500 @@ -0,0 +1,16 @@ +@HWI-M04292:9:000000000-G129D:1:1102:2975:16166 3:N:0: +CTAATACT ++ +BFFFGGFE +@HWI-M04292:9:000000000-G129D:1:1103:25506:5857 3:N:0: +GCCATCGC ++ +FFCFGGGG +@HWI-M04292:9:000000000-G129D:1:1104:4648:15369 3:N:0: +GACCTGGT ++ +FFFFGGGG +@HWI-M04292:9:000000000-G129D:1:2101:15393:25938 3:N:0: +CTAATACT ++ +FFFFGGGG