Mercurial > repos > iuc > nugen_nudup

--- a/nugen_nudup.xml	Fri Dec 02 18:03:47 2016 -0500
+++ b/nugen_nudup.xml	Sat Feb 25 01:38:55 2017 -0500
@@ -1,19 +1,31 @@
-<tool id="nugen_nudup" name="NuDUP" version="2.2_post2016104">
-    <description>mark/remove PCR duplicates based on molecular tags</description>
+<tool id="nugen_nudup" name="NuDUP" version="2.3.1" profile="17.01">
+    <description>
+        mark/remove PCR duplicates based on molecular tags
+    </description>
     <requirements>
-        <requirement type="package" version="2.2_post2016104">nudup</requirement>
+        <requirement type="package" version="2.3.1">nudup</requirement>
     </requirements>
     <stdio>
         <exit_code range="1:" />
     </stdio>
     <version_command>nudup.py --version</version_command>
     <command><![CDATA[
-         ln -f -s '$input' 'input.bam' &&
-         ln -f -s '$input.metadata.bam_index' 'input.bai' &&
-        nudup.py $paired_end
-        -f '$umi_fastq'
+        ln -f -s '$input' 'input.bam' &&
+        ln -f -s '$input.metadata.bam_index' 'input.bai' &&
+        mkdir 'tmp' &&
+        #if $umi_fastq.is_of_type('fastq.gz','fastqsanger.gz'):
+            #set umi_file = 'umi.fastq.gz'
+        #else:
+            #set umi_file = 'umi.fastq'
+        #end if
+        ln -f -s '$umi_fastq' '$umi_file' &&
+        nudup.py
+        -T \$PWD'/tmp'
+        $paired_end
+        -f '$umi_file'
         --start $start
         --length $length
+        $rmdup_only
         'input.bam'
         ]]>
     </command>
@@ -21,8 +33,9 @@
         <param type="data" name="input" label="Input SAM/BAM file"
             format="sam,bam" help="Input SAM/BAM containing only unique
             alignments" />
-        <param type="data" name="umi_fastq" label="Fastq file containing
-            molecular tag sequence" format="fastq,fastqsanger" help="FASTQ
+        <param type="data" name="umi_fastq"
+            label="Fastq file containing molecular tag sequence"
+            format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" help="FASTQ
             file containing the molecular tag sequence for each read name in
             the corresponding SAM/BAM file" />
         <param type="boolean" argument="--paired-end"
@@ -38,16 +51,40 @@
             counts in from the 3' END of the read." />
         <param type="integer" argument="--length" label="Tag sequence length"
             value="6" help="length of molecular tag sequence" />
+        <param type="boolean" argument="--rmdup-only" name="rmdup_only"
+            label="Only output BAM with duplicates removed"
+            truevalue="--rmdup-only" falsevalue="" checked="false"
+            help="Do not output BAM with duplicates marked. Default is to output
+            both marked duplicates and removed duplicates BAM files." />
     </inputs>
     <outputs>
-        <data format="bam" name="markdup" from_work_dir="prefix.sorted.markdup.bam" />
-        <data format="bam" name="dedup" from_work_dir="prefix.sorted.dedup.bam" />
-        <data format="txt" name="log" from_work_dir="prefix_dup_log.txt" />
+        <data format="bam" name="markdup" metadata_source="input"
+              label="${tool.name} on ${on_string}: MarkDup"
+              from_work_dir="prefix.sorted.markdup.bam">
+              <filter>not rmdup_only</filter>
+        </data>
+        <data format="bam" name="dedup" metadata_source="input"
+              label="${tool.name} on ${on_string}: DeDup"
+              from_work_dir="prefix.sorted.dedup.bam" />
+        <data format="txt" name="log"
+              label="${tool.name} on ${on_string}: Log"
+              from_work_dir="prefix_dup_log.txt" />
     </outputs>
     <tests>
         <test>
             <param name="input" value="nudup_test_1.bam" ftype="bam" />
-            <param name="umi_fastq" value="nudup_umis.fastq" ftype="fastqsanger" />
+            <param name="umi_fastq" value="nudup_umis.fastq"
+                ftype="fastqsanger" />
+            <param name="start" value="8" />
+            <param name="length" value="8" />
+            <output name="markdup" file="nudup_markdup_1.bam" ftype="bam" />
+            <output name="dedup" file="nudup_dedup_1.bam" ftype="bam" />
+            <output name="log" file="nudup_log_1.txt" ftype="txt" />
+        </test>
+        <test>
+            <param name="input" value="nudup_test_1.bam" ftype="bam" />
+            <param name="umi_fastq" value="nudup_umis.fastq.gz"
+                ftype="fastqsanger.gz" />
             <param name="start" value="8" />
             <param name="length" value="8" />
             <output name="markdup" file="nudup_markdup_1.bam" ftype="bam" />
@@ -97,18 +134,23 @@
                             from the 3' END of the read. (default = 6)
       -l LENGTH, --length LENGTH
                             length of molecular tag sequence (default = 6)
+      -T TEMP_DIR           directory for reading and writing to temporary files
+                            and named pipes (default: /tmp)
+      --old-samtools        required for compatibility with samtools sort style in
+                            samtools versions <=0.1.19
+      --rmdup-only          required for only outputting duplicates removed file
       -v, --version         show program's version number and exit
       -h, --help            show this help message and exit
         ]]></help>
     <citations>
-        <citation type="bibtex">@misc{Patel2016,
+        <citation type="bibtex">@misc{Patel2017,
   author = {Patel, Anand},
   title = {NuDUP},
-  year = {2016},
+  year = {2017},
   publisher = {GitHub},
   journal = {GitHub repository},
   howpublished = {\url{https://github.com/nugentechnologies/nudup}},
-  commit = {740d9fe439dd8917605a56483a8796b377eb24c6}
+  commit = {7451de86680d24f19638ed6ac88f6504f0817753}
 }
         </citation>
     </citations>
Binary file test-data/nudup_umis.fastq.gz has changed