diff tools/coverage_stats/coverage_stats.xml @ 2:7254ece0c0ff draft

v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
author peterjc
date Thu, 11 May 2017 12:16:10 -0400
parents d1fdfaae5dbe
children
line wrap: on
line diff
--- a/tools/coverage_stats/coverage_stats.xml	Fri Nov 21 09:43:58 2014 -0500
+++ b/tools/coverage_stats/coverage_stats.xml	Thu May 11 12:16:10 2017 -0400
@@ -1,29 +1,35 @@
-<tool id="coverage_stats" name="BAM coverage statistics" version="0.0.1">
+<tool id="coverage_stats" name="BAM coverage statistics" version="0.0.5">
     <description>using samtools idxstats and depth</description>
     <requirements>
-        <requirement type="binary">samtools</requirement>
-        <requirement type="package" version="0.1.19">samtools</requirement>
+        <requirement type="package" version="1.4.1">samtools</requirement>
     </requirements>
-    <version_command interpreter="python">coverage_stats.py --version</version_command>
-    <command interpreter="python">coverage_stats.py "$input_bam" "${input_bam.metadata.bam_index}" "$out_tabular"</command>
+    <version_command>
+python $__tool_directory__/coverage_stats.py --version
+    </version_command>
+    <command detect_errors="aggressive">
+python $__tool_directory__/coverage_stats.py '$input_bam' '${input_bam.metadata.bam_index}' '$out_tabular' '$max_depth'
+    </command>
     <inputs>
         <param name="input_bam" type="data" format="bam" label="Input BAM file" />
+        <param name="max_depth" type="integer" min="0" max="10000000" label="Max depth" value="8000" />
     </inputs>
     <outputs>
         <data name="out_tabular" format="tabular" label="$input_bam.name (coverage stats)" />
     </outputs>
-    <stdio>
-        <!-- Assume anything other than zero is an error -->
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-    </stdio>
     <tests>
         <test>
             <param name="input_bam" value="ex1.bam" ftype="bam" />
+	    <param name="max_depth" value="123" />
             <output name="out_tabular" file="ex1.coverage_stats.tabular" ftype="tabular" />
         </test>
         <test>
+            <param name="input_bam" value="ex1.bam" ftype="bam" />
+            <param name="max_depth" value="50" />
+            <output name="out_tabular" file="ex1.coverage_stats.md50.tabular" ftype="tabular" />
+        </test>
+        <test>
             <param name="input_bam" value="coverage_test.bam" ftype="bam" />
+	    <param name="max_depth" value="123" />
             <output name="out_tabular" file="coverage_test.coverage_stats.tabular" ftype="tabular" />
         </test>
     </tests>
@@ -47,8 +53,8 @@
      2 Reference sequence length
      3 Number of mapped reads
      4 Number of placed but unmapped reads (typically unmapped partners of mapped reads)
-     5 Minimum coverage
-     6 Maximum coverage
+     5 Minimum coverage (per base of reference)
+     6 Maximum coverage (per base of reference)
      7 Mean coverage (given to 2 dp)
 ====== =================================================================================
 
@@ -77,9 +83,15 @@
 
 .. class:: warningmark
 
-**Note**. There is an internal hard limit of 8000 for the pileup routine in
-samtools, meaning the reported coverage from ``samtools depth`` will show
-maximum coverage depths *around* 8000.
+**Note**. If using this on a mapping BAM file, beware that the coverage counting is
+done per base of the reference.  This means if your reference has any extra bases
+compared to the reads being mapped, those bases will be skipped by CIGAR D operators
+and these "extra" bases can have extremely low coverage, giving potentially
+misleading ``min_cov`` values. A sliding window coverage may be more appropriate.
+
+**Note**. Up until samtools 1.2, there was an internal hard limit of 8000 for the
+pileup routine, meaning the reported coverage from ``samtools depth`` would show
+maximum coverage depths *around* 8000. This is now a run-time option.
 
 
 **Citation**