Mercurial > repos > blankenberg > naive_variant_caller

--- a/README.rst	Fri Feb 17 11:42:07 2017 -0500
+++ b/README.rst	Wed Feb 28 15:54:57 2018 -0500
@@ -1,4 +1,4 @@
-This repository contains the **Naive Variant Caller** tool.
+This repository contains the **Naive Variant Caller** tool (NVC).

 ------
--- a/dependency_configs/tool_dependencies.xml	Fri Feb 17 11:42:07 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="numpy" version="1.7.1">
-        <repository changeset_revision="300877695495" name="package_numpy_1_7" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="pyBamParser" version="0.0.1">
-        <repository changeset_revision="144681ee972c" name="package_pybamparser_0_0_1" owner="blankenberg" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="pyBamTools" version="0.0.2">
-        <repository changeset_revision="6819855ac2e8" name="package_pybamtools_0_0_2" owner="blankenberg" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/naive_variant_caller.xml	Wed Feb 28 15:54:57 2018 -0500
@@ -0,0 +1,232 @@
+<tool id="naive_variant_caller" name="Naive Variant Caller" version="0.0.3">
+  <description> - tabulate variable sites from BAM datasets</description>
+  <requirements>
+    <requirement type="package" version="0.0.3">nvc</requirement>
+  </requirements>
+  <stdio>
+    <exit_code range="1:" />
+    <exit_code range=":-1" />
+  </stdio>
+  <version_command>naive_variant_caller.py --version</version_command>
+  <command>naive_variant_caller.py
+      -o "${output_vcf}"
+
+     #for $input_bam in $reference_source.input_bams:
+         -b '${input_bam.input_bam}'
+         -i '${input_bam.input_bam.metadata.bam_index}'
+     #end for
+
+     #if $reference_source.reference_source_selector != "history":
+         -r '${reference_source.ref_file.fields.path}'
+     #elif $reference_source.ref_file:
+         -r '${reference_source.ref_file}'
+     #end if
+
+     #for $region in $regions:
+         --region '${region.chromosome}:${region.start}-${region.end}'
+     #end for
+
+     #for $region_file in $region_files:
+         --regions_filename '${region_file.input_region}'
+         --regions_file_columns '${int($region_file.input_region.metadata.chromCol)-1},${int($region_file.input_region.metadata.startCol)-1},${int($region_file.input_region.metadata.endCol)-1}'
+     #end for
+
+     ${variants_only}
+
+     ${use_strand}
+
+     --ploidy '${$ploidy}'
+
+     --min_support_depth '${min_support_depth}'
+
+     #if str($min_base_quality):
+         --min_base_quality '${min_base_quality}'
+     #end if
+
+     #if str($min_mapping_quality):
+         --min_mapping_quality '${min_mapping_quality}'
+     #end if
+
+     --allow_out_of_bounds_positions
+
+     #if str( $advanced_options.advanced_options_selector ) == "advanced":
+         #if str( $advanced_options.coverage_dtype ) != "guess":
+             --coverage_dtype '${advanced_options.coverage_dtype}'
+         #end if
+         ${advanced_options.safe}
+     #end if
+  </command>
+  <inputs>
+    <conditional name="reference_source">
+      <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
+        <option value="cached">Locally cached</option>
+        <option value="history">History</option>
+      </param>
+      <when value="cached">
+        <repeat name="input_bams" title="BAM file" min="1" >
+            <param name="input_bam" type="data" format="bam" label="BAM file">
+              <validator type="unspecified_build" />
+              <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
+            </param>
+        </repeat>
+        <param name="ref_file" type="select" label="Using reference genome" >
+          <options from_data_table="sam_fa_indexes">
+            <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> does not yet work in a repeat...-->
+          </options>
+          <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+        </param>
+      </when>
+      <when value="history"> <!-- FIX ME!!!! -->
+        <repeat name="input_bams" title="BAM file" min="1" >
+            <param name="input_bam" type="data" format="bam" label="BAM file" >
+            </param>
+        </repeat>
+        <param name="ref_file" type="data" format="fasta" label="Using reference file" optional="True" />
+      </when>
+    </conditional>
+
+    <repeat name="regions" title="Restrict to regions" min="0" >
+        <param name="chromosome" type="text" value="" optional="False" label="Chromosome" />
+        <param name="start" type="integer" value="" optional="True" label="Start" help="0-based, closed. (BED style)" />
+        <param name="end" type="integer" value="" optional="True" label="End" help="0-based, open. (BED style)" />
+    </repeat>
+
+    <repeat name="region_files" title="Restrict to regions by file" min="0" >
+        <param name="input_region" type="data" format="interval" label="Genomic Regions" />
+    </repeat>
+
+    <!-- TODO: enhance filtering -->
+    <param name="min_support_depth" type="integer" value="0" min="0" label="Minimum number of reads needed to consider a REF/ALT" />
+    <param name="min_base_quality" type="integer" value="" label="Minimum base quality" optional="True" />
+    <param name="min_mapping_quality" type="integer" value="" label="Minimum mapping quality" optional="True" />
+
+    <param name="ploidy" type="integer" value="2" min="1" label="Ploidy" />
+    <param name="variants_only" type="boolean" truevalue="--variants_only" falsevalue="" checked="False" label="Only write out positions with possible alternate alleles"/>
+
+    <param name="use_strand" type="boolean" truevalue="--use_strand" falsevalue="" checked="False" label="Report counts by strand"/>
+
+    <conditional name="advanced_options">
+        <param name="advanced_options_selector" type="select" label="Show Advanced Options">
+            <option value="basic" selected="True">Hide Advanced Options</option>
+            <option value="advanced">Show Advanced Options</option>
+        </param>
+        <when value="basic">
+            <!-- Do nothing here -->
+        </when>
+        <when value="advanced">
+            <param name="coverage_dtype" type="select" label="Choose the dtype to use for storing coverage information" help="This affects the maximum recorded value for a position, e.g. uint8 would be 255 coverage, but will require the least amount of RAM">
+                <option value="guess" selected="True">Guess</option>
+                <option value="uint8">uint8</option>
+                <option value="uint16">uint16</option>
+                <option value="uint32">uint32</option>
+                <option value="uint64">uint64</option>
+            </param>
+            <param name="safe" type="boolean" truevalue="--safe" falsevalue="" checked="False" label="Be extra safe"/>
+        </when>
+    </conditional>
+
+  </inputs>
+  <outputs>
+    <data format="vcf" name="output_vcf" />
+  </outputs>
+  <tests>
+      <test>
+          <param name="reference_source_selector" value="history" />
+          <param name="input_bam" value="fake_phiX174_reads_1.bam" ftype="bam" />
+          <param name="ref_file" value="phiX174.fasta" ftype="fasta" />
+          <param name="regions" value="0" />
+          <param name="min_support_depth" value="0" />
+          <param name="min_base_quality" value="" />
+          <param name="min_mapping_quality" value="" />
+          <param name="ploidy" value="2" />
+          <param name="variants_only" value="False" />
+          <param name="use_strand" value="False" />
+          <param name="advanced_options_selector" value="advanced" />
+          <param name="coverage_dtype" value="uint8" />
+          <output name="output_vcf" file="fake_phiX174_reads_1_test_out_1.vcf" compare="contains" />
+      </test>
+  </tests>
+  <help>
+**What it does**
+
+This tool is a naive variant caller that processes aligned sequencing reads from the BAM format and produces a VCF file containing per position variant calls. This tool allows multiple BAM files to be provided as input and utilizes read group information to make calls for individual samples.
+
+User configurable options allow filtering reads that do not pass mapping or base quality thresholds and minimum per base read depth; user's can also specify the ploidy and whether to consider each strand separately.
+
+In addition to calling alternate alleles based upon simple ratios of nucleotides at a position, per base nucleotide counts are also provided. A custom tag, NC, is used within the Genotype fields. The NC field is a comma-separated listing of nucleotide counts in the form of &lt;nucleotide&gt;=&lt;count&gt;, where a plus or minus character is prepended to indicate strand, if the strandedness option was specified.
+
+
+------
+
+**Inputs**
+
+Accepts one or more BAM input files and a reference genome from the built-in list or from a FASTA file in your history.
+
+
+**Outputs**
+
+The output is in VCF format.
+
+Example VCF output line, without reporting by strand:
+    ``chrM	16029	.	T	G,A,C	.	.	AC=15,9,5;AF=0.00155311658729,0.000931869952371,0.000517705529095	GT:AC:AF:NC	0/0:15,9,5:0.00155311658729,0.000931869952371,0.000517705529095:A=9,C=5,T=9629,G=15,``
+
+Example VCF output line, when reporting by strand:
+    ``chrM	16029	.	T	G,A,C	.	.	AC=15,9,5;AF=0.00155311658729,0.000931869952371,0.000517705529095	GT:AC:AF:NC	0/0:15,9,5:0.00155311658729,0.000931869952371,0.000517705529095:+T=3972,-A=9,-C=5,-T=5657,-G=15,``
+
+**Options**
+
+Reference Genome:
+
+    Ensure that you have selected the correct reference genome, either from the list of built-in genomes or by selecting the corresponding FASTA file from your history.
+
+Restrict to regions:
+
+    You can specify any number of regions on which you would like to receive results. You can specify just a chromosome name, or a chromosome name and start postion, or a chromosome name and start and end position for the set of desired regions.
+
+Minimum number of reads needed to consider a REF/ALT:
+
+    This value declares the minimum number of reads containing a particular base at each position in order to list and use said allele in genotyping calls. Default is 0.
+
+Minimum base quality:
+
+    The minimum base quality score needed for the position in a read to be used for nucleotide counts and genotyping. Default is no filter.
+
+Minimum mapping quality:
+
+    The minimum mapping quality score needed to consider a read for nucleotide counts and genotyping. Default is no filter.
+
+Ploidy:
+
+    The number of genotype calls to make at each reported position.
+
+Only write out positions with possible alternate alleles:
+
+    When set, only positions which have at least one non-reference nucleotide which passes declare filters will be present in the output.
+
+Report counts by strand:
+
+    When set, nucleotide counts (NC) will be reported in reference to the aligned read's source strand. Reported as: &lt;strand&gt;&lt;BASE&gt;=&lt;COUNT&gt;.
+
+Choose the dtype to use for storing coverage information:
+
+    This controls the maximum depth value for each nucleotide/position/strand (when specified). Smaller values require the least amount of memory, but have smaller maximal limits.
+
+        +--------+----------------------------+
+        | name   | maximum coverage value     |
+        +========+============================+
+        | uint8  | 255                        |
+        +--------+----------------------------+
+        | uint16 | 65,535                     |
+        +--------+----------------------------+
+        | uint32 | 4,294,967,295              |
+        +--------+----------------------------+
+        | uint64 | 18,446,744,073,709,551,615 |
+        +--------+----------------------------+
+
+
+  </help>
+  <citations>
+    <citation type="doi">10.1186/gb4161</citation>
+  </citations>
+
+</tool>
--- a/tool-data/tool_data_table_conf.xml.sample	Fri Feb 17 11:42:07 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-<tables>
-    <!-- Location of SAMTools indexes and other files -->
-    <table name="sam_fa_indexes" comment_char="#">
-        <columns>line_type, value, path</columns>
-        <file path="tool-data/sam_fa_indices.loc" />
-    </table>
-</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Wed Feb 28 15:54:57 2018 -0500
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Location of SAMTools indexes and other files -->
+    <table name="sam_fa_indexes" comment_char="#">
+        <columns>line_type, value, path</columns>
+        <file path="tool-data/sam_fa_indices.loc" />
+    </table>
+</tables>
--- a/tools/naive_variant_caller.py	Fri Feb 17 11:42:07 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-#!/usr/bin/env python
-#Dan Blankenberg
-import sys
-import optparse
-
-from pyBamParser.bam import Reader
-from pyBamTools.genotyping.naive import VCFReadGroupGenotyper, PROGRAM_NAME, PROGRAM_VERSION
-
-def main():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option( '-b', '--bam', dest='bam_file', action='append', type="string", default=[], help='BAM filename, optionally index filename. Multiple allowed.' )
-    parser.add_option( '-i', '--index', dest='index_file', action='append', type="string", default=[], help='optionally index filename. Multiple allowed.' )
-    parser.add_option( '-o', '--output_vcf_filename', dest='output_vcf_filename', action='store', default = None, type="string", help='Output VCF filename' )
-    parser.add_option( '-r', '--reference_genome_filename', dest='reference_genome_filename', action='store', default = None, type="string", help='Input reference file' )
-    parser.add_option( '-v', '--variants_only', dest='variants_only', action='store_true', default = False, help='Report only sites with a possible variant allele.' )
-    parser.add_option( '-s', '--use_strand', dest='use_strand', action='store_true', default = False, help='Report counts by strand' )
-    parser.add_option( '-p', '--ploidy', dest='ploidy', action='store', type="int", default=2, help='Ploidy. Default=2.' )
-    parser.add_option( '-d', '--min_support_depth', dest='min_support_depth', action='store', type="int", default=0, help='Minimum number of reads needed to consider a REF/ALT. Default=0.' )
-    parser.add_option( '-q', '--min_base_quality', dest='min_base_quality', action='store', type="int", default=None, help='Minimum base quality.' )
-    parser.add_option( '-m', '--min_mapping_quality', dest='min_mapping_quality', action='store', type="int", default=None, help='Minimum mapping.' )
-    parser.add_option( '-t', '--coverage_dtype', dest='coverage_dtype', action='store', type="string", default=None, help='dtype to use for coverage array' )
-    parser.add_option( '--allow_out_of_bounds_positions', dest='allow_out_of_bounds_positions', action='store_true', default = False, help='Allows out of bounds positions to not throw fatal errors' )
-    parser.add_option( '--safe', dest='safe', action='store_true', default = False, help='Perform checks to prevent certain errors. Is slower.' )
-    parser.add_option( '--region', dest='region', action='append', type="string", default=[], help='region' )
-    parser.add_option( '', '--version', dest='version', action='store_true', default = False, help='Report version and quit' )
-    (options, args) = parser.parse_args()
-
-    if options.version:
-        print "%s version %s" % ( PROGRAM_NAME, PROGRAM_VERSION )
-        sys.exit(0)
-
-    if len( options.bam_file ) == 0:
-        print >>sys.stderr, 'You must provide at least one bam (-b) file.'
-        parser.print_help( sys.stderr )
-        sys.exit( 1 )
-    if options.index_file:
-        assert len( options.index_file ) == len( options.bam_file ), "If you provide a name for an index file, you must provide the index name for all bam files."
-        bam_files = zip( options.bam_file, options.index_file )
-    else:
-        bam_files = [ ( x, ) for x in options.bam_file ]
-    if not options.reference_genome_filename:
-        print >> sys.stderr, "Warning: Reference file has not been specified. Providing a reference genome is highly recommended."
-    if options.output_vcf_filename:
-        out = open( options.output_vcf_filename, 'wb' )
-    else:
-        out = sys.stdout
-
-    regions = []
-    if options.region:
-        for region in options.region:
-            region_split = region.split( ":" )
-            region = region_split.pop( 0 )
-            if region_split:
-                region_split = filter( bool, region_split[0].split( '-' ) )
-                if region_split:
-                    if len( region_split ) != 2:
-                        print >> sys.stderr, "You must specify both a start and an end, or only a chromosome when specifying regions."
-                        cleanup_before_exit( tmp_dir )
-                        sys.exit( 1 )
-                    region = tuple( [ region ] + map( int, region_split ) )
-            regions.append( region )
-
-    coverage = VCFReadGroupGenotyper( map( lambda x: Reader( *x ), bam_files ), options.reference_genome_filename, dtype=options.coverage_dtype,
-                                               min_support_depth=options.min_support_depth, min_base_quality=options.min_base_quality,
-                                               min_mapping_quality=options.min_mapping_quality, restrict_regions=regions, use_strand=options.use_strand,
-                                               allow_out_of_bounds_positions=options.allow_out_of_bounds_positions, safe=options.safe )
-    for line in coverage.iter_vcf( ploidy=options.ploidy, variants_only=options.variants_only ):
-        out.write( "%s\n" % line )
-    out.close()
-
-if __name__ == "__main__": main()
--- a/tools/naive_variant_caller.xml	Fri Feb 17 11:42:07 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,226 +0,0 @@
-<tool id="naive_variant_caller" name="Naive Variant Caller" version="0.0.2">
-  <description> - tabulate variable sites from BAM datasets</description>
-  <requirements>
-    <requirement type="package" version="1.7.1">numpy</requirement>
-    <requirement type="package" version="0.0.1">pyBamParser</requirement>
-    <requirement type="package" version="0.0.2">pyBamTools</requirement>
-  </requirements>
-  <stdio>
-    <exit_code range="1:" err_level="fatal" />
-    <exit_code range=":-1" err_level="fatal" />
-  </stdio>
-  <command interpreter="python">naive_variant_caller.py
-      -o "${output_vcf}"
-
-     #for $input_bam in $reference_source.input_bams:
-         -b "${input_bam.input_bam}"
-         -i "${input_bam.input_bam.metadata.bam_index}"
-     #end for
-
-     #if $reference_source.reference_source_selector != "history":
-         -r "${reference_source.ref_file.fields.path}"
-     #elif $reference_source.ref_file:
-         -r "${reference_source.ref_file}"
-     #end if
-
-     #for $region in $regions:
-         --region "${region.chromosome}:${region.start}-${region.end}"
-     #end for
-
-     ${variants_only}
-
-     ${use_strand}
-
-     --ploidy "${$ploidy}"
-
-     --min_support_depth "${min_support_depth}"
-
-     #if str($min_base_quality):
-         --min_base_quality "${min_base_quality}"
-     #end if
-
-     #if str($min_mapping_quality):
-         --min_mapping_quality "${min_mapping_quality}"
-     #end if
-
-     --allow_out_of_bounds_positions
-
-     #if str( $advanced_options.advanced_options_selector ) == "advanced":
-         #if str( $advanced_options.coverage_dtype ) != "guess":
-             --coverage_dtype "${advanced_options.coverage_dtype}"
-         #end if
-         ${advanced_options.safe}
-     #end if
-  </command>
-  <version_command interpreter="python">naive_variant_caller.py --version</version_command>
-  <inputs>
-    <conditional name="reference_source">
-      <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
-        <option value="cached">Locally cached</option>
-        <option value="history">History</option>
-      </param>
-      <when value="cached">
-        <repeat name="input_bams" title="BAM file" min="1" >
-            <param name="input_bam" type="data" format="bam" label="BAM file">
-              <validator type="unspecified_build" />
-              <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
-            </param>
-        </repeat>
-        <param name="ref_file" type="select" label="Using reference genome" >
-          <options from_data_table="sam_fa_indexes">
-            <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> does not yet work in a repeat...-->
-          </options>
-          <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
-        </param>
-      </when>
-      <when value="history"> <!-- FIX ME!!!! -->
-        <repeat name="input_bams" title="BAM file" min="1" >
-            <param name="input_bam" type="data" format="bam" label="BAM file" >
-            </param>
-        </repeat>
-        <param name="ref_file" type="data" format="fasta" label="Using reference file" optional="True" />
-      </when>
-    </conditional>
-
-    <repeat name="regions" title="Restrict to regions" min="0" >
-        <param name="chromosome" type="text" value="" optional="False" label="Chromosome" />
-        <param name="start" type="integer" value="" optional="True" label="Start" />
-        <param name="end" type="integer" value="" optional="True" label="End" />
-    </repeat>
-
-    <!-- TODO: enhance filtering -->
-    <param name="min_support_depth" type="integer" value="0" min="0" label="Minimum number of reads needed to consider a REF/ALT" />
-    <param name="min_base_quality" type="integer" value="" label="Minimum base quality" optional="True" />
-    <param name="min_mapping_quality" type="integer" value="" label="Minimum mapping quality" optional="True" />
-
-    <param name="ploidy" type="integer" value="2" min="1" label="Ploidy" />
-    <param name="variants_only" type="boolean" truevalue="--variants_only" falsevalue="" checked="False" label="Only write out positions with possible alternate alleles"/>
-
-    <param name="use_strand" type="boolean" truevalue="--use_strand" falsevalue="" checked="False" label="Report counts by strand"/>
-
-    <conditional name="advanced_options">
-        <param name="advanced_options_selector" type="select" label="Show Advanced Options">
-            <option value="basic" selected="True">Hide Advanced Options</option>
-            <option value="advanced">Show Advanced Options</option>
-        </param>
-        <when value="basic">
-            <!-- Do nothing here -->
-        </when>
-        <when value="advanced">
-            <param name="coverage_dtype" type="select" label="Choose the dtype to use for storing coverage information" help="This affects the maximum recorded value for a position, e.g. uint8 would be 255 coverage, but will require the least amount of RAM">
-                <option value="guess" selected="True">Guess</option>
-                <option value="uint8">uint8</option>
-                <option value="uint16">uint16</option>
-                <option value="uint32">uint32</option>
-                <option value="uint64">uint64</option>
-            </param>
-            <param name="safe" type="boolean" truevalue="--safe" falsevalue="" checked="False" label="Be extra safe"/>
-        </when>
-    </conditional>
-
-  </inputs>
-  <outputs>
-    <data format="vcf" name="output_vcf" />
-  </outputs>
-  <help>
-**What it does**
-
-This tool is a naive variant caller that processes aligned sequencing reads from the BAM format and produces a VCF file containing per position variant calls. This tool allows multiple BAM files to be provided as input and utilizes read group information to make calls for individual samples.
-
-User configurable options allow filtering reads that do not pass mapping or base quality thresholds and minimum per base read depth; user's can also specify the ploidy and whether to consider each strand separately.
-
-In addition to calling alternate alleles based upon simple ratios of nucleotides at a position, per base nucleotide counts are also provided. A custom tag, NC, is used within the Genotype fields. The NC field is a comma-separated listing of nucleotide counts in the form of &lt;nucleotide&gt;=&lt;count&gt;, where a plus or minus character is prepended to indicate strand, if the strandedness option was specified.
-
-
-------
-
-**Inputs**
-
-Accepts one or more BAM input files and a reference genome from the built-in list or from a FASTA file in your history.
-
-
-**Outputs**
-
-The output is in VCF format.
-
-Example VCF output line, without reporting by strand:
-    ``chrM	16029	.	T	G,A,C	.	.	AC=15,9,5;AF=0.00155311658729,0.000931869952371,0.000517705529095	GT:AC:AF:NC	0/0:15,9,5:0.00155311658729,0.000931869952371,0.000517705529095:A=9,C=5,T=9629,G=15,``
-
-Example VCF output line, when reporting by strand:
-    ``chrM	16029	.	T	G,A,C	.	.	AC=15,9,5;AF=0.00155311658729,0.000931869952371,0.000517705529095	GT:AC:AF:NC	0/0:15,9,5:0.00155311658729,0.000931869952371,0.000517705529095:+T=3972,-A=9,-C=5,-T=5657,-G=15,``
-
-**Options**
-
-Reference Genome:
-
-    Ensure that you have selected the correct reference genome, either from the list of built-in genomes or by selecting the corresponding FASTA file from your history.
-
-Restrict to regions:
-
-    You can specify any number of regions on which you would like to receive results. You can specify just a chromosome name, or a chromosome name and start postion, or a chromosome name and start and end position for the set of desired regions.
-
-Minimum number of reads needed to consider a REF/ALT:
-
-    This value declares the minimum number of reads containing a particular base at each position in order to list and use said allele in genotyping calls. Default is 0.
-
-Minimum base quality:
-
-    The minimum base quality score needed for the position in a read to be used for nucleotide counts and genotyping. Default is no filter.
-
-Minimum mapping quality:
-
-    The minimum mapping quality score needed to consider a read for nucleotide counts and genotyping. Default is no filter.
-
-Ploidy:
-
-    The number of genotype calls to make at each reported position.
-
-Only write out positions with possible alternate alleles:
-
-    When set, only positions which have at least one non-reference nucleotide which passes declare filters will be present in the output.
-
-Report counts by strand:
-
-    When set, nucleotide counts (NC) will be reported in reference to the aligned read's source strand. Reported as: &lt;strand&gt;&lt;BASE&gt;=&lt;COUNT&gt;.
-
-Choose the dtype to use for storing coverage information:
-
-    This controls the maximum depth value for each nucleotide/position/strand (when specified). Smaller values require the least amount of memory, but have smaller maximal limits.
-
-        +--------+----------------------------+
-        | name   | maximum coverage value     |
-        +========+============================+
-        | uint8  | 255                        |
-        +--------+----------------------------+
-        | uint16 | 65,535                     |
-        +--------+----------------------------+
-        | uint32 | 4,294,967,295              |
-        +--------+----------------------------+
-        | uint64 | 18,446,744,073,709,551,615 |
-        +--------+----------------------------+
-
-
-  </help>
-  <tests>
-      <test>
-          <param name="reference_source_selector" value="history" />
-          <param name="input_bam" value="fake_phiX174_reads_1.bam" ftype="bam" />
-          <param name="ref_file" value="phiX174.fasta" ftype="fasta" />
-          <param name="regions" value="0" />
-          <param name="min_support_depth" value="0" />
-          <param name="min_base_quality" value="" />
-          <param name="min_mapping_quality" value="" />
-          <param name="ploidy" value="2" />
-          <param name="variants_only" value="False" />
-          <param name="use_strand" value="False" />
-          <param name="advanced_options_selector" value="advanced" />
-          <param name="coverage_dtype" value="uint8" />
-          <output name="output_vcf" file="fake_phiX174_reads_1_test_out_1.vcf" compare="contains" />
-      </test>
-  </tests>
-
-  <citations>
-    <citation type="doi">10.1186/gb4161</citation>
-  </citations>
-
-</tool>