changeset 0:182426b32995 draft default tip

Uploaded
author completegenomics
date Mon, 18 Jun 2012 20:15:00 -0400
parents
children
files cgatools/README.txt cgatools/datatypes_conf.xml cgatools/lib/galaxy/datatypes/completegenomics.py cgatools/tool-data/cg_crr_files.loc.sample cgatools/tool_config.xml.sample cgatools/tool_data_table_conf.xml.sample cgatools/tools/cgatools_1.5/calldiff.xml cgatools/tools/cgatools_1.5/cgatools cgatools/tools/cgatools_1.5/join.xml cgatools/tools/cgatools_1.5/junctiondiff.xml cgatools/tools/cgatools_1.5/listtestvariants.xml cgatools/tools/cgatools_1.5/listvariants.xml cgatools/tools/cgatools_1.5/snpdiff.xml cgatools/tools/cgatools_1.5/testvariants.xml cgatools/tools/cgatools_1.5/varfilter.xml cgatools/tools/cgatools_1.5/varfilter_wrapper.pl
diffstat 16 files changed, 1947 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/README.txt	Mon Jun 18 20:15:00 2012 -0400
@@ -0,0 +1,74 @@
+Provides galaxy tools for Complete Genomics' cgatools package -  http://www.completegenomics.com
+
+This repository provides tools to execute functions of cgatools from Complete Genomics, Inc. 
+and includes the cgatools 1.5 executable.
+ 
+Reference genome files for cgatools can be downloaded from the Complete Genomics ftp site:
+ftp://ftp.completegenomics.com/ReferenceFiles/build37.crr
+ftp://ftp.completegenomics.com/ReferenceFiles/build36.crr
+After copying the files to the desired locations, follow the instructions below to register
+the reference files with Galaxy.
+
+
+
+
+AUTOMATIC INSTALL
+
+When prompted for a tool panel section to contain the installed tools create a new section 
+called 'Complete Genomics - cgatools 1.5'.
+
+After install create a cg_crr_files.loc file in the tool-data directory of your Galaxy 
+instance by copying the cg_crr_files.loc.sample file. In cg_crr_files.loc edit the path 
+for the reference genome files (.crr files) downloaded from Complete Genomics' ftp site.
+
+Restart Galaxy instance after editing cg_crr_files.loc.
+
+
+
+
+MANUAL INSTALL
+
+For manual install from compressed files move/copy the following files into your Galaxy instance:
+directory tools/cgatools_1.5                        to   tools/
+file      lib/galaxy/datatypes/completegenomics.py  to   lib/galaxy/datatypes/
+file      tool-data/cg_crr_files.loc.sample         to   tool-data/cg_crr_files.loc
+
+In cg_crr_files.loc edit the path for the reference genome files (.crr files) downloaded 
+from Complete Genomics' ftp site.
+
+Paste from tool_config.xml.sample into the tool_conf.xml of your Galaxy instance:
+  <!-- 
+    Copy the following section to tool_conf.xml file in your Galaxy distribution if you are adding Complete Genomics tools manually to your Galaxy instance
+  -->
+  <section name="Complete Genomics - cgatools 1.5" id="cg_cgatools1.5">
+    <tool file="cgatools_1.5/listvariants.xml" />
+    <tool file="cgatools_1.5/testvariants.xml" />
+    <tool file="cgatools_1.5/listtestvariants.xml" />
+    <tool file="cgatools_1.5/calldiff.xml" />
+    <tool file="cgatools_1.5/snpdiff.xml" />
+    <tool file="cgatools_1.5/junctiondiff.xml" />
+    <tool file="cgatools_1.5/join.xml" />
+    <tool file="cgatools_1.5/varfilter.xml" />
+  </section>
+  <!-- End of copied section -->
+
+Paste from tool_data_table_conf.xml.sample into the tool_data_table_conf.xml of your Galaxy instance:
+    <!-- Start location of cgatools crr files -->
+    <table name="cg_crr_files" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/cg_crr_files.loc" />
+    </table>
+    <!-- End Location of cgatools crr files -->
+
+Paste from datatypes_conf.xml into the datatypes_conf.xml of your Galaxy instance:
+    <!-- 
+      Copy the following section to datatypes_conf.xml file in your Galaxy distribution if you are adding Complete Genomics tools manually to your Galaxy instance
+    -->
+    <!-- Start Complete Genomics Datatypes -->
+    <datatype extension="cg_var" type="galaxy.datatypes.completegenomics:CG_Var" display_in_upload="true" />
+    <datatype extension="cg_mastervar" type="galaxy.datatypes.completegenomics:CG_MasterVar" display_in_upload="true" />
+    <datatype extension="cg_gene" type="galaxy.datatypes.completegenomics:CG_Gene" display_in_upload="true" />
+    <!-- End Complete Genomics Datatypes -->
+    <!-- End of copied section -->
+    
+Restart Galaxy instance.
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/datatypes_conf.xml	Mon Jun 18 20:15:00 2012 -0400
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+<datatypes>
+  <datatype_files>
+    <datatype_file name="completegenomics.py"/>
+  </datatype_files>
+
+  <registration>
+    <!-- 
+      Copy the following section to datatypes_conf.xml file in your Galaxy distribution if you are adding Complete Genomics tools manually to your Galaxy instance
+    -->
+    <!-- Start Complete Genomics Datatypes -->
+    <datatype extension="cg_var" type="galaxy.datatypes.completegenomics:CG_Var" display_in_upload="true" />
+    <datatype extension="cg_mastervar" type="galaxy.datatypes.completegenomics:CG_MasterVar" display_in_upload="true" />
+    <datatype extension="cg_gene" type="galaxy.datatypes.completegenomics:CG_Gene" display_in_upload="true" />
+    <!-- End Complete Genomics Datatypes -->
+    <!-- End of copied section -->
+  </registration>
+  <sniffers>
+  </sniffers>
+</datatypes>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/lib/galaxy/datatypes/completegenomics.py	Mon Jun 18 20:15:00 2012 -0400
@@ -0,0 +1,71 @@
+"""
+Complete Genomics datatypes
+Birgit Crain - Complete Genomics, Inc
+"""
+
+import pkg_resources
+pkg_resources.require( "bx-python" )
+
+import logging
+from galaxy.datatypes import data
+from galaxy import util
+from cgi import escape
+from galaxy.datatypes import metadata
+from galaxy.datatypes import tabular
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes.tabular import Tabular
+import galaxy_utils.sequence.vcf
+from galaxy.datatypes.sniff import *
+
+log = logging.getLogger(__name__)
+
+class CG_Var( Tabular ):
+    file_ext = 'cg_var'
+    def __init__(self, **kwd):
+        """Initialize the CG_Var datatype and set the column names used when displaying a peek"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['locus', 'ploidy', 'allele', 'chromosome', 'begin', 'end',
+                             'varType', 'reference', 'alleleSeq', 'varScoreVAF',
+                             'varScoreEAF', 'varQuality', 'hapLink', 'xRef'
+                             ]
+    def display_peek( self, dataset ):
+        """Returns formatted html of peek, built by Tabular.make_html_table with self.column_names"""
+        return Tabular.make_html_table( self, dataset, column_names=self.column_names )
+
+class CG_MasterVar( Tabular ):
+    file_ext = 'cg_mastervar'
+    def __init__(self, **kwd):
+        """Initialize CG_MasterVar datatype"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['locus', 'ploidy', 'chromosome', 'begin', 'end', 'zygosity',
+                             'varType', 'reference', 'allele1Seq', 'allele2Seq',
+                             'allele1VarScoreVAF', 'allele2VarScoreVAF', 'allele1VarScoreEAF',
+                             'allele2VarScoreEAF', 'allele1VarQuality', 'allele2VarQuality',
+                             'allele1HapLink', 'allele2HapLink', 'allele1XRef', 'allele2XRef',
+                             'evidenceIntervalId', 'allele1ReadCount', 'allele2ReadCount',
+                             'referenceAlleleRead', 'totalReadCount', 'allele1Gene',
+                             'allele2Gene	pfam', 'miRBaseId', 'repeatMasker', 'segDupOverlap',
+                             'relativeCoverageDiploid', 'calledPloidy',
+                             'relativeCoverageNondiploid', 'calledLevel'
+                             ]
+    
+    def display_peek( self, dataset ):
+        """Returns formated html of peek"""
+        return Tabular.make_html_table( self, dataset, column_names=self.column_names )
+        
+class CG_Gene( Tabular ):
+    file_ext = 'cg_gene'
+    def __init__(self, **kwd):
+        """Initialize the CG_Gene datatype and set the column names used when displaying a peek"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['index', 'locus', 'allele', 'chromosome', 'begin', 'end',
+                             'varType', 'reference', 'call', 'xRef', 'geneId',
+                             'mrnaAcc', 'proteinAcc', 'symbol', 'orientation', 'component',
+                             'componentIndex', 'hasCodingRegion', 'impact', 'nucleotidePos',
+                             'proteinPos', 'annotationRefSequence', 'sampleSequence',
+                             'genomeRefSequence', 'pfam'
+                             ]
+   
+    def display_peek( self, dataset ):
+        """Returns formatted html of peek, built by Tabular.make_html_table with self.column_names"""
+        return Tabular.make_html_table( self, dataset, column_names=self.column_names )
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tool-data/cg_crr_files.loc.sample	Mon Jun 18 20:15:00 2012 -0400
@@ -0,0 +1,17 @@
+#This is a sample file distributed with cgatools repository that enables the cgatools
+#functions to use the .crr reference files. After installation download the reference 
+#genome files from Complete Genomics' ftp site:
+#ftp://ftp.completegenomics.com/ReferenceFiles/build37.crr
+#ftp://ftp.completegenomics.com/ReferenceFiles/build36.crr
+#and edit the path for the reference genomes to correspond to their location.
+#
+#Restart your Galaxy instance to ensure the file locations are registered with Galaxy 
+#properly.
+#
+#The cg_crr_files.loc file has this format (white space characters are TAB characters):
+#
+#<value>	<dbkey>	<name>	<path>
+#
+#hg19	hg19	build 37	/absolute/path/to/build37.crr
+#hg18	hg18	build 36	/absolute/path/to/build36.crr
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tool_config.xml.sample	Mon Jun 18 20:15:00 2012 -0400
@@ -0,0 +1,17 @@
+<?xml version="1.0"?>
+<toolbox>
+  <!-- 
+    Copy the following section to tool_conf.xml file in your Galaxy distribution if you are adding Complete Genomics tools manually to your Galaxy instance
+  -->
+  <section name="Complete Genomics - cgatools 1.5" id="cg_cgatools1.5">
+    <tool file="cgatools_1.5/listvariants.xml" />
+    <tool file="cgatools_1.5/testvariants.xml" />
+    <tool file="cgatools_1.5/listtestvariants.xml" />
+    <tool file="cgatools_1.5/calldiff.xml" />
+    <tool file="cgatools_1.5/snpdiff.xml" />
+    <tool file="cgatools_1.5/junctiondiff.xml" />
+    <tool file="cgatools_1.5/join.xml" />
+    <tool file="cgatools_1.5/varfilter.xml" />
+  </section>
+  <!-- End of copied section -->
+</toolbox>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tool_data_table_conf.xml.sample	Mon Jun 18 20:15:00 2012 -0400
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- 
+         Copy the following section to tool_data_table_conf.xml file in your Galaxy distribution if you are adding Complete Genomics tools manually to your Galaxy instance
+    -->
+    <!-- Start location of cgatools crr files -->
+    <table name="cg_crr_files" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/cg_crr_files.loc" />
+    </table>
+    <!-- End Location of cgatools crr files -->
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tools/cgatools_1.5/calldiff.xml	Mon Jun 18 20:15:00 2012 -0400
@@ -0,0 +1,350 @@
+<tool id="cg_calldiff" name="calldiff(beta) 1.5" version="1.0.0">
+<!--
+This tool creates a GUI for the calldiff function of cgatools from Complete Genomics, Inc.
+written 6-18-2012 by bcrain@completegenomics.com
+-->
+
+  <description>compares two Complete Genomics variant files.</description> <!--adds description in toolbar-->
+
+  <requirements>
+  	<requirement type="binary">cgatools</requirement>
+  </requirements>
+
+  <command> <!--run executable-->
+  	cgatools | head -1;
+  	cgatools calldiff --beta
+  	--reference ${crr.fields.path}
+		--variantsA $data_sources.inputA
+		--variantsB $data_sources.inputB
+		$validation 
+		$diploid 
+		--locus-stats-column-count $column 
+		--max-hypothesis-count $hypothesis
+		--output-prefix cg_
+		--reports `echo ${report1} ${report2} ${report3} ${report4} ${report5} ${somatic.report6} | sed 's/  */,/g'` 
+		#if $somatic.report6 == "SomaticOutput"
+			--genome-rootA $somatic.genomeA
+			--genome-rootB $somatic.genomeB
+			--calibration-root $somatic.calibration
+		#end if
+  </command>
+
+  <outputs>
+  	<data format="tabular" name="output1" from_work_dir="cg_SuperlocusOutput.tsv" label="${tool.name} on ${on_string}: SuperlocusOutput">
+  	<filter>(report1 == 'SuperlocusOutput')</filter>
+  	</data>
+  	<data format="tabular" name="output2" from_work_dir="cg_SuperlocusStats.tsv" label="${tool.name} on ${on_string}: SuperlocusStats">
+  	<filter>(report2 == 'SuperlocusStats')</filter>
+  	</data>
+  	<data format="tabular" name="output3" from_work_dir="cg_LocusOutput.tsv" label="${tool.name} on ${on_string}: LocusOutput">
+  	<filter>(report3 == 'LocusOutput')</filter>
+  	</data>
+  	<data format="tabular" name="output4" from_work_dir="cg_LocusStats.tsv" label="${tool.name} on ${on_string}: LocusStats">
+  	<filter>(report4 == 'LocusStats')</filter>
+  	</data>
+  	<data format="tabular" name="output5a" from_work_dir="cg_VariantsA.tsv" label="${tool.name} on ${on_string}: VariantsA">
+  	<filter>(report5 == 'VariantOutput')</filter>
+  	</data>
+  	<data format="tabular" name="output5b" from_work_dir="cg_VariantsB.tsv" label="${tool.name} on ${on_string}: VariantsB">
+  	<filter>(report5 == 'VariantOutput')</filter>
+  	</data>
+  	<data format="tabular" name="output6" from_work_dir="cg_SomaticOutput.tsv" label="${tool.name} on ${on_string}: SomaticOutput">
+  	<filter>(somatic['report6'] == 'SomaticOutput')</filter>
+  	</data>
+  </outputs>
+  
+  <inputs>
+		<!--form field to select crr file-->
+		<param name="crr" type="select" label="Reference genome (.crr file)">
+			<options from_data_table="cg_crr_files" />
+		</param>
+	
+		<!--conditional to select variant file input-->
+  	<conditional name="data_sources">
+      <param name="data_source" type="select" label="Where are the input varfiles?">
+        <option value="in" selected="true">imported into Galaxy</option>
+        <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
+      </param>
+      <when value="in">
+				<!--form field to select variant files-->
+				<param name="inputA" type="data" format="cg_var" label="Var file A">
+					<validator type="unspecified_build" />
+					<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+					 metadata_name="dbkey" metadata_column="1"
+					 message="cgatools is not currently available for this build."/>
+				</param>
+				<param name="inputB" type="data" format="cg_var" label="Var file B">
+					<validator type="unspecified_build" />
+					<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+					 metadata_name="dbkey" metadata_column="1"
+					 message="cgatools is not currently available for this build."/>
+				</param>
+			</when>
+      <when value="out">
+				<!--form fields to enter the paths of variant files located outside Galaxy-->
+				<param name="inputA" type="text" label="Variant file A (/path/varfile)" size="300" help="Variant file can be compressed (gz, bz2), e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/var-GS00000YYYY-ASM.tsv.bz2"/>
+				<param name="inputB" type="text" label="Variant file B (/path/varfile)" size="300" help="Variant file can be compressed (gz, bz2), e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/var-GS00000YYYY-ASM.tsv.bz2."/>
+			</when>
+		</conditional>
+		
+		<param name="diploid" type="select" label="Use diploid variant model" help="Uses varScoreEAF instead of varScoreVAF in somatic score computations. Also, uses diploid variant model instead of variable allele mixture model.">
+			<option value="">no</option>
+			<option value="--diploid">yes</option>
+		</param>
+			
+		<param name="column" type="integer" label="Number of columns for locus compare classification in the locus stats file (default 15)" value="15"/>
+		
+		<param name="hypothesis" type="integer" label="Maximum number of possible phasings to consider for a superlocus (default 32)" value="32"/>
+		      
+		<param name="validation" type="select" label="Reference cover validation" help="Turns on/off validation that all bases of a chromosome are covered by calls of the variant file.">
+			<option value="">on</option>
+			<option value="--no-reference-cover-validation">off</option>
+		</param>
+			
+		<param name="report1" type="select" label="Create report SuperlocusOutput">
+			<option value="">no</option>
+			<option value="SuperlocusOutput">yes</option>
+		</param>
+		<param name="report2" type="select" label="Create report SuperlocusStats">
+			<option value="">no</option>
+			<option value="SuperlocusStats">yes</option>
+		</param>
+		<param name="report3" type="select" label="Create report LocusOutput">
+			<option value="">no</option>
+			<option value="LocusOutput">yes</option>
+		</param>
+		<param name="report4" type="select" label="Create report LocusStats">
+			<option value="">no</option>
+			<option value="LocusStats">yes</option>
+		</param>
+		<param name="report5" type="select" label="Create report VariantOutput" help="Both variant files annotated by comparison results.If the somatic output report is requested, file A is also annotated with the same score ranks as produced in that report.">
+			<option value="">no</option>
+			<option value="VariantOutput">yes</option>
+		</param>
+		
+		<conditional name="somatic">
+			<param name="report6" type="select" label="Create report SomaticOutput" help="This report can only be generated on local Galaxy instances. Report for the list of simple variations that are present only in file 'A', annotated with the score that indicates the probability of the variation being truly somatic. Note: generating this report slows calldiff by 10x-20x.">
+				<option value="">no</option>
+				<option value="SomaticOutput">yes</option>
+			</param>
+			<when value="SomaticOutput">
+				<param name="genomeA" type="text" size="300" label="Directory for genome A (/path/dir)" help="The 'A' genome directory, e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000; this directory is expected to contain ASM/REF and ASM/EVIDENCE subdirectories."/>
+				<param name="genomeB" type="text" size="300" label="Directory for genome B (/path/dir)" help="The 'B' genome directory, e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000; this directory is expected to contain ASM/REF and ASM/EVIDENCE subdirectories."/>
+				<param name="calibration" type="text" size="300" label="Directory calibration data (/path/dir)" help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v1.tgz"/>
+			</when>
+		</conditional>
+		
+  </inputs>
+
+  <help>
+  
+**What it does**
+
+This tool compares two Complete Genomics variant files.
+
+**cgatools 1.5.0 Documentation**
+
+Userguide: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-user-guide.pdf
+
+Release notes: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-release-notes.pdf
+
+**Command line reference**::
+
+		COMMAND NAME
+		  calldiff - Compares two Complete Genomics variant files.
+		
+		DESCRIPTION
+		  Compares two Complete Genomics variant files. Divides the genome up into 
+		  superloci of nearby variants, then compares the superloci. Also refines the
+		  comparison to determine per-call or per-locus comparison results.
+				
+		  Comparison results are usually described by a semi-colon separated string, 
+		  one per allele. Each allele's comparison result is one of the following 
+		  classifications:
+				
+		    ref-identical   The alleles of the two variant files are identical, and
+		                    they are consistent with the reference.
+		    alt-identical   The alleles of the two variant files are identical, and
+		                    they are inconsistent with the reference.
+		    ref-consistent  The alleles of the two variant files are consistent, 
+		                    and they are consistent with the reference.
+		    alt-consistent  The alleles of the two variant files are consistent, 
+		                    and they are inconsistent with the reference.
+		    onlyA           The alleles of the two variant files are inconsistent, 
+		                    and only file A is inconsistent with the reference.
+		    onlyB           The alleles of the two variant files are inconsistent, 
+		                    and only file B is inconsistent with the reference.
+		    mismatch        The alleles of the two variant files are inconsistent, 
+		                    and they are both inconsistent with the reference.
+		    phase-mismatch  The two variant files would be consistent if the 
+		                    hapLink field had been empty, but they are 
+		                    inconsistent.
+		    ploidy-mismatch The superlocus did not have uniform ploidy.
+				
+		  In some contexts, this classification is rolled up into a simplified 
+		  classification, which is one of "identical", "consistent", "onlyA", 
+		  "onlyB", or "mismatch".
+				
+		  A good place to start looking at the results is the superlocus-output file.
+		  It has columns defined as follows:
+				
+		    SuperlocusId   An identifier given to the superlocus.
+		    Chromosome     The name of the chromosome.
+		    Begin          The 0-based offset of the start of the superlocus.
+		    End            The 0-based offset of the base one past the end of the 
+		                   superlocus.
+		    Classification The match classification of the superlocus.
+		    Reference      The reference sequence.
+		    AllelesA       A semicolon-separated list of the alleles (one per 
+		                   haplotype) for variant file A, for the phasing with the 
+		                   best comparison result.
+		    AllelesB       A semicolon-separated list of the alleles (one per 
+		                   haplotype) for variant file B, for the phasing with the 
+		                   best comparison result.
+				
+		  The locus-output file contains, for each locus in file A and file B that is
+		  not consistent with the reference, an annotated set of calls for the locus.
+		  The calls are annotated with the following columns:
+				
+		    SuperlocusId            The id of the superlocus containing the locus.
+		    File                    The variant file (A or B).
+		    LocusClassification     The locus classification is determined by the 
+		                            varType column of the call that is inconsistent
+		                            with the reference, concatenated with a 
+		                            modifier that describes whether the locus is 
+		                            heterozygous, homozygous, or contains no-calls.
+		                            If there is no one variant in the locus (i.e., 
+		                            it is heterozygous alt-alt), the locus 
+		                            classification begins with "other".
+		    LocusDiffClassification The match classification for the locus. This is
+		                            defined to be the best of the comparison of the
+		                            locus to the same region in the other file, or 
+		                            the comparison of the superlocus.
+				
+		  The somatic output file contains a list of putative somatic variations of 
+		  genome A. The output includes only those loci that can be classified as 
+		  snp, del, ins or sub in file A, and are called reference in the file B. 
+		  Every locus is annotated with the following columns:
+				
+		    VarCvgA                 The totalReadCount from file A for this locus 
+		                            (computed on the fly if file A is not a 
+		                            masterVar file).
+		    VarScoreA               The varScoreVAF from file A, or varScoreEAF if 
+		                            the "--diploid" option is used.
+		    RefCvgB                 The maximum of the uniqueSequenceCoverage 
+		                            values for the locus in genome B.
+		    RefScoreB               Minimum of the reference scores of the locus in
+		                            genome B.
+		    SomaticCategory         The category used for determining the 
+		                            calibrated scores and the SomaticRank.
+		    VarScoreACalib          The calibrated variant score of file A, under 
+		                            the model selected by using or not using the 
+		                            "--diploid" option, and corrected for the count
+		                            of heterozygous variants observed in this 
+		                            genome. See user guide for more information.
+		    VarScoreBCalib          The calibrated reference score of file B, under
+		                            the model selected by using or not using the 
+		                            "--diploid" option, and corrected for the count
+		                            of heterozygous variants observed in this 
+		                            genome. See user guide for more information.
+		    SomaticRank             The estimated rank of this somatic mutation, 
+		                            amongst all true somatic mutations within this 
+		                            SomaticCategory. The value is a number between 
+		                            0 and 1; a value of 0.012 means, for example, 
+		                            that an estimated 1.2% of the true somatic 
+		                            mutations in this somaticCategory have a 
+		                            somaticScore less than the somaticScore for 
+		                            this mutation. See user guide for more 
+		                            information.
+		    SomaticScore            An integer that provides a total order on 
+		                            quality for all somatic mutations. It is equal 
+		                            to -10*log10( P(false)/P(true) ), under the 
+		                            assumption that this genome has a rate of 
+		                            somatic mutation equal to 1/Mb for 
+		                            SomaticCategory snp, 1/10Mb for SomaticCategory
+		                            ins, 1/10Mb for SomaticCategory del, and 1/20Mb
+		                            for SomaticCategory sub. The computation is 
+		                            based on the assumptions described in the user 
+		                            guide, and is affected by choice of variant 
+		                            model selected by using or not using the 
+		                            "--diploid" option.
+		    SomaticQuality          Equal to VQHIGH for all somatic mutations where
+		                            SomaticScore &gt;= -10. Otherwise, this column is 
+		                            empty.
+				
+		OPTIONS
+		  -h [ --help ] 
+		      Print this help message.
+		
+		  --reference arg
+		      The input crr file.
+		
+		  --variantsA arg
+		      The "A" input variant file.
+		
+		  --variantsB arg
+		      The "B" input variant file.
+		
+		  --output-prefix arg
+		      The path prefix for all output reports.
+		
+		  --reports arg (=SuperlocusOutput,SuperlocusStats,LocusOutput,LocusStats)
+		      Comma-separated list of reports to generate. (Beware any reports whose 
+		      name begins with "Debug".) A report is one of:
+		        SuperlocusOutput      Report for superlocus classification.
+		        SuperlocusStats       Report for superlocus classification stats.
+		        LocusOutput           Report for locus classification.
+		        LocusStats            Report for locus stats.
+		        VariantOutput         Both variant files annotated by comparison 
+		                              results.If the somatic output report is 
+		                              requested, file A is also annotated with the 
+		                              same score ranks as produced in that report.
+		        SomaticOutput         Report for the list of simple variations that
+		                              are present only in file "A", annotated with 
+		                              the score that indicates the probability of 
+		                              the variation being truly somatic. Requires 
+		                              beta, genome-rootA, and genome-rootB options 
+		                              to be provided as well. Note: generating this
+		                              report slows calldiff by 10x-20x.
+		        DebugCallOutput       Report for call classification.
+		        DebugSuperlocusOutput Report for debug superlocus information.
+		        DebugSomaticOutput    Report for distribution estimates used for 
+		                              somatic rescoring. Only produced if 
+		                              SomaticOutput is also turned on.
+		
+		  --diploid 
+		      Uses varScoreEAF instead of varScoreVAF in somatic score computations. 
+		      Also, uses diploid variant model instead of variable allele mixture 
+		      model.
+		
+		  --locus-stats-column-count arg (=15)
+		      The number of columns for locus compare classification in the locus 
+		      stats file.
+		
+		  --max-hypothesis-count arg (=32)
+		      The maximum number of possible phasings to consider for a superlocus.
+		
+		  --no-reference-cover-validation 
+		      Turns off validation that all bases of a chromosome are covered by 
+		      calls of the variant file.
+		
+		  --genome-rootA arg
+		      The "A" genome directory, for example /data/GS00118-DNA_A01; this 
+		      directory is expected to contain ASM/REF and ASM/EVIDENCE 
+		      subdirectories.
+		
+		  --genome-rootB arg
+		      The "B" genome directory.
+		
+		  --calibration-root arg
+		      The directory containing calibration data. For example, there should 
+		      exist a file calibration-root/0.0.0/metrics.tsv.
+		
+		  --beta 
+		      This flag enables the SomaticOutput report, which is beta 
+		      functionality.
+		
+		SUPPORTED FORMAT_VERSION
+		  0.3 or later
+  </help>
+</tool>
Binary file cgatools/tools/cgatools_1.5/cgatools has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tools/cgatools_1.5/join.xml	Mon Jun 18 20:15:00 2012 -0400
@@ -0,0 +1,213 @@
+<tool id="cg_join" name="join(beta) 1.5" version="1.0.0">
+<!--
+This tool creates a GUI for the join function of cgatools from Complete Genomics, Inc.
+written 6-18-2012 by bcrain@completegenomics.com
+-->
+
+  <description>two tsv files based on equal fields or overlapping regions.</description> <!--adds description in toolbar-->
+
+  <requirements>
+  	<requirement type="binary">cgatools</requirement>
+  </requirements>
+
+  <command> <!--print the first line of cgatools' own output, then run cgatools join-->
+		cgatools | head -1;
+		cgatools join --beta 
+		--input $inputA 
+		--input $inputB 
+		--output $output 
+		--output-mode $outmode 
+		$dump 
+		--select $col
+		#for $m in $matches <!--one match option per "Exact match column" repeat entry-->
+			--match ${m.match}
+		#end for
+		#if $range_overlap.range == 'yes'
+			#for $o in $range_overlap.overlaps <!--one overlap option per "Range column" repeat entry-->
+				--overlap ${o.overlap}
+			#end for
+			--overlap-mode $range_overlap.overlapmode
+			--overlap-fraction-A $range_overlap.fractionA
+			--boundary-uncertainty-A $range_overlap.boundaryA
+			--overlap-fraction-B $range_overlap.fractionB
+			--boundary-uncertainty-B $range_overlap.boundaryB
+		#end if
+  </command>
+
+  <outputs>
+		<data format="tabular" name="output" />
+  </outputs>
+  
+  <inputs>
+   	<!--form field to select input file A-->
+    <param name="inputA" type="data" format="tabular" label="Select input file A ">
+      <validator type="unspecified_build" />
+			<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+				metadata_name="dbkey" metadata_column="0"
+				message="cgatools is not currently available for this build."/>
+    </param>
+    
+  	<!--form field to select input file B-->
+    <param name="inputB" type="data" format="tabular" label="Select input file B ">
+      <validator type="unspecified_build" />
+			<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+				metadata_name="dbkey" metadata_column="0"
+				message="cgatools is not currently available for this build."/>
+    </param>
+    
+  	<!--form field to specify columns to print-->
+    <param name="col" type="text" value="A.*,B.*" size="40" label="Specify columns for output" help="The default value A.*,B.* prints all columns from both files, other selections enter in the format A.col_name1,A.col_name3,B.col_name1" />
+
+  	<!--form field to select output-mode-->
+		<param name="outmode" type="select" label="Select output mode">
+			<option value="full" selected="true">full (1 line for each match of records in A and B)</option>
+			<option value="compact">compact (1 line for each record in A, joining multiple records in B by semicolon)</option>
+			<option value="compact-pct">compact-pct (same as compact, annotated with % overlap)</option>
+		</param>
+
+		<!--form field to select whether records of A without a match in B are printed (always-dump flag)-->
+		<param name="dump" type="select" label="Select records to print">
+			<option value="--always-dump" selected="true">print all records of A even if not matched in B</option>
+			<option value="">print only records of A that are matched in B</option>
+		</param>
+
+  	<!--form field to specify columns to match-->
+    <repeat name="matches" title="Exact match column">
+      <param name="match" type="text" size="40" label="Enter column:column" help="Enter column_from_A:column_from_B, e.g. chromosome:chromosome"/>
+    </repeat>
+    
+    <conditional name="range_overlap">
+    	<param name="range" type="select" label="Do you want to match columns by overlapping range?">
+    		<option value="no">no</option>
+    		<option value="yes">yes</option>
+    	</param>
+    	
+    	<when value="yes">
+				<!--form field to specify the range columns to overlap (one overlap option per entry)-->
+				<repeat name="overlaps" title="Range column">
+					<param name="overlap" type="text" size="40" label="Enter column&#91;,column&#93;:column&#91;,column&#93;" help="Enter range_start_from_A&#91;,range_stop_from_A&#93;:range_start_from_B&#91;,range_stop_from_B&#93;, e.g. begin,end:begin,end (overlapping range of positions) or begin,end:position"/>
+				</repeat>
+
+				<!--form field to select overlap-mode; labels mirror the overlap-mode text in the help section below-->
+				<param name="overlapmode" type="select" label="Select overlap mode">
+					<option value="strict" selected="true">strict (overlap if A.begin&lt;B.end and B.begin&lt;A.end)</option>
+					<option value="allow-abutting-points">allow-abutting-points (overlap if A.begin&lt;B.end and B.begin&lt;A.end, or if A.begin&lt;=B.end and B.begin&lt;=A.end and either A or B has zero length.)</option>
+				</param>
+
+				<!--form fields for overlap filtering; fractions are floats so non-integer values such as 0.5 can be entered-->
+				<param name="fractionA" type="float" value="0" label="Minimum fraction of A region overlap " />
+				<param name="boundaryA" type="integer" value="0" label="Boundary uncertainty for A for overlap filtering " help="Records failing the following boundary-uncertainty calculation are not included in the output: overlap length >= overlap-fraction-A * (A-range-length - boundary-uncertainty-A)"/>
+				
+				<param name="fractionB" type="float" value="0" label="Minimum fraction of B region overlap " />
+				<param name="boundaryB" type="integer" value="0" label="Boundary uncertainty for B for overlap filtering "  help="Records failing the following boundary-uncertainty calculation are not included in the output: overlap length >= overlap-fraction-B * (B-range-length - boundary-uncertainty-B)"/>
+    	</when>
+		</conditional>
+  </inputs>
+  
+  <help>
+  
+**What it does**
+
+This tool joins two tab-delimited files based on equal fields or overlapping regions.
+
+**cgatools 1.5.0 Documentation**
+
+Userguide: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-user-guide.pdf
+
+Release notes: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-release-notes.pdf
+
+**Command line reference**::
+
+		COMMAND NAME
+		  join - Joins two tab-delimited files based on equal fields or overlapping regions.
+		
+		DESCRIPTION
+		  Joins two tab-delimited files based on equal fields or overlapping regions.
+		  By default, an output record is produced for each match found between file 
+		  A and file B, but output format can be controlled by the --output-mode 
+		  parameter.
+		
+		OPTIONS
+		  -h [ --help ] 
+		      Print this help message.
+		
+		  --beta 
+		      This is a beta command. To run this command, you must pass the --beta 
+		      flag.
+		
+		  --input arg
+		      File name to use as input (may be passed in as arguments at the end of 
+		      the command, or omitted for stdin). There must be exactly two input 
+		      files to join. If only one file is specified by name, file A is taken 
+		      to be stdin and file B is the named file. File B is read fully into 
+		      memory, and file A is streamed. File A's columns appear first in the 
+		      output.
+		
+		  --output arg (=STDOUT)
+		      The output file name (may be omitted for stdout).
+		
+		  --match arg
+		      A match specification, which is a column from A and a column from B 
+		      separated by a colon.
+		
+		  --overlap arg
+		      Overlap specification. An overlap specification consists of a range 
+		      definition for files A and B, separated by a colon. A range definition 
+		      may be two columns, in which case they are interpreted as the beginning
+		      and end of the range. Or it may be one column, in which case the range 
+		      is defined as the 1-base range starting at the given value. The records
+		      from the two files must overlap in order to be considered for output. 
+		      Two ranges are considered to overlap if the overlap is at least one 
+		      base long, or if one of the ranges is length 0 and the ranges overlap 
+		      or abut. For example, "begin,end:offset" will match wherever end-begin 
+		      &gt; 0, begin&lt;offset+1, and end&gt;offset, or wherever end-begin = 0, 
+		      begin&lt;=offset+1, and end&gt;=offset.
+
+
+		  -m [ --output-mode ] arg (=full)
+		      Output mode, one of the following:
+		        full        Print an output record for each match found between 
+		                    file A and file B.
+		        compact     Print at most one record for each record of file A, 
+		                    joining the file B values by a semicolon and 
+		                    suppressing repeated B values and empty B values.
+		        compact-pct Same as compact, but for each distinct B value, 
+		                    annotate with the percentage of the A record that is 
+		                    overlapped by B records with that B value. Percentage 
+		                    is rounded up to nearest integer.
+		
+		  --overlap-mode arg (=strict)
+		      Overlap mode, one of the following:
+		        strict                Range A and B overlap if A.begin &lt; B.end and 
+		                              B.begin &lt; A.end.
+		        allow-abutting-points Range A and B overlap if they meet the strict 
+		                              requirements, or if A.begin &lt;= B.end and 
+		                              B.begin &lt;= A.end and either A or B has zero 
+		                              length.
+
+		  --select arg (=A.*,B.*)
+		      Set of fields to select for output.
+		
+		  -a [ --always-dump ] 
+		      Dump every record of A, even if there are no matches with file B.
+		
+		  --overlap-fraction-A arg (=0)
+		      Minimum fraction of A region overlap for filtering output.
+		
+		  --boundary-uncertainty-A arg (=0)
+		      Boundary uncertainty for overlap filtering. Specifically, records 
+		      failing the following predicate are filtered away: overlap &gt;= 
+		      overlap-fraction-A * ( A-range-length - boundary-uncertainty-A )
+		
+		  --overlap-fraction-B arg (=0)
+		      Minimum fraction of B region overlap for filtering output.
+		
+		  --boundary-uncertainty-B arg (=0)
+		      Boundary uncertainty for overlap filtering. Specifically, records 
+		      failing the following predicate are filtered away: overlap &gt;= 
+		      overlap-fraction-B * ( B-range-length - boundary-uncertainty-B )
+
+		SUPPORTED FORMAT_VERSION
+		  Any
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tools/cgatools_1.5/junctiondiff.xml	Mon Jun 18 20:15:00 2012 -0400
@@ -0,0 +1,153 @@
+<tool id="cg_junctiondiff" name="junctiondiff(beta) 1.5" version="1.0.0">
+<!--
+This tool creates a GUI for the junctiondiff function of cgatools from Complete Genomics, Inc.
+written 6-18-2012 by bcrain@completegenomics.com
+-->
+
+  <description>reports difference between junction calls</description> <!--adds description in toolbar-->
+
+  <requirements>
+  	<requirement type="binary">cgatools</requirement>
+  </requirements>
+
+  <command> <!--run executable-->
+  	cgatools | head -1;
+  	cgatools junctiondiff --beta 
+  	--reference $crr.fields.path 
+  	--junctionsA $data_sources.inputA 
+  	--junctionsB $data_sources.inputB 
+  	--scoreThresholdA $scoreA
+  	--scoreThresholdB $scoreB
+  	--distance $distance
+  	--minlength $minlength
+    $stat  
+  	--output-prefix cg_ 
+  	;
+  	mv cg_diff-*tsv cg_diff.tsv
+  </command>
+
+  <outputs>
+  	<data format="tabular" name="output1" from_work_dir="cg_diff.tsv" label="${tool.name} on ${on_string}: diff"/>
+		<data format="tabular" name="output2" from_work_dir="cg_report.tsv" label="${tool.name} on ${on_string}: report">
+			<filter>(stat == '--statout')</filter>
+		</data>
+  </outputs>
+  
+  <inputs>
+		<!--form field to select crr file-->
+		<param name="crr" type="select" label="Reference genome (.crr file)">
+			<options from_data_table="cg_crr_files" />
+		</param>
+	
+		<!--conditional to select junction file input-->
+  	<conditional name="data_sources">
+      <param name="data_source" type="select" label="Where are the input junction files?">
+        <option value="in" selected="true">imported into Galaxy</option>
+        <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
+      </param>
+      <when value="in">
+				<!--form fields to select junction files from the Galaxy history (tabular datasets)-->
+				<param name="inputA" type="data" format="tabular" label="Junction file A">
+					<validator type="unspecified_build" />
+					<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+					 metadata_name="dbkey" metadata_column="1"
+					 message="cgatools is not currently available for this build."/>
+				</param>
+				<param name="inputB" type="data" format="tabular" label="Junction file B">
+					<validator type="unspecified_build" />
+					<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+					 metadata_name="dbkey" metadata_column="1"
+					 message="cgatools is not currently available for this build."/>
+				</param>
+			</when>
+      <when value="out">
+				<!--form field to enter external input files-->
+				<param name="inputA" type="text" label="Junction file A (/path/junction_file)" size="40" help="e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/SV/allJunctionsBeta-GS00000YYYY-ASM.tsv"/>
+				<param name="inputB" type="text" label="Junction file B (/path/junction_file)" size="40" help="e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/SV/allJunctionsBeta-GS00000YYYY-ASM.tsv"/>
+			</when>
+		</conditional>
+		
+		<!--form field to select stats output-->
+		<param name="stat" type="select" label="Print input file stats">
+			<option value="">no</option>
+			<option value="--statout">yes</option>
+		</param>
+
+		<param name="scoreA" type="integer" label="Score threshold value for input file A (default 10)" value="10"/>
+		<param name="scoreB" type="integer" label="Score threshold value for input file B (default 0)" value="0"/>
+		<param name="distance" type="integer" label="Max distance between coordinates of potentially compatible junctions (default 200)" value="200"/>
+		<param name="minlength" type="integer" label="Minimum deletion junction length to be included into the difference file (default 500)" value="500"/>
+  </inputs>
+
+
+  <help>
+  
+**What it does**
+
+This tool reports difference between junction calls of Complete Genomics junctions files
+
+**cgatools 1.5.0 Documentation**
+
+Userguide: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-user-guide.pdf
+
+Release notes: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-release-notes.pdf
+
+**Command line reference**::
+
+		COMMAND NAME
+		  junctiondiff - Reports difference between junction calls of Complete Genomics junctions files.
+		
+		DESCRIPTION
+		  junctiondiff takes two junction files A and B as input and produces the 
+		  following output:
+		    - "diff-inputFileName" - the junctions from an input file A that are not 
+		      present in input file B.
+		    - "report.txt" - a brief summary report (if --statout is used)
+				
+		  Two junctions are considered equivalent if:
+		    - they come from different files
+		    - left and right positions of one junction are not more than "--distance"
+		      bases apart from the corresponding positions of another junction
+		    - the junction scores are equal or above the scoreThreshold
+		    - they are on the same strands
+		
+		OPTIONS
+		  -h [ --help ] 
+		      Print this help message.
+		
+		  --beta 
+		      This is a beta command. To run this command, you must pass the --beta 
+		      flag.
+		
+		  -s [ --reference ] arg
+		      Reference file.
+		
+		  -a [ --junctionsA ] arg
+		      input junction file A.
+		
+		  -b [ --junctionsB ] arg
+		      input junction file B.
+		
+		  -A [ --scoreThresholdA ] arg (=10)
+		      score threshold value for the input file A.
+		
+		  -B [ --scoreThresholdB ] arg (=0)
+		      score threshold value for the input file B.
+		
+		  -d [ --distance ] arg (=200)
+		      Max distance between coordinates of potentially compatible junctions.
+		
+		  -l [ --minlength ] arg (=500)
+		      Minimum deletion junction length to be included into the difference 
+		      file.
+		
+		  -o [ --output-prefix ] arg
+		      The path prefix for all the output reports.
+		
+		  -S [ --statout ] 
+		      (Debug) Report various input file statistics. Experimental feature.
+		
+		SUPPORTED FORMAT_VERSION
+		  1.5 or later
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tools/cgatools_1.5/listtestvariants.xml	Mon Jun 18 20:15:00 2012 -0400
@@ -0,0 +1,242 @@
+<tool id="cg_listtestvariants" name="listvariants(beta)-testvariants(beta) 1.5" version="1.0.0">
+<!--
+This tool creates a GUI for the listvariants and testvariants functions of cgatools from Complete Genomics, Inc.
+to be run consecutively with the same input files.
+written 6-18-2012 by bcrain@completegenomics.com
+-->
+
+  <description>performs listvariants and testvariants consecutively</description> <!--adds description in toolbar-->
+
+  <requirements>
+  	<requirement type="binary">cgatools</requirement>
+  </requirements>
+
+  <command> <!--run executable-->
+		cgatools | head -1;
+		cgatools listvariants
+		--beta
+		--reference ${crr.fields.path}
+		--output $output1
+		#if $include_list.listing == "yes" <!--only added when yes-->
+			--variant-listing $include_list.list
+		#end if
+		$longvar
+		--variants 
+		#if $file_types.data_sources.data_source == "in" 
+			#for $v in $file_types.data_sources.varfiles <!--get each var file-->
+				${v.input}
+			#end for
+		#else
+			`cat $file_types.data_sources.varlist`
+		#end if
+		;
+    
+		cgatools testvariants
+		--beta
+		--reference ${crr.fields.path}
+		--output $output2
+		--input $output1
+		--variants 
+		#if $file_types.data_sources.data_source == "in" 
+			#for $v in $file_types.data_sources.varfiles <!--get each var/mastervar file-->
+				${v.input}
+			#end for
+		#else
+			`cat $file_types.data_sources.varlist`
+		#end if
+  </command>
+
+  <outputs>
+    <data format="tabular" name="output1" label="listvariants output"/>
+    <data format="tabular" name="output2" label="testvariants output"/>
+  </outputs>
+  
+  <inputs>
+  	<!--form field to select crr file-->
+    <param name="crr" type="select" label="Reference genome (.crr file)">
+      <options from_data_table="cg_crr_files" />
+    </param>
+
+  	<!--form field to select long variants option-->
+    <param name="longvar" type="select" label="List long variants?">
+      <option value="" selected="true">no</option>
+      <option value="--list-long-variants">yes</option>
+    </param>
+
+  	<!--form fields to include existing variant list-->
+    <conditional name="include_list">
+      <param name="listing" type="select" label="Include variant listing?">
+        <option value="no" selected="true">no</option>
+        <option value="yes">yes</option>
+      </param>
+      <when value="yes">
+        <param name="list" type="data" format="tabular" label="Variant listing">
+        <validator type="unspecified_build" />
+						<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+						 metadata_name="dbkey" metadata_column="1"
+						 message="cgatools is not currently available for this build."/>
+        </param>
+      </when>
+    </conditional>
+
+		<!--conditional to select input file type-->
+  	<conditional name="file_types">
+      <param name="file_type" type="select" label="Select the input file type">
+        <option value="var" selected="true">var files</option>
+        <option value="mastervar">mastervar files</option>
+      </param>
+      
+      <when value="var">			
+				<!--conditional to select variant file input-->
+				<conditional name="data_sources">
+					<param name="data_source" type="select" label="Where are the input var files?">
+						<option value="in" selected="true">imported into Galaxy</option>
+						<option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
+					</param>
+					<when value="in">
+						<!--form field to select variant files-->
+						<repeat name="varfiles" title="Variant files">
+							<param name="input" type="data" format="cg_var" label="Dataset">
+								<validator type="unspecified_build" />
+								<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+								 metadata_name="dbkey" metadata_column="1"
+								 message="cgatools is not currently available for this build."/>
+							</param>
+						</repeat>
+					</when>
+					<when value="out">
+						<!--form field to enter the path of a file that lists the var files-->
+						<param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="This file should contain a list of var files, one per line in the format /path/varfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/var-GS00000YYYY-ASM.tsv.bz2), var files can be compressed (gz, bz2)."/>
+					</when>
+				</conditional>
+			</when>
+			
+	    <when value="mastervar">			
+				<!--conditional to select variant file input-->
+				<conditional name="data_sources">
+					<param name="data_source" type="select" label="Where are the input mastervar files?">
+						<option value="in" selected="true">imported into Galaxy</option>
+						<option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
+					</param>
+					<when value="in">
+						<!--form field to select variant files-->
+						<repeat name="varfiles" title="Variant files">
+							<param name="input" type="data" format="cg_mastervar" label="Dataset">
+								<validator type="unspecified_build" />
+								<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+								 metadata_name="dbkey" metadata_column="1"
+								 message="cgatools is not currently available for this build."/>
+							</param>
+						</repeat>
+					</when>
+					<when value="out">
+						<!--form field to enter the path of a file that lists the mastervar files-->
+						<param name="varlist" type="text" label="List of mastervar files (/path/file)" size="200" help="This file should contain a list of mastervar files, one per line in the format /path/varfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/masterVarBeta-GS00000YYYY-ASM.tsv.bz2), mastervar files can be compressed (gz, bz2)."/>
+					</when>
+				</conditional>
+			</when>
+		</conditional>
+  </inputs>
+
+  <help>
+  
+**What it does**
+
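+This tool first uses cgatools listvariants to list all called variants present in the var or mastervar files, and then uses cgatools testvariants to test the same files for the presence of each listed variant.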
+
+**cgatools 1.5.0 Documentation**
+
+Userguide: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-user-guide.pdf
+
+Release notes: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-release-notes.pdf
+
+**Command line reference**::
+
+		COMMAND NAME
+		  listvariants - Lists the variants present in a variant file.
+
+		DESCRIPTION
+		  Lists all called variants present in the specified variant files, in a 
+		  format suitable for processing by the testvariants command. The output is a
+		  tab-delimited file consisting of the following columns:
+			 
+		    variantId  Sequential id assigned to each variant.
+		    chromosome The chromosome of the variant.
+		    begin      0-based reference offset of the beginning of the variant.
+		    end        0-based reference offset of the end of the variant.
+		    varType    The varType as extracted from the variant file.
+		    reference  The reference sequence.
+		    alleleSeq  The variant allele sequence as extracted from the variant 
+		               file.
+		    xRef       The xRef as extracted from the variant file.
+
+		OPTIONS
+		  -h [ --help ] 
+		      Print this help message.
+
+		  --beta 
+		      This is a beta command. To run this command, you must pass the --beta 
+		      flag.
+
+		  --reference arg
+		      The reference crr file.
+
+		  --output arg (=STDOUT)
+		      The output file (may be omitted for stdout).
+
+		  --variants arg
+		      The input variant files (may be positional args).
+
+		  --variant-listing arg
+		      The output of another listvariants run, to be merged in to produce the 
+		      output of this run.
+
+		  --list-long-variants 
+		      In addition to listing short variants, list longer variants as well 
+		      (10's of bases) by concatenating nearby calls.
+
+		SUPPORTED FORMAT_VERSION
+		  0.3 or later
+		
+		
+		
+		COMMAND NAME
+		  testvariants - Tests variant files for presence of variants.
+		
+		DESCRIPTION
+		  Tests variant files for presence of variants. The output is a tab-delimited
+		  file consisting of the columns of the input variants file, plus a column 
+		  for each assembly results file that contains a character code for each 
+		  allele. The character codes have meaning as follows:
+			
+		    0 This allele of this genome is consistent with the reference at this 
+		      locus but inconsistent with the variant.
+		    1 This allele of this genome has the input variant at this locus.
+		    N This allele of this genome has no-calls but is consistent with the 
+		      input variant.
+		
+		OPTIONS
+		  -h [ --help ] 
+		      Print this help message.
+		
+		  --beta 
+		      This is a beta command. To run this command, you must pass the --beta 
+		      flag.
+		
+		  --reference arg
+		      The reference crr file.
+		
+		  --input arg (=STDIN)
+		      The input variants to test for.
+		
+		  --output arg (=STDOUT)
+		      The output file (may be omitted for stdout).
+		
+		  --variants arg
+		      The input variant files (may be passed in as arguments at the end of 
+		      the command).
+		
+		SUPPORTED FORMAT_VERSION
+		  0.3 or later			
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tools/cgatools_1.5/listvariants.xml	Mon Jun 18 20:15:00 2012 -0400
@@ -0,0 +1,191 @@
+<tool id="cg_listvariant" name="listvariants(beta) 1.5" version="1.0.0">
+<!--
+This tool creates a GUI for the listvariants function of cgatools from Complete Genomics, Inc.
+written 6-18-2012 by bcrain@completegenomics.com
+-->
+
+  <description>lists all called variants</description> <!--adds description in toolbar-->
+
+  <requirements>
+  	<requirement type="binary">cgatools</requirement>
+  </requirements>
+
+  <command> <!--run executable-->
+  	cgatools | head -1;
+    cgatools listvariants
+		--beta
+		--reference ${crr.fields.path}
+		--output $output
+		#if $include_list.listing == "yes" <!--only added when yes-->
+			--variant-listing $include_list.list
+		#end if
+		$longvar
+		--variants 
+		#if $file_types.data_sources.data_source == "in" 
+			#for $v in $file_types.data_sources.varfiles <!--get each var/mastervar file-->
+				${v.input}
+			#end for
+		#else
+			`cat $file_types.data_sources.varlist`
+		#end if
+  </command>
+
+  <inputs>
+		<!--form field to select crr file-->
+		<param name="crr" type="select" label="Reference genome (.crr file)">
+			<options from_data_table="cg_crr_files" />
+		</param>
+	
+  	<!--form field to select long variants option-->
+    <param name="longvar" type="select" label="List long variants?">
+      <option value="" selected="true">no</option>
+      <option value="--list-long-variants">yes</option>
+    </param>
+
+  	<!--form fields to include existing variant list-->
+    <conditional name="include_list">
+      <param name="listing" type="select" label="Include variant listing?">
+        <option value="no" selected="true">no</option>
+        <option value="yes">yes</option>
+      </param>
+      <when value="yes">
+        <param name="list" type="data" format="tabular" label="Variant listing"/>
+      </when>
+    </conditional>
+
+		<!--conditional to select input file type-->
+  	<conditional name="file_types">
+      <param name="file_type" type="select" label="Select the input file type">
+        <option value="var" selected="true">var files</option>
+        <option value="mastervar">mastervar files</option>
+      </param>
+      
+      <when value="var">			
+				<!--conditional to select variant file input-->
+				<conditional name="data_sources">
+					<param name="data_source" type="select" label="Where are the input var files?">
+						<option value="in" selected="true">imported into Galaxy</option>
+						<option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
+					</param>
+					<when value="in">
+						<!--form field to select variant files-->
+						<repeat name="varfiles" title="Variant files">
+							<param name="input" type="data" format="cg_var" label="Dataset">
+								<validator type="unspecified_build" />
+								<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+								 metadata_name="dbkey" metadata_column="1"
+								 message="cgatools is not currently available for this build."/>
+								 <!--<validator type="expression" message="Dataset does not match selected build.">$dbkey == $crr.fields.dbkey</validator>-->
+							</param>
+						</repeat>
+					</when>
+					<when value="out">
+						<!--form field to enter the path of a file that lists the var files-->
+						<param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="This file should contain a list of var files, one per line in the format /path/varfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/var-GS00000YYYY-ASM.tsv.bz2), var files can be compressed (gz, bz2)."/>
+					</when>
+				</conditional>
+			</when>
+			
+	    <when value="mastervar">			
+				<!--conditional to select variant file input-->
+				<conditional name="data_sources">
+					<param name="data_source" type="select" label="Where are the input mastervar files?">
+						<option value="in" selected="true">imported into Galaxy</option>
+						<option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
+					</param>
+					<when value="in">
+						<!--form field to select variant files-->
+						<repeat name="varfiles" title="Variant files">
+							<param name="input" type="data" format="cg_mastervar" label="Dataset">
+								<validator type="unspecified_build" />
+								<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+								 metadata_name="dbkey" metadata_column="1"
+								 message="cgatools is not currently available for this build."/>
+							</param>
+						</repeat>
+					</when>
+					<when value="out">
+						<!--form field to enter the path of a file that lists the mastervar files-->
+						<param name="varlist" type="text" label="List of mastervar files (/path/file)" size="200" help="This file should contain a list of mastervar files, one per line in the format /path/varfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/masterVarBeta-GS00000YYYY-ASM.tsv.bz2), mastervar files can be compressed (gz, bz2)."/>
+					</when>
+				</conditional>
+			</when>
+		</conditional>
+  </inputs>
+  
+  <outputs>
+    <data format="tabular" name="output"/>
+  </outputs>
+
+<!--	<tests>
+    <test>
+    	<param name="reference" value="hg19.crr"/>
+      <param name="file_type" value="var"/>
+      <param name="data_source" value="in"/>
+      <param name="varfiles?input" value="??"/>
+      <param name="varfiles?input" value="??"/>
+      <output name="output" file="??"/>
+    </test>
+	</tests>-->
+  
+  <help>
+
+**What it does**
+
+This tool uses the cgatools listvariants to list all called variants present in the var or mastervar files.
+
+**cgatools 1.5.0 Documentation**
+
+Userguide: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-user-guide.pdf
+
+Release notes: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-release-notes.pdf
+
+**Command line reference**::
+
+		COMMAND NAME
+		  listvariants - Lists the variants present in a variant file.
+
+		DESCRIPTION
+		  Lists all called variants present in the specified variant files, in a 
+		  format suitable for processing by the testvariants command. The output is a
+		  tab-delimited file consisting of the following columns:
+			 
+		    variantId  Sequential id assigned to each variant.
+		    chromosome The chromosome of the variant.
+		    begin      0-based reference offset of the beginning of the variant.
+		    end        0-based reference offset of the end of the variant.
+		    varType    The varType as extracted from the variant file.
+		    reference  The reference sequence.
+		    alleleSeq  The variant allele sequence as extracted from the variant 
+		               file.
+		    xRef       The xRef as extracted from the variant file.
+
+		OPTIONS
+		  -h [ --help ] 
+		      Print this help message.
+
+		  --beta 
+		      This is a beta command. To run this command, you must pass the --beta 
+		      flag.
+
+		  --reference arg
+		      The reference crr file.
+
+		  --output arg (=STDOUT)
+		      The output file (may be omitted for stdout).
+
+		  --variants arg
+		      The input variant files (may be positional args).
+
+		  --variant-listing arg
+		      The output of another listvariants run, to be merged in to produce the 
+		      output of this run.
+
+		  --list-long-variants 
+		      In addition to listing short variants, list longer variants as well 
+		      (10's of bases) by concatenating nearby calls.
+
+		SUPPORTED FORMAT_VERSION
+		  0.3 or later
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tools/cgatools_1.5/snpdiff.xml	Mon Jun 18 20:15:00 2012 -0400
@@ -0,0 +1,184 @@
+<tool id="cg_snpdiff" name="snpdiff 1.5" version="1.0.0">
+<!--
+This tool creates a GUI for the snpdiff function of cgatools from Complete Genomics, Inc.
+written 6-18-2012 by bcrain@completegenomics.com
+-->
+
+  <description>compares snp calls to a Complete Genomics variant file.</description> <!--adds description in toolbar-->
+
+  <requirements>
+  	<requirement type="binary">cgatools</requirement>
+  </requirements>
+
+  <!--
+  Prints the cgatools version banner, then runs snpdiff.
+  - varfile lives inside the data_sources conditional, so it has to be addressed
+    as data_sources.varfile; a bare $varfile is not defined at the top level.
+  - report1..report3 each hold either the report name or an empty string; echo
+    collapses the empty ones and sed joins the remaining names with commas.
+  - Reports are written to the job working directory with the cg_ prefix, which
+    is where the outputs section picks them up.
+  -->
+  <command>
+    cgatools | head -1;
+    cgatools snpdiff
+    --reference ${crr.fields.path}
+    --variants ${data_sources.varfile}
+    --genotypes ${genotype}
+    --output-prefix cg_
+    --reports `echo ${report1} ${report2} ${report3} | sed 's/  */,/g'`
+  </command>
+
+  <!--
+  One tabular dataset per report. Each dataset is collected from the cg_-prefixed
+  file in the working directory and is only created when the matching report
+  select was set to "yes" (i.e. carries the report name).
+  -->
+  <outputs>
+    <data name="output1" format="tabular" from_work_dir="cg_Output.tsv" label="${tool.name} on ${on_string}: Output">
+      <filter>(report1 == 'Output')</filter>
+    </data>
+    <data name="output2" format="tabular" from_work_dir="cg_Verbose.tsv" label="${tool.name} on ${on_string}: Verbose">
+      <filter>(report2 == 'Verbose')</filter>
+    </data>
+    <data name="output3" format="tabular" from_work_dir="cg_Stats.tsv" label="${tool.name} on ${on_string}: Stats">
+      <filter>(report3 == 'Stats')</filter>
+    </data>
+  </outputs>
+  
+  <inputs>
+    <!-- Reference genome: the choices come from the cg_crr_files data table. -->
+    <param name="crr" type="select" label="Reference genome (.crr file)">
+      <options from_data_table="cg_crr_files" />
+    </param>
+
+    <!-- Variant file: either a dataset in the history or a path typed in by the user. -->
+    <conditional name="data_sources">
+      <param name="data_source" type="select" label="Where is the input varfile?">
+        <option value="in" selected="true">imported into Galaxy</option>
+        <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
+      </param>
+      <when value="in">
+        <!-- Galaxy dataset; the validators require a build (dbkey) that is listed in cg_crr_files.loc. -->
+        <param name="varfile" type="data" format="cg_var" label="Var file">
+          <validator type="unspecified_build" />
+          <validator type="dataset_metadata_in_file"
+                     filename="cg_crr_files.loc"
+                     metadata_name="dbkey"
+                     metadata_column="1"
+                     message="cgatools is not currently available for this build."/>
+        </param>
+      </when>
+      <when value="out">
+        <!-- Free-text path to a variant file that is not imported into Galaxy. -->
+        <param name="varfile" type="text" label="Variant file (/path/varfile)" size="40" help="Variant file can be compressed (gz, bz2), e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/var-GS00000YYYY-ASM.tsv.bz2"/>
+      </when>
+    </conditional>
+
+    <!-- Genotypes file holding the SNP calls to compare; same build validation as the var file. -->
+    <param name="genotype" type="data" format="tabular" label="Genotypes file with SNP calls" help="The genotypes file is a tab-delimited file with at 
+    least the following columns (additional columns may be given): Chromosome (Required), Offset0Based (Required), GenotypesStrand (Optional), Genotypes (Optional)">
+      <validator type="unspecified_build" />
+      <validator type="dataset_metadata_in_file"
+                 filename="cg_crr_files.loc"
+                 metadata_name="dbkey"
+                 metadata_column="1"
+                 message="cgatools is not currently available for this build."/>
+    </param>
+
+    <!-- Report switches: "no" yields an empty value, "yes" yields the report name passed to the reports option. -->
+    <param name="report1" type="select" label="Create report Output">
+      <option value="">no</option>
+      <option value="Output">yes</option>
+    </param>
+    <param name="report2" type="select" label="Create report Verbose">
+      <option value="">no</option>
+      <option value="Verbose">yes</option>
+    </param>
+    <param name="report3" type="select" label="Create report Stats">
+      <option value="">no</option>
+      <option value="Stats">yes</option>
+    </param>
+
+  </inputs>
+
+  <help>
+  
+**What it does**
+
+This tool compares snp calls to a Complete Genomics variant file.
+
+**cgatools 1.5.0 Documentation**
+
+Userguide: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-user-guide.pdf
+
+Release notes: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-release-notes.pdf
+
+**Command line reference**::
+
+		COMMAND NAME
+		  snpdiff - Compares snp calls to a Complete Genomics variant file.
+		
+		DESCRIPTION
+		  Compares the snp calls in the "genotypes" file to the calls in a Complete 
+		  Genomics variant file. The genotypes file is a tab-delimited file with at 
+		  least the following columns (additional columns may be given):
+				
+		    Chromosome      (Required) The name of the chromosome.
+		    Offset0Based    (Required) The 0-based offset in the chromosome.
+		    GenotypesStrand (Optional) The strand of the calls in the Genotypes 
+		                    column (+ or -, defaults to +).
+		    Genotypes       (Optional) The calls, one per allele. The following 
+		                    calls are recognized:
+		                    A,C,G,T A called base.
+		                    N       A no-call.
+		                    -       A deleted base.
+		                    .       A non-snp variation.
+				
+		  The output is a tab-delimited file consisting of the columns of the 
+		  original genotypes file, plus the following additional columns:
+				
+		    Reference         The reference base at the given position.
+		    VariantFile       The calls made by the variant file, one per allele. 
+		                      The character codes are the same as is described for 
+		                      the Genotypes column.
+		    DiscordantAlleles (Only if Genotypes is present) The number of 
+		                      Genotypes alleles that are discordant with calls in 
+		                      the VariantFile. If the VariantFile is described as 
+		                      haploid at the given position but the Genotypes is 
+		                      diploid, then each genotype allele is compared 
+		                      against the haploid call of the VariantFile.
+		    NoCallAlleles     (Only if Genotypes is present) The number of 
+		                      Genotypes alleles that were no-called by the 
+		                      VariantFile. If the VariantFile is described as 
+		                      haploid at the given position but the Genotypes is 
+		                      diploid, then a VariantFile no-call is counted twice.
+				
+		  The verbose output is a tab-delimited file consisting of the columns of the
+		  original genotypes file, plus the following additional columns:
+				
+		    Reference   The reference base at the given position.
+		    VariantFile The call made by the variant file for one allele (there is 
+		                a line in this file for each allele). The character codes 
+		                are the same as is described for the Genotypes column.
+		    [CALLS]     The rest of the columns are pasted in from the VariantFile,
+		                describing the variant file line used to make the call.
+				
+		  The stats output is a comma-separated file with several tables describing 
+		  the results of the snp comparison, for each diploid genotype. The tables 
+		  all describe the comparison result (column headers) versus the genotype 
+		  classification (row labels) in different ways. The "Locus classification" 
+		  tables have the most detailed match classifications, while the "Locus 
+		  concordance" tables roll these match classifications up into "discordance" 
+		  and "no-call". A locus is considered discordant if it is discordant for 
+		  either allele. A locus is considered no-call if it is concordant for both 
+		  alleles but has a no-call on either allele. The "Allele concordance" 
+		  describes the comparison result on a per-allele basis.
+		
+		OPTIONS
+		  -h [ --help ] 
+		      Print this help message.
+		
+		  --reference arg
+		      The input crr file.
+		
+		  --variants arg
+		      The input variant file.
+		
+		  --genotypes arg
+		      The input genotypes file.
+		
+		  --output-prefix arg
+		      The path prefix for all output reports.
+		
+		  --reports arg (=Output,Verbose,Stats)
+		      Comma-separated list of reports to generate. A report is one of:
+		        Output  The output genotypes file.
+		        Verbose The verbose output file.
+		        Stats   The stats output file.
+						
+		SUPPORTED FORMAT_VERSION
+		  0.3 or later
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tools/cgatools_1.5/testvariants.xml	Mon Jun 18 20:15:00 2012 -0400
@@ -0,0 +1,160 @@
+<tool id="cg_testvariants" name="testvariants(beta) 1.5" version="1.0.0">
+<!--
+This tool creates a GUI for the testvariants function of cgatools from Complete Genomics, Inc.
+written 6-18-2012 by bcrain@completegenomics.com
+-->
+
+  <description>test for the presence of variants</description> <!--adds description in toolbar-->
+
+  <requirements>
+  	<requirement type="binary">cgatools</requirement>
+  </requirements>
+
+  <!--
+  Prints the cgatools version banner, then runs testvariants (beta) against the
+  selected variant listing. The variant files are either every dataset added to
+  the varfiles repeat, or - for files outside Galaxy - the lines of the
+  user-supplied list file, expanded by cat.
+  -->
+  <command>
+    cgatools | head -1;
+    cgatools testvariants
+    --beta
+    --reference ${crr.fields.path}
+    --output ${output}
+    --input ${listing}
+    --variants
+    #if $file_types.data_sources.data_source == "in"
+      #for $v in $file_types.data_sources.varfiles
+        ${v.input}
+      #end for
+    #else
+      `cat ${file_types.data_sources.varlist}`
+    #end if
+  </command>
+
+  <!-- Single tabular result, written by cgatools to the path Galaxy assigns to "output". -->
+  <outputs>
+    <data name="output" format="tabular" />
+  </outputs>
+  
+  <inputs>
+    <!-- Reference genome: the choices come from the cg_crr_files data table. -->
+    <param name="crr" type="select" label="Reference genome (.crr file)">
+      <options from_data_table="cg_crr_files" />
+    </param>
+
+    <!-- Variant listing to test for; the validators require a build (dbkey) listed in cg_crr_files.loc. -->
+    <param name="listing" type="data" format="tabular" label="Select variant list">
+      <validator type="unspecified_build" />
+      <validator type="dataset_metadata_in_file"
+                 filename="cg_crr_files.loc"
+                 metadata_name="dbkey"
+                 metadata_column="1"
+                 message="cgatools is not currently available for this build."/>
+    </param>
+
+    <!-- Outer choice: var or mastervar input. Both branches expose the same data_sources conditional. -->
+    <conditional name="file_types">
+      <param name="file_type" type="select" label="Select the input file type">
+        <option value="var" selected="true">var files</option>
+        <option value="mastervar">mastervar files</option>
+      </param>
+
+      <when value="var">
+        <!-- Inner choice: datasets in the history or a list file outside Galaxy. -->
+        <conditional name="data_sources">
+          <param name="data_source" type="select" label="Where are the input var files?">
+            <option value="in" selected="true">imported into Galaxy</option>
+            <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
+          </param>
+          <when value="in">
+            <!-- Any number of var datasets, each validated against cg_crr_files.loc. -->
+            <repeat name="varfiles" title="Variant files">
+              <param name="input" type="data" format="cg_var" label="Dataset">
+                <validator type="unspecified_build" />
+                <validator type="dataset_metadata_in_file"
+                           filename="cg_crr_files.loc"
+                           metadata_name="dbkey"
+                           metadata_column="1"
+                           message="cgatools is not currently available for this build."/>
+              </param>
+            </repeat>
+          </when>
+          <when value="out">
+            <!-- Path to a text file that lists the var files, one per line. -->
+            <param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="This file should contain a list of var files, one per line in the format /path/varfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/var-GS00000YYYY-ASM.tsv.bz2), var files can be compressed (gz, bz2)."/>
+          </when>
+        </conditional>
+      </when>
+
+      <when value="mastervar">
+        <!-- Inner choice: datasets in the history or a list file outside Galaxy. -->
+        <conditional name="data_sources">
+          <param name="data_source" type="select" label="Where are the input mastervar files?">
+            <option value="in" selected="true">imported into Galaxy</option>
+            <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
+          </param>
+          <when value="in">
+            <!-- Any number of mastervar datasets, each validated against cg_crr_files.loc. -->
+            <repeat name="varfiles" title="Variant files">
+              <param name="input" type="data" format="cg_mastervar" label="Dataset">
+                <validator type="unspecified_build" />
+                <validator type="dataset_metadata_in_file"
+                           filename="cg_crr_files.loc"
+                           metadata_name="dbkey"
+                           metadata_column="1"
+                           message="cgatools is not currently available for this build."/>
+              </param>
+            </repeat>
+          </when>
+          <when value="out">
+            <!-- Path to a text file that lists the mastervar files, one per line. -->
+            <param name="varlist" type="text" label="List of mastervar files (/path/file)" size="200" help="This file should contain a list of mastervar files, one per line in the format /path/varfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/masterVarBeta-GS00000YYYY-ASM.tsv.bz2), mastervar files can be compressed (gz, bz2)."/>
+          </when>
+        </conditional>
+      </when>
+    </conditional>
+  </inputs>
+
+  <help>
+  
+**What it does**
+
+This tool uses the cgatools testvariants to test variant or mastervar files for the presence of variants.
+
+**cgatools 1.5.0 Documentation**
+
+Userguide: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-user-guide.pdf
+
+Release notes: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-release-notes.pdf
+
+**Command line reference**::
+
+		COMMAND NAME
+		  testvariants - Tests variant files for presence of variants.
+		
+		DESCRIPTION
+		  Tests variant files for presence of variants. The output is a tab-delimited
+		  file consisting of the columns of the input variants file, plus a column 
+		  for each assembly results file that contains a character code for each 
+		  allele. The character codes have meaning as follows:
+			
+		    0 This allele of this genome is consistent with the reference at this 
+		      locus but inconsistent with the variant.
+		    1 This allele of this genome has the input variant at this locus.
+		    N This allele of this genome has no-calls but is consistent with the 
+		      input variant.
+		
+		OPTIONS
+		  -h [ --help ] 
+		      Print this help message.
+		
+		  --beta 
+		      This is a beta command. To run this command, you must pass the --beta 
+		      flag.
+		
+		  --reference arg
+		      The reference crr file.
+		
+		  --input arg (=STDIN)
+		      The input variants to test for.
+		
+		  --output arg (=STDOUT)
+		      The output file (may be omitted for stdout).
+		
+		  --variants arg
+		      The input variant files (may be passed in as arguments at the end of 
+		      the command).
+		
+		SUPPORTED FORMAT_VERSION
+		  0.3 or later
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tools/cgatools_1.5/varfilter.xml	Mon Jun 18 20:15:00 2012 -0400
@@ -0,0 +1,187 @@
+<tool id="cg_varfilter" name="varfilter(beta) 1.5" version="1.0.0">
+<!--
+This tool creates a GUI for the varfilter function of cgatools from Complete Genomics, Inc.
+The function is called via a Perl script, varfilter_wrapper.pl, which generates the correctly formatted filters and appends them to the input file name on the command line.
+written 6-18-2012 by bcrain@completegenomics.com
+-->
+
+  <description>copies input file, applying filters.</description> <!--adds description in toolbar-->
+
+  <requirements>
+  	<requirement type="binary">cgatools</requirement>
+  </requirements>
+
+  <!--
+  Runs varfilter through varfilter_wrapper.pl, which turns the per-filter
+  options into the "#selector:selector,..." suffix that cgatools expects on the
+  input file name.
+
+  The wrapper has to be the FIRST word of the command: with interpreter="perl"
+  Galaxy prefixes only the first word with the interpreter and the tool
+  directory. Starting the command with "cgatools | head -1;" therefore handed
+  the cgatools binary to perl and left the wrapper to be looked up on PATH, so
+  the version banner is no longer printed here.
+
+  The score fields are free text and may be empty, so they are sent with an
+  "x" guard in front (x25, or just x when empty); the wrapper strips it again.
+  -->
+  <command interpreter="perl">
+    varfilter_wrapper.pl
+    --reference ${crr.fields.path}
+    --output ${output}
+    --input ${file_types.data_sources.input}
+    #for $f in $filters
+      --zygosity ${f.zygosity}
+      --vartype ${f.vartype}
+      --varscorevaf x${f.varscorevaf}
+      --varscoreeaf x${f.varscoreeaf}
+      --varquality ${f.varquality}
+    #end for
+  </command>
+
+  <!--
+  varfilter copies its input to the output, so the result should carry the
+  input's datatype: cg_var by default, switched to cg_mastervar when the user
+  selected mastervar as the input file type.
+  -->
+  <outputs>
+    <data format="cg_var" name="output">
+      <change_format>
+        <when input="file_types.file_type" value="mastervar" format="cg_mastervar" />
+      </change_format>
+    </data>
+  </outputs>
+  
+  <inputs>
+    <!-- Reference genome: the choices come from the cg_crr_files data table. -->
+    <param name="crr" type="select" label="Reference genome (.crr file)">
+      <options from_data_table="cg_crr_files" />
+    </param>
+
+    <!-- Outer choice: var or mastervar input. Both branches expose the same data_sources conditional. -->
+    <conditional name="file_types">
+      <param name="file_type" type="select" label="Select the input file type">
+        <option value="var" selected="true">var files</option>
+        <option value="mastervar">mastervar files</option>
+      </param>
+
+      <when value="var">
+        <!-- Inner choice: a dataset in the history or a path outside Galaxy. -->
+        <conditional name="data_sources">
+          <param name="data_source" type="select" label="Where is the input var file?">
+            <option value="in" selected="true">imported into Galaxy</option>
+            <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
+          </param>
+          <when value="in">
+            <!-- Galaxy dataset; the validators require a build (dbkey) listed in cg_crr_files.loc. -->
+            <param name="input" type="data" format="cg_var" label="Var file">
+              <validator type="unspecified_build" />
+              <validator type="dataset_metadata_in_file"
+                         filename="cg_crr_files.loc"
+                         metadata_name="dbkey"
+                         metadata_column="1"
+                         message="cgatools is not currently available for this build."/>
+            </param>
+          </when>
+          <when value="out">
+            <!-- Free-text path to a var file that is not imported into Galaxy. -->
+            <param name="input" type="text" label="Var file (/path/file)" size="200" help="Variant file can be compressed (gz, bz2), e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/var-GS00000YYYY-ASM.tsv.bz2"/>
+          </when>
+        </conditional>
+      </when>
+
+      <when value="mastervar">
+        <!-- Inner choice: a dataset in the history or a path outside Galaxy. -->
+        <conditional name="data_sources">
+          <param name="data_source" type="select" label="Where is the input mastervar file?">
+            <option value="in" selected="true">imported into Galaxy</option>
+            <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
+          </param>
+          <when value="in">
+            <!-- Galaxy dataset; the validators require a build (dbkey) listed in cg_crr_files.loc. -->
+            <param name="input" type="data" format="cg_mastervar" label="Mastervar file">
+              <validator type="unspecified_build" />
+              <validator type="dataset_metadata_in_file"
+                         filename="cg_crr_files.loc"
+                         metadata_name="dbkey"
+                         metadata_column="1"
+                         message="cgatools is not currently available for this build."/>
+            </param>
+          </when>
+          <when value="out">
+            <!-- Free-text path to a mastervar file that is not imported into Galaxy. -->
+            <param name="input" type="text" label="Mastervar file (/path/file)" size="200" help="Mastervar file can be compressed (gz, bz2), e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/masterVarBeta-GS00000YYYY-ASM.tsv.bz2"/>
+          </when>
+        </conditional>
+      </when>
+    </conditional>
+
+    <!--
+    Filters. Each repeat entry becomes one comma-separated filter; the selectors
+    chosen inside an entry are combined with ":" by varfilter_wrapper.pl.
+    "NA" (selects) and an empty text field (scores) mean "selector not used".
+    -->
+    <repeat name="filters" title="Filter">
+      <param name="zygosity" type="select" label="Filter out call (set to no-call) IF locus IS">
+        <option value="NA">- all loci -</option>
+        <option value="hom">homozygous</option>
+        <option value="het">heterozygous</option>
+      </param>
+
+      <param name="vartype" type="select" label="AND varType IS">
+        <option value="NA">- any varType -</option>
+        <option value="snp">snp</option>
+        <option value="ins">ins</option>
+        <option value="del">del</option>
+        <option value="sub">sub</option>
+        <option value="ref">ref</option>
+      </param>
+
+      <param name="varscorevaf" type="text" label="AND varScoreVAF IS LESS THAN"/>
+      <param name="varscoreeaf" type="text" label="AND varScoreEAF IS LESS THAN"/>
+
+      <!-- The value is passed verbatim into "varQuality!=XX"; it is spelled in upper
+           case (VQHIGH / VQLOW) to match the spelling used in the cgatools help below. -->
+      <param name="varquality" type="select" label="AND varQuality IS NOT">
+        <option value="NA"> </option>
+        <option value="VQHIGH">VQHIGH</option>
+        <option value="VQLOW">VQLOW</option>
+      </param>
+    </repeat>
+  </inputs>
+
+  <help>
+  
+**What it does**
+
+This tool copies input var file or masterVar file to output, applying specified filters.
+
+**cgatools 1.5.0 Documentation**
+
+Userguide: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-user-guide.pdf
+
+Release notes: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-release-notes.pdf
+
+**Command line reference**::
+
+		COMMAND NAME
+		  varfilter - Copies input var file or masterVar file to output, applying
+		  specified filters.
+		
+		DESCRIPTION
+		  Copies input var file or masterVar file to output, applying specified 
+		  filters (which are available to all cgatools commands that read a var file 
+		  or masterVar file as input). Filters are specified by appending the filter 
+		  specification to the var file name on the command line. For example:
+				
+		  /path/to/var.tsv.bz2#varQuality!=VQHIGH
+				
+		  The preceding example filters out any calls marked as VQLOW. The filter 
+		  specification follows the "#" sign, and consists of a list of filters to 
+		  apply, separated by a comma. Each filter is a colon-separated list of call 
+		  selectors. Any scored call that passes all the colon-separated call 
+		  selectors for one or more of the comma-separated filters is turned into a 
+		  no-call. The following call selectors are available:
+				
+		    hom             Selects only calls in homozygous loci.
+		    het             Selects any scored call not selected by the hom selector.
+		    varType=XX      Selects calls whose varType is XX.
+		    varScoreVAF&lt;XX  Selects calls whose varScoreVAF&lt;XX.
+		    varScoreEAF&lt;XX  Selects calls whose varScoreEAF&lt;XX.
+		    varQuality!=XX  Selects calls whose varQuality is not XX.
+				
+		  Here is an example that filters homozygous SNPs with varScoreVAF &lt; 25 and 
+		  heterozygous insertions with varScoreEAF &lt; 50:
+				
+				
+		  '/path/to/var.tsv.bz2#hom:varType=snp:varScoreVAF&lt;25,het:varType=ins:varScoreEAF&lt;50'
+				
+		
+		OPTIONS
+		  -h [ --help ]
+		      Print this help message.
+		
+		  --beta
+		      This is a beta command. To run this command, you must pass the --beta flag.
+		
+		  --reference arg
+		      The reference crr file.
+		
+		  --input arg
+		      The input var file or masterVar file (typically with filters specified).
+		
+		  --output arg (=STDOUT)
+		      The output file (may be omitted for stdout).
+		
+		SUPPORTED FORMAT_VERSION
+		  0.3 or later
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tools/cgatools_1.5/varfilter_wrapper.pl	Mon Jun 18 20:15:00 2012 -0400
@@ -0,0 +1,56 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use Getopt::Long;
+$| = 1; # set autoflush to screen, so the echoed command appears before cgatools output
+
+# This is a wrapper for the cgatools varfilter function to run cgatools varfilter in Galaxy.
+# The wrapper generates the filter(s) in the correct format to be used with the input file.
+# written 6-1-2012 by bcrain@completegenomics.com
+#
+# Usage:
+#   varfilter_wrapper.pl --reference REF.crr --input VARFILE --output OUTFILE
+#       [--zygosity Z --vartype T --varscorevaf xN --varscoreeaf xN --varquality Q] ...
+#
+# The five filter options may be repeated; the i-th occurrence of each option
+# describes the i-th filter. 'NA' (zygosity, vartype, varquality) and a bare 'x'
+# (scores) mean "selector not used". The scores carry a leading 'x' guard so
+# that an empty form field still yields an option value; the guard is stripped here.
+#
+# Each filter becomes a ':'-separated list of selectors, the filters are joined
+# with ',' and the result is appended to the input file name after a '#'.
+
+my ($opt_reference, $opt_input, $opt_output);
+my (@opt_zygosity, @opt_vartype, @opt_varscorevaf, @opt_varscoreeaf, @opt_varquality);
+
+# Returns true when a select-type option carries a real choice (not missing, empty or 'NA').
+sub is_selected
+{
+	my ($value) = @_;
+	return defined $value && $value ne '' && $value ne 'NA';
+}
+
+# Returns the score with the leading 'x' guard removed, or nothing when no score was given.
+sub score_value
+{
+	my ($raw) = @_;
+	return unless defined $raw;
+	(my $value = $raw) =~ s/^x//;
+	return if $value eq '';
+	return $value;
+}
+
+GetOptions(
+	'reference=s'   => \$opt_reference,
+	'input=s'       => \$opt_input,
+	'output=s'      => \$opt_output,
+	'zygosity=s'    => \@opt_zygosity,
+	'vartype=s'     => \@opt_vartype,
+	'varscorevaf=s' => \@opt_varscorevaf,
+	'varscoreeaf=s' => \@opt_varscoreeaf,
+	'varquality=s'  => \@opt_varquality,
+) or die "varfilter_wrapper.pl: could not parse the command line\n";
+
+defined $opt_reference or die "varfilter_wrapper.pl: --reference is required\n";
+defined $opt_input     or die "varfilter_wrapper.pl: --input is required\n";
+defined $opt_output    or die "varfilter_wrapper.pl: --output is required\n";
+
+# Build one selector list per filter; the zygosity list determines the number of filters.
+my @filters;
+for my $i (0 .. $#opt_zygosity)
+{
+	my @selectors;
+
+	push @selectors, $opt_zygosity[$i]              if is_selected($opt_zygosity[$i]);
+	push @selectors, 'varType=' . $opt_vartype[$i]  if is_selected($opt_vartype[$i]);
+
+	my $vaf = score_value($opt_varscorevaf[$i]);
+	push @selectors, 'varScoreVAF<' . $vaf          if defined $vaf;
+
+	my $eaf = score_value($opt_varscoreeaf[$i]);
+	push @selectors, 'varScoreEAF<' . $eaf          if defined $eaf;
+
+	push @selectors, 'varQuality!=' . $opt_varquality[$i] if is_selected($opt_varquality[$i]);
+
+	# A filter without any selector would be an empty entry in the list; leave it out.
+	push @filters, join(':', @selectors) if @selectors;
+}
+
+my $append     = @filters ? '#' . join(',', @filters) : '';
+my $input_spec = $opt_input . $append;
+
+# Echo the command for the Galaxy job log (same text as before).
+print join("\n",
+	'cgatools varfilter',
+	'--beta',
+	"--reference $opt_reference",
+	"--output $opt_output",
+	"--input '$input_spec'",
+), "\n";
+
+# List-form system() bypasses the shell, so '#', '<', quotes or spaces in the
+# user-supplied paths and filter spec reach cgatools untouched and cannot be
+# interpreted as shell syntax.
+my @command = (
+	'cgatools', 'varfilter',
+	'--beta',
+	'--reference', $opt_reference,
+	'--output',    $opt_output,
+	'--input',     $input_spec,
+);
+
+my $status = system(@command);
+if ($status == -1)
+{
+	die "varfilter_wrapper.pl: could not run cgatools: $!\n";
+}
+if ($status != 0)
+{
+	die "varfilter_wrapper.pl: cgatools varfilter failed (exit status " . ($status >> 8) . ")\n";
+}
\ No newline at end of file