changeset 11:b22f8634ff84 draft

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/defuse commit 23b94b5747c6956360cd2eca0a07a669929ea141-dirty
author jjohnson
date Sun, 17 Jan 2016 14:11:06 -0500
parents f65857c1b92e
children 4fe2e80d4ae1
files README create_reference_dataset.xml data_manager_conf.xml datamanager_create_reference.py datamanager_create_reference.xml datatypes_conf.xml defuse.xml defuse_bamfastq.xml defuse_results_to_vcf.py defuse_results_to_vcf.xml defuse_trinity_analysis.py defuse_trinity_analysis.xml macros.xml test-data/mm10_results.filtered.tsv test-data/mm10_results.filtered.vcf test-data/tophat_out2h.bam tool-data/defuse.loc.sample tool-data/defuse_reference.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml
diffstat 20 files changed, 2170 insertions(+), 432 deletions(-) [+]
line wrap: on
line diff
--- a/README	Mon Jan 14 12:24:28 2013 -0600
+++ b/README	Sun Jan 17 14:11:06 2016 -0500
@@ -1,11 +1,12 @@
-The DeFuse galaxy tool is based on DeFuse_Version_0.6.0
+The DeFuse galaxy tool is based on DeFuse_Version_0.6.2
 http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page
+https://bitbucket.org/dranew/defuse
 
 DeFuse is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion.
 
 
 Manual:
-http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.6.0
+http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.6.2
 
 The included tool_dependencies.xml will download and install the defuse code.  
 It will set the environment variable: "DEFUSE_PATH" to the location of the defuse install.  
@@ -34,8 +35,13 @@
 
 These datasets should be referenced in the tool-data/defuse.loc file. 
 
+The create_reference_dataset will run the create_reference_dataset.pl script to generate deFuse genome reference data in a galaxy dataset.   
+This should be made available in the future as a Galaxy DataManager.
 
-External Tools  ( http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.6.0 )
+
+Galaxy will try to auto-install dependencies:
+
+External Tools  ( http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.6.2 )
 deFuse relies on other publically available tools as part of its pipeline. Some of these tools are not included with the deFuse download. Obtain these tools as detailed below.
 Download samtools
 The latest version of samtools can be downloaded from sourceforge: https://sourceforge.net/projects/samtools/files/samtools.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/create_reference_dataset.xml	Sun Jan 17 14:11:06 2016 -0500
@@ -0,0 +1,308 @@
+<tool id="create_defuse_reference" name="Create DeFuse Reference" version="@DEFUSE_VERSION@.1">
+ <description>create a defuse reference from Ensembl and UCSC sources</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <expand macro="defuse_requirement" />
+        <expand macro="mapping_requirements" />
+    </requirements>
+  <command interpreter="command"> /bin/bash $defuse_script </command>
+ <inputs>
+  <conditional name="genome">
+    <param name="choice" type="select" label="Select a Genome Build">
+      <option value="GRCh38">Homo_sapiens GRCh38  hg38</option>
+      <option value="GRCh37">Homo_sapiens GRCh37  hg19</option>
+      <option value="NCBI36">Homo_sapiens NCBI36 hg18</option>
+      <option value="GRCm38">Mus_musculus GRCm38 mm10</option>
+      <option value="NCBIM37">Mus_musculus NCBIM37 mm9</option>
+      <option value="Rnor_5.0">Rattus_norvegicus Rnor_5.0 rn5</option>
+      <option value="user_specified">User specified</option>
+    </param>
+    <when value="GRCh38">
+      <param name="ensembl_organism" type="hidden" value="homo_sapiens"/>
+      <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/>
+      <param name="ensembl_genome_version" type="hidden" value="GRCh38"/>
+      <param name="ensembl_version" type="hidden" value="80"/>
+      <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/>
+      <param name="ncbi_prefix" type="hidden" value="Hs"/>
+      <param name="ucsc_genome_version" type="hidden" value="hg38"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
+    <when value="GRCh37">
+      <param name="ensembl_organism" type="hidden" value="homo_sapiens"/>
+      <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/>
+      <param name="ensembl_genome_version" type="hidden" value="GRCh37"/>
+      <param name="ensembl_version" type="hidden" value="71"/>
+      <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/>
+      <param name="ncbi_prefix" type="hidden" value="Hs"/>
+      <param name="ucsc_genome_version" type="hidden" value="hg19"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
+    <when value="NCBI36">
+      <param name="ensembl_organism" type="hidden" value="homo_sapiens"/>
+      <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/>
+      <param name="ensembl_genome_version" type="hidden" value="NCBI36"/>
+      <param name="ensembl_version" type="hidden" value="54"/>
+      <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/>
+      <param name="ncbi_prefix" type="hidden" value="Hs"/>
+      <param name="ucsc_genome_version" type="hidden" value="hg18"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
+    <when value="GRCm38">
+      <param name="ensembl_organism" type="hidden" value="mus_musculus"/>
+      <param name="ensembl_prefix" type="hidden" value="Mus_musculus"/>
+      <param name="ensembl_genome_version" type="hidden" value="GRCm38"/>
+      <param name="ensembl_version" type="hidden" value="71"/>
+      <param name="ncbi_organism" type="hidden" value="Mus_musculus"/>
+      <param name="ncbi_prefix" type="hidden" value="Mm"/>
+      <param name="ucsc_genome_version" type="hidden" value="mm10"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
+    <when value="NCBIM37">
+      <param name="ensembl_organism" type="hidden" value="mus_musculus"/>
+      <param name="ensembl_prefix" type="hidden" value="Mus_musculus"/>
+      <param name="ensembl_genome_version" type="hidden" value="NCBIM37"/>
+      <param name="ensembl_version" type="hidden" value="67"/>
+      <param name="ncbi_organism" type="hidden" value="Mus_musculus"/>
+      <param name="ncbi_prefix" type="hidden" value="Mm"/>
+      <param name="ucsc_genome_version" type="hidden" value="mm9"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
+    <when value="Rnor_5.0">
+      <param name="ensembl_organism" type="hidden" value="rattus_norvegicus"/>
+      <param name="ensembl_prefix" type="hidden" value="Rattus_norvegicus"/>
+      <param name="ensembl_genome_version" type="hidden" value="Rnor_5.0"/>
+      <param name="ensembl_version" type="hidden" value="71"/>
+      <param name="ncbi_organism" type="hidden" value="Rattus_norvegicus"/>
+      <param name="ncbi_prefix" type="hidden" value="Rn"/>
+      <param name="ucsc_genome_version" type="hidden" value="rn5"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
+    <when value="user_specified">
+      <!-- Free-form variant of the preset genome builds: the same parameter names are exposed as editable fields. -->
+      <param name="ensembl_organism" type="text" value="" label="Ensembl Organism Name">
+       <help>
+       Examples: homo_sapiens, mus_musculus, rattus_norvegicus
+       ftp://ftp.ensembl.org/pub/release-$ensembl_version/fasta/$ensembl_organism/dna/$ensembl_prefix.$ensembl_genome_version.$ensembl_version.dna.chromosome.$chromosome.fa.gz
+       </help>
+       </param>
+      <param name="ensembl_prefix" type="text" value="" label="Ensembl Organism prefix" help="Examples: Homo_sapiens, Mus_musculus, Rattus_norvegicus"/>
+      <param name="ensembl_genome_version" type="text" value="" label="Ensembl Genome Version" help="Examples: GRCh37, GRCm38, Rnor_5.0"/>
+      <param name="ensembl_version" type="integer" value="" label="Ensembl Release Version" help="Example: 71"/>
+      <param name="ncbi_organism" type="text" value="" label="NCBI Organism Name" help="Examples: Homo_sapiens, Mus_musculus, Rattus_norvegicus"/>
+      <param name="ncbi_prefix" type="text" value="" label="NCBI Organism Unigene prefix" help="Examples: Hs, Mm, Rn"/>
+      <param name="ucsc_genome_version" type="text" value="" label="UCSC Genome Version" help="Examples: hg19, mm10, rn5"/>
+      <param name="chromosomes" type="text" value="" label="Chromosomes for Ensembl genome build" >
+       <help>  Examples: 
+         Homo_sapiens: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT
+         Mus_musculus: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT
+         Rattus_norvegicus: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,MT
+         ( ftp://ftp.ensembl.org/pub/release-71/fasta/homo_sapiens/dna/ )
+       </help>
+      </param>
+      <param name="mt_chromosome" type="text" value="MT" label="Ensembl Mitochondrial Chromosome name" />
+      <param name="gene_sources" type="text" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding" label="Gene sources" />
+      <param name="ig_gene_sources" type="text" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene" label="IG Gene sources" />
+      <param name="rrna_gene_sources" type="text" value="Mt_rRNA,rRNA,rRNA_pseudogene" label="Ribosomal Gene sources" />
+    </when>
+  </conditional>
+ </inputs>
+ <outputs>
+  <data format="defuse.conf" name="config_txt" label="${tool.name} on ${genome.ensembl_genome_version} : config.txt"/>
+ </outputs>
+  <stdio>
+    <exit_code range="1:"  level="fatal"   description="Error running Create DeFuse Reference" />
+    <regex match="Error:" 
+           source="both" 
+           level="fatal" 
+           description="Error running Create DeFuse Reference" />
+
+  </stdio>
+ <configfiles>
+  <configfile name="defuse_config">
+#
+# Configuration file for defuse
+#
+# Variables that designate the PATH to an application, e.g. __SAMTOOLS_BIN__ 
+#   will be set by the runtime script using the ENV PATH
+#
+
+# Directory where the defuse code was unpacked
+source_directory = __DEFUSE_PATH__
+
+# Organism IDs
+ensembl_organism = $genome.ensembl_organism
+ensembl_prefix = $genome.ensembl_prefix
+ensembl_version = $genome.ensembl_version
+ensembl_genome_version = $genome.ensembl_genome_version
+ucsc_genome_version = $genome.ucsc_genome_version
+ncbi_organism = $genome.ncbi_organism
+ncbi_prefix = $genome.ncbi_prefix
+
+# Directory where you want your dataset
+dataset_directory = $config_txt.dataset.extra_files_path
+
+#raw
+# Input genome and gene models
+gene_models                                 = $(dataset_directory)/$(ensembl_prefix).$(ensembl_genome_version).$(ensembl_version).gtf
+genome_fasta                                = $(dataset_directory)/$(ensembl_prefix).$(ensembl_genome_version).$(ensembl_version).dna.chromosomes.fa
+
+# Repeat table from ucsc genome browser
+repeats_filename                            = $(dataset_directory)/repeats.txt
+
+# EST info downloaded from ucsc genome browser
+est_fasta                                   = $(dataset_directory)/est.fa
+est_alignments                              = $(dataset_directory)/intronEst.txt
+
+# Unigene clusters downloaded from ncbi
+unigene_fasta                               = $(dataset_directory)/$(ncbi_prefix).seq.uniq
+#end raw
+
+# Paths to external tools
+samtools_bin =  __SAMTOOLS_BIN__
+bowtie_bin = __BOWTIE_BIN__
+bowtie_build_bin = __BOWTIE_BUILD_BIN__
+blat_bin = __BLAT_BIN__
+fatotwobit_bin = __FATOTWOBIT_BIN__
+gmap_bin = __GMAP_BIN__
+gmap_setup_bin = __GMAP_SETUP_BIN__
+r_bin = __R_BIN__
+rscript_bin = __RSCRIPT_BIN__
+
+#raw
+# Directory where you want your dataset
+gmap_index_directory                        = $(dataset_directory)/gmap
+#end raw
+
+#raw
+# Dataset files
+dataset_prefix       = $(dataset_directory)/defuse
+chromosome_prefix    = $(dataset_prefix).dna.chromosomes
+exons_fasta          = $(dataset_prefix).exons.fa
+cds_fasta            = $(dataset_prefix).cds.fa
+cdna_regions         = $(dataset_prefix).cdna.regions
+cdna_fasta           = $(dataset_prefix).cdna.fa
+reference_fasta      = $(dataset_prefix).reference.fa
+rrna_fasta           = $(dataset_prefix).rrna.fa
+ig_gene_list         = $(dataset_prefix).ig.gene.list
+repeats_regions      = $(dataset_directory)/repeats.regions
+est_split_fasta1     = $(dataset_directory)/est.1.fa
+est_split_fasta2     = $(dataset_directory)/est.2.fa
+est_split_fasta3     = $(dataset_directory)/est.3.fa
+est_split_fasta4     = $(dataset_directory)/est.4.fa
+est_split_fasta5     = $(dataset_directory)/est.5.fa
+est_split_fasta6     = $(dataset_directory)/est.6.fa
+est_split_fasta7     = $(dataset_directory)/est.7.fa
+est_split_fasta8     = $(dataset_directory)/est.8.fa
+est_split_fasta9     = $(dataset_directory)/est.9.fa
+
+# Fasta files with bowtie indices for prefiltering reads for concordantly mapping pairs
+prefilter1           = $(unigene_fasta)
+
+# deFuse scripts and tools
+scripts_directory    = $(source_directory)/scripts
+tools_directory      = $(source_directory)/tools
+data_directory       = $(source_directory)/data
+#end raw
+
+# Parameters for building the dataset
+chromosomes = $genome.chromosomes
+mt_chromosome = $genome.mt_chromosome
+gene_sources = $genome.gene_sources
+ig_gene_sources = $genome.ig_gene_sources
+rrna_gene_sources = $genome.rrna_gene_sources
+gene_biotypes = $genome.gene_sources
+ig_gene_biotypes = $genome.ig_gene_sources
+rrna_gene_biotypes = $genome.rrna_gene_sources
+
+#raw
+# Remove temp files
+remove_job_files                            = yes
+remove_job_temp_files                       = yes
+#end raw
+  </configfile>
+  <configfile name="defuse_script">
+#!/bin/bash
+## define some things for cheetah processing
+#set $amp = chr(38)
+#set $gt = chr(62)
+## substitute pathnames into config file
+if `grep __DEFUSE_PATH__ $defuse_config ${gt} /dev/null`;then sed -i'.tmp' "s#__DEFUSE_PATH__#\${DEFUSE_PATH}#" $defuse_config; fi
+if `grep __SAMTOOLS_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} SAMTOOLS_BIN=`which samtools`;then sed -i'.tmp' "s#__SAMTOOLS_BIN__#\${SAMTOOLS_BIN}#" $defuse_config; fi
+if `grep __BOWTIE_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BIN=`which bowtie`;then sed -i'.tmp' "s#__BOWTIE_BIN__#\${BOWTIE_BIN}#" $defuse_config; fi
+if `grep __BOWTIE_BUILD_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BUILD_BIN=`which bowtie-build`;then sed -i'.tmp' "s#__BOWTIE_BUILD_BIN__#\${BOWTIE_BUILD_BIN}#" $defuse_config; fi
+if `grep __BLAT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BLAT_BIN=`which blat`;then sed -i'.tmp' "s#__BLAT_BIN__#\${BLAT_BIN}#" $defuse_config; fi
+if `grep __FATOTWOBIT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} FATOTWOBIT_BIN=`which faToTwoBit`;then sed -i'.tmp' "s#__FATOTWOBIT_BIN__#\${FATOTWOBIT_BIN}#" $defuse_config; fi
+if `grep __GMAP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_BIN=`which gmap`;then sed -i'.tmp' "s#__GMAP_BIN__#\${GMAP_BIN}#" $defuse_config; fi
+if `grep __GMAP_SETUP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_SETUP_BIN=`which gmap_setup`;then sed -i'.tmp' "s#__GMAP_SETUP_BIN__#\${GMAP_SETUP_BIN}#" $defuse_config; fi
+if `grep __GMAP_INDEX_DIR__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_INDEX_DIR=`pwd`/gmap;then sed -i'.tmp' "s#__GMAP_INDEX_DIR__#\${GMAP_INDEX_DIR}#" $defuse_config; fi
+if `grep __R_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} R_BIN=`which R`;then sed -i'.tmp' "s#__R_BIN__#\${R_BIN}#" $defuse_config; fi
+if `grep __RSCRIPT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} RSCRIPT_BIN=`which Rscript`;then sed -i'.tmp' "s#__RSCRIPT_BIN__#\${RSCRIPT_BIN}#" $defuse_config; fi
+## copy config to output
+cp $defuse_config $config_txt
+## create the directory that will hold the generated reference dataset
+mkdir -p $config_txt.dataset.extra_files_path
+## create_reference_dataset.pl
+perl \${DEFUSE_PATH}/scripts/create_reference_dataset.pl -c $defuse_config 
+  </configfile>
+ </configfiles>
+
+ <tests>
+ </tests>
+ <help>
+**DeFuse**
+
+DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion.  See the DeFuse_Version_0.6_ manual for details.
+
+DeFuse uses a Reference Dataset to search for gene fusions.  The Reference Dataset is generated from the following sources in DeFuse_Version_0.6_:
+    - genome_fasta from Ensembl
+    - gene_models from Ensembl
+    - repeats_filename from UCSC RepeatMasker rmsk.txt
+    - est_fasta from UCSC
+    - est_alignments from UCSC intronEst.txt
+    - unigene_fasta from NCBI
+
+The create_defuse_reference Galaxy tool downloads the reference genome and other source files, and builds any derivative files including bowtie indices, gmap indices, and 2bit files. Expect this step to take at least 12 hours.
+
+
+It will generate a config.txt file that can be input into the deFuse Galaxy tool.  
+
+Journal reference: http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1001138
+
+.. _DeFuse: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page
+
+.. _DeFuse_Version_0.6: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.6.1
+
+------
+
+**Outputs**
+
+The galaxy history will contain: the config.txt file that provides DeFuse with the reference data paths.  
+
+ </help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Sun Jan 17 14:11:06 2016 -0500
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<data_managers>
+  <data_manager tool_file="datamanager_create_reference.xml" id="data_manager_defuse_reference" >
+    <data_table name="defuse_reference">  <!-- Defines a Data Table to be modified. -->
+            <output> <!-- Handle the output of the Data Manager Tool -->
+                <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" output_ref="out_file" >  <!-- The value of this column will be modified based upon data in "out_file". example value "phiX.fa" -->
+                    <move type="directory"> <!-- Moving a file from the extra files path of "out_file" -->
+                        <!-- <source>${path}</source>--> <!-- out_file.extra_files_path is used as base by default --> <!-- if no source, eg for type=directory, then refers to base -->
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${value}/defuse</target> <!-- Target Location to store the file, directories are created as needed -->
+                    </move>
+                    <!-- datamanager_create_reference.py copies the defuse config file into the output's extra files directory,
+                         named after the dbkey (which it also reports as ${value}).  That directory is moved as a whole, so storing
+                         the moved config path in this column lets defuse.xml set the dataset directory to the config's directory.
+                     -->
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${value}/defuse/${value}.config</value_translation> <!-- Store this value in the final Data Table -->
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+  </data_manager>
+</data_managers>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datamanager_create_reference.py	Sun Jan 17 14:11:06 2016 -0500
@@ -0,0 +1,118 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import re
+import tempfile
+import subprocess
+import fileinput
+import shutil
+import optparse
+import urllib2
+from ftplib import FTP
+import tarfile
+
+from galaxy.util.json import from_json_string, to_json_string
+
+
+def stop_err(msg):
+    sys.stderr.write(msg)
+    sys.exit(1)
+
+def get_config_dict(config,dataset_directory=None):
+    keys = ['dataset_directory','ensembl_organism','ensembl_prefix','ensembl_version','ensembl_genome_version','ucsc_genome_version','ncbi_organism','ncbi_prefix','chromosomes','mt_chromosome','gene_sources','ig_gene_sources','rrna_gene_sources']
+    pat = '^([^=]+?)\s*=\s*(.*)$'
+    config_dict = {}
+    try:
+        fh = open(config)
+        for i,l in enumerate(fh):
+           line = l.strip() 
+           if line.startswith('#'):
+               continue
+           m = re.match(pat,line)
+           if m and len(m.groups()) == 2:
+               (k,v) = m.groups()
+               if k in keys:
+                   config_dict[k] = v
+    except Exception, e:
+        stop_err( 'Error parsing %s %s\n' % (config,str( e )) )
+    else:
+        fh.close()
+    if dataset_directory:
+        config_dict['dataset_directory'] = dataset_directory
+    return config_dict
+
+def run_defuse_script(data_manager_dict, params, target_directory, dbkey, description, config, script):
+    if not os.path.isdir(target_directory):
+        os.makedirs(target_directory)
+    ## Name the config consistently with data_manager_conf.xml
+    #  copy the config file to the target_directory
+    #  when DataManager moves files to there tool-data location, the config will get moved as well,
+    #   and the value_translation in data_manager_conf.xml will tell us the new location
+    #  defuse.xml will use the path to this config file to set the dataset_directory
+    config_name = '%s.config' % dbkey
+    defuse_config = os.path.join( target_directory, config_name)
+    shutil.copyfile(config,defuse_config) 
+    cmd = "/bin/bash %s %s" % (script,target_directory)
+    # Run
+    try:
+        tmp_out = tempfile.NamedTemporaryFile().name
+        tmp_stdout = open( tmp_out, 'wb' )
+        tmp_err = tempfile.NamedTemporaryFile().name
+        tmp_stderr = open( tmp_err, 'wb' )
+        proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr )
+        returncode = proc.wait()
+        tmp_stderr.close()
+        # get stderr, allowing for case where it's very large
+        tmp_stderr = open( tmp_err, 'rb' )
+        stderr = ''
+        buffsize = 1048576
+        try:
+            while True:
+                stderr += tmp_stderr.read( buffsize )
+                if not stderr or len( stderr ) % buffsize != 0:
+                    break
+        except OverflowError:
+            pass
+        tmp_stdout.close()
+        tmp_stderr.close()
+        if returncode != 0:
+            raise Exception, stderr
+
+        # TODO: look for errors in program output.
+    except Exception, e:
+        stop_err( 'Error creating defuse reference:\n' + str( e ) )
+    config_dict = get_config_dict(config, dataset_directory=target_directory)
+    data_table_entry = dict(value=dbkey, dbkey=dbkey, name=description, path=config_name)
+    _add_data_table_entry( data_manager_dict, data_table_entry )
+def _add_data_table_entry( data_manager_dict, data_table_entry ):
+    data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
+    data_manager_dict['data_tables']['defuse_reference'] = data_manager_dict['data_tables'].get( 'defuse_reference', [] )
+    data_manager_dict['data_tables']['defuse_reference'].append( data_table_entry )
+    return data_manager_dict
+
+def main():
+    #Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option( '-k', '--dbkey', dest='dbkey', action='store', type="string", default=None, help='dbkey' )
+    parser.add_option( '-d', '--description', dest='description', action='store', type="string", default=None, help='description' )
+    parser.add_option( '-c', '--defuse_config', dest='defuse_config', action='store', type="string", default=None, help='defuse_config' )
+    parser.add_option( '-s', '--defuse_script', dest='defuse_script', action='store', type="string", default=None, help='defuse_script' )
+    (options, args) = parser.parse_args()
+
+    filename = args[0]
+
+    params = from_json_string( open( filename ).read() )
+    target_directory = params[ 'output_data' ][0]['extra_files_path']
+    os.mkdir( target_directory )
+    data_manager_dict = {}
+
+     
+    #Create Defuse Reference Data
+    run_defuse_script( data_manager_dict, params, target_directory, options.dbkey, options.description,options.defuse_config,options.defuse_script)
+
+    #save info to json file
+    open( filename, 'wb' ).write( to_json_string( data_manager_dict ) )
+
+if __name__ == "__main__": main()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datamanager_create_reference.xml	Sun Jan 17 14:11:06 2016 -0500
@@ -0,0 +1,307 @@
+<tool id="data_manager_defuse_reference" name="DeFuse Reference DataManager" version="1.6.1" tool_type="manage_data">
+ <description>create a defuse reference from Ensembl and UCSC sources</description>
+ <requirements>
+  <requirement type="package" version="0.6.1">defuse</requirement>
+  <requirement type="package" version="0.1.18">samtools</requirement>
+  <requirement type="package" version="1.0.0">bowtie</requirement>
+  <requirement type="package" version="2013-05-09">gmap</requirement>
+  <requirement type="package" version="latest">kent</requirement>
+ </requirements>
+ <command interpreter="python"> datamanager_create_reference.py 
+    --dbkey "$genome.ensembl_genome_version"
+    --description "$genome.ensembl_prefix $genome.ensembl_genome_version ($genome.ucsc_genome_version)"
+    --defuse_config $defuse_config
+    --defuse_script $defuse_script
+    $out_file
+ </command>
+ <inputs>
+  <conditional name="genome">
+    <param name="choice" type="select" label="Select a Genome Build">
+      <option value="GRCh38">Homo_sapiens GRCh38  hg38</option>
+      <option value="GRCh37">Homo_sapiens GRCh37  hg19</option>
+      <option value="NCBI36">Homo_sapiens NCBI36 hg18</option>
+      <option value="GRCm38">Mus_musculus GRCm38 mm10</option>
+      <option value="NCBIM37">Mus_musculus NCBIM37 mm9</option>
+      <option value="Rnor_5.0">Rattus_norvegicus Rnor_5.0 rn5</option>
+      <option value="user_specified">User specified</option>
+    </param>
+    <when value="GRCh38">
+      <param name="ensembl_organism" type="hidden" value="homo_sapiens"/>
+      <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/>
+      <param name="ensembl_genome_version" type="hidden" value="GRCh38"/>
+      <param name="ensembl_version" type="hidden" value="80"/>
+      <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/>
+      <param name="ncbi_prefix" type="hidden" value="Hs"/>
+      <param name="ucsc_genome_version" type="hidden" value="hg38"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
+    <when value="GRCh37">
+      <param name="ensembl_organism" type="hidden" value="homo_sapiens"/>
+      <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/>
+      <param name="ensembl_genome_version" type="hidden" value="GRCh37"/>
+      <param name="ensembl_version" type="hidden" value="71"/>
+      <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/>
+      <param name="ncbi_prefix" type="hidden" value="Hs"/>
+      <param name="ucsc_genome_version" type="hidden" value="hg19"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
+    <when value="NCBI36">
+      <param name="ensembl_organism" type="hidden" value="homo_sapiens"/>
+      <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/>
+      <param name="ensembl_genome_version" type="hidden" value="NCBI36"/>
+      <param name="ensembl_version" type="hidden" value="54"/>
+      <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/>
+      <param name="ncbi_prefix" type="hidden" value="Hs"/>
+      <param name="ucsc_genome_version" type="hidden" value="hg18"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
+    <when value="GRCm38">
+      <param name="ensembl_organism" type="hidden" value="mus_musculus"/>
+      <param name="ensembl_prefix" type="hidden" value="Mus_musculus"/>
+      <param name="ensembl_genome_version" type="hidden" value="GRCm38"/>
+      <param name="ensembl_version" type="hidden" value="71"/>
+      <param name="ncbi_organism" type="hidden" value="Mus_musculus"/>
+      <param name="ncbi_prefix" type="hidden" value="Mm"/>
+      <param name="ucsc_genome_version" type="hidden" value="mm10"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
+    <when value="NCBIM37">
+      <param name="ensembl_organism" type="hidden" value="mus_musculus"/>
+      <param name="ensembl_prefix" type="hidden" value="Mus_musculus"/>
+      <param name="ensembl_genome_version" type="hidden" value="NCBIM37"/>
+      <param name="ensembl_version" type="hidden" value="67"/>
+      <param name="ncbi_organism" type="hidden" value="Mus_musculus"/>
+      <param name="ncbi_prefix" type="hidden" value="Mm"/>
+      <param name="ucsc_genome_version" type="hidden" value="mm9"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
+    <when value="Rnor_5.0">
+      <param name="ensembl_organism" type="hidden" value="rattus_norvegicus"/>
+      <param name="ensembl_prefix" type="hidden" value="Rattus_norvegicus"/>
+      <param name="ensembl_genome_version" type="hidden" value="Rnor_5.0"/>
+      <param name="ensembl_version" type="hidden" value="71"/>
+      <param name="ncbi_organism" type="hidden" value="Rattus_norvegicus"/>
+      <param name="ncbi_prefix" type="hidden" value="Rn"/>
+      <param name="ucsc_genome_version" type="hidden" value="rn5"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
+    <when value="user_specified">
+      <param name="ensembl_organism" type="text" value="" label="Ensembl Organism Name" help="Examples: homo_sapiens, mus_musculus, rattus_norvegicus"/>
+      <param name="ensembl_prefix" type="text" value="" label="Ensembl Organism prefix" help="Examples: Homo_sapiens, Mus_musculus, Rattus_norvegicus"/>
+      <param name="ensembl_genome_version" type="text" value="" label="Ensembl Genome Version" help="Examples: GRCh37, GRCm38, Rnor_5.0"/>
+      <param name="ensembl_version" type="integer" value="" label="Ensembl Release Version" help="Example: 71"/>
+      <param name="ncbi_organism" type="text" value="" label="NCBI Organism Name" help="Examples: Homo_sapiens, Mus_musculus, Rattus_norvegicus"/>
+      <param name="ncbi_prefix" type="text" value="" label="NCBI Organism Unigene prefix" help="Examples: Hs, Mm, Rn"/>
+      <param name="ucsc_genome_version" type="text" value="" label="UCSC Genome Version" help="Examples: hg19, mm10, rn5"/>
+      <param name="chromosomes" type="text" value="" label="Chromosomes for Ensembl genome build" >
+       <help>  Examples: 
+         Homo_sapiens: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT
+         Mus_musculus: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT
+         Rattus_norvegicus: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,MT
+         ( ftp://ftp.ensembl.org/pub/release-71/fasta/homo_sapiens/dna/ )
+       </help>
+      </param>
+      <param name="mt_chromosome" type="text" value="MT" label="Ensembl Mitochondrial Chromosome name" />
+      <param name="gene_sources" type="text" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding" label="Gene sources" />
+      <param name="ig_gene_sources" type="text" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene" label="IG Gene sources" />
+      <param name="rrna_gene_sources" type="text" value="Mt_rRNA,rRNA,rRNA_pseudogene" label="Ribosomal Gene sources" />
+    </when>
+  </conditional>
+ </inputs>
+ <outputs>
+  <data name="out_file" format="data_manager_json" label="${tool.name} : ${genome.ensembl_genome_version}"/>
+ </outputs>
+  <stdio>
+    <exit_code range="1:"  level="fatal"   description="Error running Create DeFuse Reference" />
+    <regex match="Error:" 
+           source="both" 
+           level="fatal" 
+           description="Error running Create DeFuse Reference" />
+
+  </stdio>
+ <configfiles>
+  <configfile name="defuse_config">
+#
+# Configuration file for defuse
+#
+# Variables that designate the PATH to an application, e.g. __SAMTOOLS_BIN__ 
+#   will be set by the runtime script using the ENV PATH
+#
+
+# Directory where the defuse code was unpacked
+source_directory = __DEFUSE_PATH__
+
+# Organism IDs
+ensembl_organism = $genome.ensembl_organism
+ensembl_prefix = $genome.ensembl_prefix
+ensembl_version = $genome.ensembl_version
+ensembl_genome_version = $genome.ensembl_genome_version
+ucsc_genome_version = $genome.ucsc_genome_version
+ncbi_organism = $genome.ncbi_organism
+ncbi_prefix = $genome.ncbi_prefix
+
+# Directory where you want your dataset
+dataset_directory = __DATASET_DIRECTORY__
+
+#raw
+# Input genome and gene models
+gene_models                                 = $(dataset_directory)/$(ensembl_prefix).$(ensembl_genome_version).$(ensembl_version).gtf
+genome_fasta                                = $(dataset_directory)/$(ensembl_prefix).$(ensembl_genome_version).$(ensembl_version).dna.chromosomes.fa
+
+# Repeat table from ucsc genome browser
+repeats_filename                            = $(dataset_directory)/repeats.txt
+
+# EST info downloaded from ucsc genome browser
+est_fasta                                   = $(dataset_directory)/est.fa
+est_alignments                              = $(dataset_directory)/intronEst.txt
+
+# Unigene clusters downloaded from ncbi
+unigene_fasta                               = $(dataset_directory)/$(ncbi_prefix).seq.uniq
+#end raw
+
+# Paths to external tools
+samtools_bin =  __SAMTOOLS_BIN__
+bowtie_bin = __BOWTIE_BIN__
+bowtie_build_bin = __BOWTIE_BUILD_BIN__
+blat_bin = __BLAT_BIN__
+fatotwobit_bin = __FATOTWOBIT_BIN__
+gmap_bin = __GMAP_BIN__
+gmap_setup_bin = __GMAP_SETUP_BIN__
+r_bin = __R_BIN__
+rscript_bin = __RSCRIPT_BIN__
+
+#raw
+# Directory where you want your dataset
+gmap_index_directory                        = $(dataset_directory)/gmap
+#end raw
+
+#raw
+# Dataset files
+dataset_prefix       = $(dataset_directory)/defuse
+chromosome_prefix    = $(dataset_prefix).dna.chromosomes
+exons_fasta          = $(dataset_prefix).exons.fa
+cds_fasta            = $(dataset_prefix).cds.fa
+cdna_regions         = $(dataset_prefix).cdna.regions
+cdna_fasta           = $(dataset_prefix).cdna.fa
+reference_fasta      = $(dataset_prefix).reference.fa
+rrna_fasta           = $(dataset_prefix).rrna.fa
+ig_gene_list         = $(dataset_prefix).ig.gene.list
+repeats_regions      = $(dataset_directory)/repeats.regions
+est_split_fasta1     = $(dataset_directory)/est.1.fa
+est_split_fasta2     = $(dataset_directory)/est.2.fa
+est_split_fasta3     = $(dataset_directory)/est.3.fa
+est_split_fasta4     = $(dataset_directory)/est.4.fa
+est_split_fasta5     = $(dataset_directory)/est.5.fa
+est_split_fasta6     = $(dataset_directory)/est.6.fa
+est_split_fasta7     = $(dataset_directory)/est.7.fa
+est_split_fasta8     = $(dataset_directory)/est.8.fa
+est_split_fasta9     = $(dataset_directory)/est.9.fa
+
+# Fasta files with bowtie indices for prefiltering reads for concordantly mapping pairs
+prefilter1           = $(unigene_fasta)
+
+# deFuse scripts and tools
+scripts_directory    = $(source_directory)/scripts
+tools_directory      = $(source_directory)/tools
+data_directory       = $(source_directory)/data
+#end raw
+
+# Parameters for building the dataset
+chromosomes = $genome.chromosomes
+mt_chromosome = $genome.mt_chromosome
+gene_sources = $genome.gene_sources
+ig_gene_sources = $genome.ig_gene_sources
+rrna_gene_sources = $genome.rrna_gene_sources
+gene_biotypes = $genome.gene_sources
+ig_gene_biotypes = $genome.ig_gene_sources
+rrna_gene_biotypes = $genome.rrna_gene_sources
+
+#raw
+# Remove temp files
+remove_job_files                            = yes
+remove_job_temp_files                       = yes
+#end raw
+  </configfile>
+  <configfile name="defuse_script">#slurp
+#!/bin/bash
+## define some things for cheetah processing
+#set $amp = chr(38)
+#set $gt = chr(62)
+## substitute pathnames into config file
+if `grep __DATASET_DIRECTORY__ $defuse_config ${gt} /dev/null`;then sed -i'.tmp' "s#__DATASET_DIRECTORY__#\$1#" $defuse_config; fi
+if `grep __DEFUSE_PATH__ $defuse_config ${gt} /dev/null`;then sed -i'.tmp' "s#__DEFUSE_PATH__#\${DEFUSE_PATH}#" $defuse_config; fi
+if `grep __SAMTOOLS_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} SAMTOOLS_BIN=`which samtools`;then sed -i'.tmp' "s#__SAMTOOLS_BIN__#\${SAMTOOLS_BIN}#" $defuse_config; fi
+if `grep __BOWTIE_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BIN=`which bowtie`;then sed -i'.tmp' "s#__BOWTIE_BIN__#\${BOWTIE_BIN}#" $defuse_config; fi
+if `grep __BOWTIE_BUILD_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BUILD_BIN=`which bowtie-build`;then sed -i'.tmp' "s#__BOWTIE_BUILD_BIN__#\${BOWTIE_BUILD_BIN}#" $defuse_config; fi
+if `grep __BLAT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BLAT_BIN=`which blat`;then sed -i'.tmp' "s#__BLAT_BIN__#\${BLAT_BIN}#" $defuse_config; fi
+if `grep __FATOTWOBIT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} FATOTWOBIT_BIN=`which faToTwoBit`;then sed -i'.tmp' "s#__FATOTWOBIT_BIN__#\${FATOTWOBIT_BIN}#" $defuse_config; fi
+if `grep __GMAP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_BIN=`which gmap`;then sed -i'.tmp' "s#__GMAP_BIN__#\${GMAP_BIN}#" $defuse_config; fi
+if `grep __GMAP_SETUP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_SETUP_BIN=`which gmap_setup`;then sed -i'.tmp' "s#__GMAP_SETUP_BIN__#\${GMAP_SETUP_BIN}#" $defuse_config; fi
+if `grep __GMAP_INDEX_DIR__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_INDEX_DIR=`pwd`/gmap;then sed -i'.tmp' "s#__GMAP_INDEX_DIR__#\${GMAP_INDEX_DIR}#" $defuse_config; fi
+if `grep __R_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} R_BIN=`which R`;then sed -i'.tmp' "s#__R_BIN__#\${R_BIN}#" $defuse_config; fi
+if `grep __RSCRIPT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} RSCRIPT_BIN=`which Rscript`;then sed -i'.tmp' "s#__RSCRIPT_BIN__#\${RSCRIPT_BIN}#" $defuse_config; fi
+## copy config to output
+cp $defuse_config \$1/defuse_config.txt
+## Run the create_reference_dataset.pl
+perl \${DEFUSE_PATH}/scripts/create_reference_dataset.pl -c $defuse_config 
+  </configfile>
+ </configfiles>
+
+ <tests>
+ </tests>
+ <help>
+**DeFuse**
+
+DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion.  See the DeFuse_Version_0.6_ manual for details.
+
+DeFuse uses a Reference Dataset to search for gene fusions.  The Reference Dataset is generated from the following sources in DeFuse_Version_0.6_:
+    - genome_fasta from Ensembl
+    - gene_models from Ensembl
+    - repeats_filename from UCSC RepeatMasker rmsk.txt
+    - est_fasta from UCSC
+    - est_alignments from UCSC intronEst.txt
+    - unigene_fasta from NCBI
+
+The create_defuse_reference Galaxy tool downloads the reference genome and other source files, and builds any derivative files including bowtie indices, gmap indices, and 2bit files. Expect this step to take at least 12 hours.
+
+
+It will generate the reference data for the deFuse Galaxy tool.  
+
+Journal reference: http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1001138
+
+.. _DeFuse: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page
+
+.. _DeFuse_Version_0.6: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.6.1
+
+------
+
+**Outputs**
+
+The galaxy history will contain: the config.txt file that provides DeFuse with the reference data paths.  
+
+ </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml	Sun Jan 17 14:11:06 2016 -0500
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<datatypes>
+    <registration>
+        <datatype extension="defuse.conf" type="galaxy.datatypes.data:Text" subclass="True" display_in_upload="true"/>
+        <datatype extension="defuse.results.tsv" type="galaxy.datatypes.tabular:Tabular" subclass="True" display_in_upload="true"/>
+    </registration>
+</datatypes>
--- a/defuse.xml	Mon Jan 14 12:24:28 2013 -0600
+++ b/defuse.xml	Sun Jan 17 14:11:06 2016 -0500
@@ -1,103 +1,150 @@
-<tool id="defuse" name="DeFuse" version="1.6">
- <description>identify fusion transcripts</description>
- <requirements>
-  <requirement type="package" version="0.6.0">defuse</requirement>
-  <requirement type="package" version="0.1.18">samtools</requirement>
-  <requirement type="package" version="0.12.7">bowtie</requirement>
-  <requirement type="package" version="2012-07-20">gmap</requirement>
-  <requirement type="package" version="34x10">blat</requirement>
-  <requirement type="package" version="34x10">fatotwobit</requirement>
- </requirements>
+<tool id="defuse" name="DeFuse" version="@DEFUSE_VERSION@.1">
+    <description>identify fusion transcripts</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <expand macro="defuse_requirement" />
+        <expand macro="mapping_requirements" />
+        <expand macro="r_requirements" />
+    </requirements>
   <command interpreter="command"> /bin/bash $shscript </command>
  <inputs>
   <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads.  (FASTQ interlacer will pair reads and remove the unpaired.   FASTQ de-interlacer will separate the result into left and right reads.)"/>
   <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/>
+  <param name="library_name" type="text" value="unknown" label="library name" help="Value to put in the results library_name column">
+    <validator type="length" min="1"/>
+  </param>
   <conditional name="refGenomeSource">
-      <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help="">
-        <option value="indexed">Use a built-in DeFuse Reference Dataset</option>
-        <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option>
+    <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help="">
+      <option value="indexed">Use a built-in DeFuse Reference Dataset</option>
+      <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option>
+    </param>
+    <when value="indexed">
+      <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team">
+        <options from_file="defuse_reference.loc">
+          <column name="name" index="1"/>
+          <column name="value" index="3"/>
+          <filter type="sort_by" column="0" />
+          <validator type="no_options" message="No indexes are available" />
+        </options>
+      </param>
+    </when>
+    <when value="history">
+      <param name="config" type="data" format="defuse.conf" label="Defuse Config file" help=""/>
+    </when>  <!-- history -->
+  </conditional>  <!-- refGenomeSource -->
+  <conditional name="defuse_param">
+    <param name="settings" type="select" label="Defuse parameter settings" help="">
+      <option value="preSet">Default settings</option>
+      <option value="full">Full parameter list</option>
+    </param>
+    <when value="preSet" />
+    <when value="full">
+      <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" />
+      <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" />
+      <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" />
+      <param name="calculate_extra_annotations" type="select" label="Calculate extra annotations, fusion splice index and interrupted index" help="">
+        <option value="">Use Default</option>
+        <option value="no">no</option>
+        <option value="yes">yes</option>
       </param>
-      <when value="indexed">
-        <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team">
-          <options from_file="defuse.loc">
-            <column name="name" index="1"/>
-            <column name="value" index="2"/>
-            <filter type="sort_by" column="0" />
-            <validator type="no_options" message="No indexes are available" />
-          </options>
-        </param>
-        <conditional name="defuse_param">
-          <param name="settings" type="select" label="Defuse parameter settings" help="">
-            <option value="preSet">Default settings</option>
-            <option value="full">Full parameter list</option>
-          </param>
-          <when value="preSet" />
-          <when value="full">
-            <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" />
-            <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" />
-            <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" />
-            <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision">
-              <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
-            </param>
-            <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" />
-            <param name="split_count_threshold" type="integer" value="3" optional="true" label="Filter split_count_threshold" />
-            <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold">
-              <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
-            </param>
-            <param name="max_dist_pos" type="integer" value="600" optional="true" label="Filter max_dist_pos" />
-            <param name="num_dist_genes" type="integer" value="500" optional="true" label="Filter num_dist_genes" />
-            <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" />
-            <param name="max_concordant_ratio" type="float" value="0.1" optional="true" label="Filter max_concordant_ratio">
-              <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
-            </param>
-            <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" />
-            <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold">
-              <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
-            </param>
-            <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density">
-              <help>Position density when calculating covariance</help>
-              <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
-            </param>
-            <param name="denovo_assembly" type="select" label="denovo_assembly" help="">
-              <option value="">Use Default</option>
-              <option value="no">no</option>
-              <option value="yes">yes</option>
-            </param>
-            <!--
-              <param name="positive_controls" type="data" format="txt" optional=true label="Defuse positive_controls" help=""/>
-            -->
-          </when> <!-- full -->
-        </conditional>  <!-- defuse_param -->
-      </when>
-      <when value="history">
-        <param name="config" type="data" format="txt" label="Defuse Config file" help=""/>
-      </when>  <!-- history -->
-  </conditional>  <!-- refGenomeSource -->
+      <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision">
+        <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
+      </param>
+      <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" />
+      <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold">
+        <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
+      </param>
+      <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" />
+      <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" />
+      <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold">
+        <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
+      </param>
+      <param name="multi_exon_transcripts_stats" type="select" label="Use multiple exon transcripts for stats calculations" help="should be enabled for very small libraries">
+        <option value="no" selected="true">no</option>
+        <option value="yes">yes</option>
+      </param>
+      <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density">
+        <help>Position density when calculating covariance</help>
+        <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
+      </param>
+      <param name="max_paired_alignments" type="integer" value="10" optional="true" label="max_paired_alignments">
+        <help>Maximum number of alignments for a read pair, Pairs with more alignments are filtered, default is 10</help>
+        <validator type="in_range" message="Choose a value between 1 and 100" min="1" max="100"/>
+      </param>
+      <param name="denovo_assembly" type="select" label="denovo_assembly" help="">
+        <option value="">Use Default</option>
+        <option value="no">no</option>
+        <option value="yes">yes</option>
+      </param>
+      <!--
+        <param name="positive_controls" type="data" format="txt" optional=true label="Defuse positive_controls" help=""/>
+      -->
+      <param name="reads_per_job" type="integer" value="1000000" optional="true" label="Number of reads for each job in split" />
+    </when> <!-- full -->
+  </conditional>  <!-- defuse_param -->
+  <param name="breakpoints_bam" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Generate a Bam file for the fusions"/>
   <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files" 
          help="The defuse output working directory can be helpful for determining errors that may have occurred during the run, 
                but they require considerable diskspace, and should be deleted and purged when no longer needed."/>
-  <param name="do_get_reads" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/>
+  <param name="do_get_reads" type="boolean" checked="false" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/>
  </inputs>
+ <stdio>
+   <exit_code range="1:"  level="fatal" description="Error Running Defuse" />
+ </stdio>
  <outputs>
   <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/>
   <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" />
   <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output (purge when no longer needed)">
     <filter>keep_output == True</filter>
   </data>
-  <data format="tabular" name="results_tsv" label="${tool.name} on ${on_string}: results.tsv" />
-  <data format="tabular" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" />
-  <data format="tabular" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" />
+  <data format="defuse.results.tsv" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" />
+  <data format="defuse.results.tsv" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" />
   <data format="html" name="fusion_reads" label="${tool.name} on ${on_string}: fusion_reads">
     <filter>do_get_reads == True</filter>
   </data>
+  <data format="bam" name="fusions_bam" label="${tool.name} on ${on_string}: fusions.bam">
+    <filter>breakpoints_bam == True</filter>
+  </data>
+  <!--
+   expression_plot
+   circos plot
+  -->
  </outputs>
  <configfiles>
   <configfile name="defuse_config">
-#import ast
+#import re
+#set $ds = chr(36)
 #if $refGenomeSource.genomeSource == "history":
-#include raw $refGenomeSource.config.__str__
+#set config_file = $refGenomeSource.config.__str__
 #else 
-#set $ref_dict = dict($ast.literal_eval($refGenomeSource.index.value))
+#set config_file = $refGenomeSource.index.value
+#end if
+#set pat = '^\s*([^#=][^=]*?)\s*=\s*(.*?)\s*$'
+#set fh = open($config_file)
+#set keys = ['dataset_directory','ensembl_organism','ensembl_prefix','ensembl_version','ensembl_genome_version','ucsc_genome_version','ncbi_organism','ncbi_prefix','chromosomes','mt_chromosome','gene_sources','ig_gene_sources','rrna_gene_sources']
+#set kv = []
+#for $line in $fh:
+  #set m = $re.match($pat,$line)
+  #if $m and len($m.groups()) == 2:
+    ## #echo $line
+    #if $m.groups()[0] in keys:
+      #set k = $m.groups()[0]
+      #if k == 'dataset_directory' and $refGenomeSource.genomeSource == "indexed":
+        ## The DataManager is configured to place the config file in the same directory as the defuse_data: dataset_directory
+        #set v = $os.path.dirname($config_file)
+      #else:
+        #set v = $m.groups()[1]
+      #end if
+      #set kv = $kv + [[$k, $v]]
+    #end if
+  #end if
+#end for
+## #echo $kv
+#set ref_dict = dict($kv)
+## #echo $ref_dict
+## include raw $refGenomeSource.config.__str__
 #
 # Configuration file for defuse
 #
@@ -107,12 +154,7 @@
 # Directory where the defuse code was unpacked
 ## Default location in the tool/defuse directory  
 # source_directory = ${__root_dir__}/tools/defuse
-source_directory = #slurp
-#try
-$ref_dict['source_directory']
-#except
-__DEFUSE_PATH__
-#end try
+source_directory = __DEFUSE_PATH__
 
 # Directory where you want your dataset
 dataset_directory = #slurp
@@ -122,18 +164,68 @@
 /project/db/genomes/Hsapiens/hg19/defuse
 #end try
 
+# Organism IDs
+ensembl_organism = #slurp
+#try
+$ref_dict['ensembl_organism']
+#except
+homo_sapiens
+#end try
+
+ensembl_prefix = #slurp
+#try
+$ref_dict['ensembl_prefix']
+#except
+Homo_sapiens
+#end try
+
+ensembl_version = #slurp
+#try
+$ref_dict['ensembl_version']
+#except
+71
+#end try
+
+ensembl_genome_version = #slurp
+#try
+$ref_dict['ensembl_genome_version']
+#except
+GRCh37
+#end try
+
+ucsc_genome_version = #slurp
+#try
+$ref_dict['ucsc_genome_version']
+#except
+hg19
+#end try
+
+ncbi_organism = #slurp
+#try
+$ref_dict['ncbi_organism']
+#except
+Homo_sapiens
+#end try
+
+ncbi_prefix = #slurp
+#try
+$ref_dict['ncbi_prefix']
+#except
+Hs
+#end try
+
 # Input genome and gene models
 gene_models = #slurp
 #try
 $ref_dict['gene_models']
 #except
-\$(dataset_directory)/Homo_sapiens.GRCh37.62.gtf
+\$(dataset_directory)/\$(ensembl_prefix).\$(ensembl_genome_version).\$(ensembl_version).gtf
 #end try
 genome_fasta = #slurp
 #try
 $ref_dict['genome_fasta']
 #except
-\$(dataset_directory)/Homo_sapiens.GRCh37.62.dna.chromosome.fa
+\$(dataset_directory)/\$(ensembl_prefix).\$(ensembl_genome_version).\$(ensembl_version).dna.chromosomes.fa
 #end try
 
 # Repeat table from ucsc genome browser
@@ -163,71 +255,28 @@
 #try
 $ref_dict['unigene_fasta']
 #except
-\$(dataset_directory)/Hs.seq.uniq
+\$(dataset_directory)/\$(ncbi_prefix).seq.uniq
 #end try
 
 # Paths to external tools
-bowtie_bin = #slurp
-#try
-$ref_dict['bowtie_bin']
-#except
-__BOWTIE_BIN__
-#end try
-bowtie_build_bin = #slurp
-#try
-$ref_dict['bowtie_build_bin']
-#except
-__BOWTIE_BUILD_BIN__
-#end try
-blat_bin = #slurp
-#try
-$ref_dict['blat_bin']
-#except
-__BLAT_BIN__
-#end try
-fatotwobit_bin = #slurp
-#try
-$ref_dict['fatotwobit_bin']
-#except
-__FATOTWOBIT_BIN__
-#end try
-gmap_bin = #slurp
-#try
-$ref_dict['gmap_bin']
-#except
-__GMAP_BIN__
-#end try
-gmap_bin = #slurp
-#try
-$ref_dict['gmap_bin']
-#except
-__GMAP_BIN__
-#end try
-gmap_setup_bin = #slurp
-#try
-$ref_dict['gmap_setup_bin']
-#except
-__GMAP_SETUP_BIN__
-#end try
-r_bin = #slurp
-#try
-$ref_dict['r_bin']
-#except
-__R_BIN__
-#end try
-rscript_bin = #slurp
-#try
-$ref_dict['rscript_bin']
-#except
-__RSCRIPT_BIN__
-#end try
+bowtie_bin = __BOWTIE_BIN__
+bowtie_build_bin = __BOWTIE_BUILD_BIN__
+blat_bin = __BLAT_BIN__
+fatotwobit_bin = __FATOTWOBIT_BIN__
+gmap_bin = __GMAP_BIN__
+gmap_bin = __GMAP_BIN__
+gmap_setup_bin = __GMAP_SETUP_BIN__
+r_bin = __R_BIN__
+rscript_bin = __RSCRIPT_BIN__
 
 # Directory where you want your dataset
 gmap_index_directory = #slurp
 #try
 $ref_dict['gmap_index_directory']
 #except
-\$(dataset_directory)/gmap
+#raw
+$(dataset_directory)/gmap
+#end raw
 #end try
 
 #raw
@@ -282,9 +331,15 @@
 #except
 --phred33-quals
 #end try
+bowtie_params = #slurp
+#try
+$ref_dict['bowtie_params']
+#except
+--chunkmbs 200
+#end try
 max_insert_size = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_insert_size.__str__ != "":
-$refGenomeSource.defuse_param.max_insert_size
+#if $defuse_param.settings == "full" and $defuse_param.max_insert_size.__str__ != "":
+$defuse_param.max_insert_size
 #else
 #try
 $ref_dict['max_insert_size']
@@ -335,8 +390,8 @@
 
 # Minimum gene fusion range
 dna_concordant_length = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.dna_concordant_length.__str__ != "":
-$refGenomeSource.defuse_param.dna_concordant_length
+#if $defuse_param.settings == "full" and $defuse_param.dna_concordant_length.__str__ != "":
+$defuse_param.dna_concordant_length
 #else
 #try
 $ref_dict['dna_concordant_length']
@@ -347,8 +402,8 @@
 
 # Trim length for discordant reads (split reads are not trimmed)
 discord_read_trim = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.discord_read_trim.__str__ != "":
-$refGenomeSource.defuse_param.discord_read_trim
+#if $defuse_param.settings == "full" and $defuse_param.discord_read_trim.__str__ != "":
+$defuse_param.discord_read_trim
 #else
 #try
 $ref_dict['discord_read_trim']
@@ -356,11 +411,21 @@
 50
 #end try
 #end if
-
+# Calculate extra annotations, fusion splice index and interrupted index
+calculate_extra_annotations = #slurp
+#if $defuse_param.settings == "full" and $defuse_param.calculate_extra_annotations.__str__ != "":
+$defuse_param.calculate_extra_annotations
+#else
+#try
+$ref_dict['calculate_extra_annotations']
+#except
+no
+#end try
+#end if
 # Filtering parameters
 clustering_precision = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.clustering_precision.__str__ != ""
-$refGenomeSource.defuse_param.clustering_precision
+#if $defuse_param.settings == "full" and $defuse_param.clustering_precision.__str__ != ""
+$defuse_param.clustering_precision
 #else
 #try
 $ref_dict['clustering_precision']
@@ -369,8 +434,8 @@
 #end try
 #end if
 span_count_threshold = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.span_count_threshold.__str__ != ""
-$refGenomeSource.defuse_param.span_count_threshold
+#if $defuse_param.settings == "full" and $defuse_param.span_count_threshold.__str__ != ""
+$defuse_param.span_count_threshold
 #else
 #try
 $ref_dict['span_count_threshold']
@@ -378,19 +443,9 @@
 5
 #end try
 #end if
-split_count_threshold = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.split_count_threshold.__str__ != ""
-$refGenomeSource.defuse_param.split_count_threshold
-#else
-#try
-$ref_dict['split_count_threshold']
-#except
-3
-#end try
-#end if
 percent_identity_threshold = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.percent_identity_threshold.__str__ != ""
-$refGenomeSource.defuse_param.percent_identity_threshold
+#if $defuse_param.settings == "full" and $defuse_param.percent_identity_threshold.__str__ != ""
+$defuse_param.percent_identity_threshold
 #else
 #try
 $ref_dict['percent_identity_threshold']
@@ -398,29 +453,9 @@
 0.90
 #end try
 #end if
-max_dist_pos = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_dist_pos.__str__ != ""
-$refGenomeSource.defuse_param.max_dist_pos
-#else
-#try
-$ref_dict['max_dist_pos']
-#except
-600
-#end try
-#end if
-num_dist_genes = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.num_dist_genes.__str__ != ""
-$refGenomeSource.defuse_param.num_dist_genes
-#else
-#try
-$ref_dict['num_dist_genes']
-#except
-500
-#end try
-#end if
 split_min_anchor = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.split_min_anchor.__str__ != ""
-$refGenomeSource.defuse_param.split_min_anchor
+#if $defuse_param.settings == "full" and $defuse_param.split_min_anchor.__str__ != ""
+$defuse_param.split_min_anchor
 #else
 #try
 $ref_dict['split_min_anchor']
@@ -428,19 +463,9 @@
 4
 #end try
 #end if
-max_concordant_ratio = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_concordant_ratio.__str__ != ""
-$refGenomeSource.defuse_param.max_concordant_ratio
-#else
-#try
-$ref_dict['max_concordant_ratio']
-#except
-0.1
-#end try
-#end if
 splice_bias = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.splice_bias.__str__ != ""
-$refGenomeSource.defuse_param.splice_bias
+#if $defuse_param.settings == "full" and $defuse_param.splice_bias.__str__ != ""
+$defuse_param.splice_bias
 #else
 #try
 $ref_dict['splice_bias']
@@ -449,8 +474,8 @@
 #end try
 #end if
 denovo_assembly = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.denovo_assembly.__str__ != ""
-$refGenomeSource.defuse_param.denovo_assembly
+#if $defuse_param.settings == "full" and $defuse_param.denovo_assembly.__str__ != ""
+$defuse_param.denovo_assembly
 #else
 #try
 $ref_dict['denovo_assembly']
@@ -459,8 +484,8 @@
 #end try
 #end if
 probability_threshold = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.probability_threshold.__str__ != ""
-$refGenomeSource.defuse_param.probability_threshold
+#if $defuse_param.settings == "full" and $defuse_param.probability_threshold.__str__ != ""
+$defuse_param.probability_threshold
 #else
 #try
 $ref_dict['probability_threshold']
@@ -470,10 +495,23 @@
 #end if
 positive_controls                           = \$(data_directory)/controls.txt
 
+# Use multiple exon transcripts for stats calculations (yes/no)
+# should be enabled for very small libraries
+multi_exon_transcripts_stats = #slurp
+#if $defuse_param.settings == "full" and $defuse_param.multi_exon_transcripts_stats.__str__ != ""
+$defuse_param.multi_exon_transcripts_stats
+#else
+#try
+$ref_dict['multi_exon_transcripts_stats']
+#except
+no
+#end try
+#end if
+
 # Position density when calculating covariance
 covariance_sampling_density = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.covariance_sampling_density.__str__ != ""
-$refGenomeSource.defuse_param.covariance_sampling_density
+#if $defuse_param.settings == "full" and $defuse_param.covariance_sampling_density.__str__ != ""
+$defuse_param.covariance_sampling_density
 #else
 #try
 $ref_dict['covariance_sampling_density']
@@ -482,12 +520,30 @@
 #end try
 #end if
 
+# Maximum number of alignments for a read pair
+# Pairs with more alignments are filtered
+max_paired_alignments = #slurp
+#if $defuse_param.settings == "full" and $defuse_param.max_paired_alignments.__str__ != ""
+$defuse_param.max_paired_alignments
+#else
+#try
+$ref_dict['max_paired_alignments']
+#except
+10
+#end try
+#end if
 
 # Number of reads for each job in split
-reads_per_job                               = 1000000
-
-# Number of regions for each breakpoint sequence job in split
-regions_per_job                             = 20
+reads_per_job = #slurp
+#if $defuse_param.settings == "full" and $defuse_param.reads_per_job.__str__ != ""
+$defuse_param.reads_per_job
+#else
+#try
+$ref_dict['reads_per_job']
+#except
+1000000
+#end try
+#end if
 
 #raw
 # If you have command line 'mail' and wish to be notified
@@ -497,40 +553,10 @@
 remove_job_files                            = yes
 remove_job_temp_files                       = yes
 
-# Converting to fastq
-# Fastq converter config format 1 for reads stored in separate files for each end
-#  data_lane_rexex_N is a perl regex which stores the lane id in $1
-#  data_end_regex_N is a perl regex which stores the end, 1 or 2, in $1
-#  data_compress_regex_N is a perl regex which stores the compression extension in $1
-#  data_convert_N is the associated conversion utility that takes data at stdin and outputs fastq at stdout
-# Fastq converter config format 2 for reads stored in separate files for each end
-#  data_lane_regex_N is a perl regex which stores the lane id in $1
-#  data_compress_regex_N is a perl regex which stores the compression extension in $1
-#  data_end1_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 1 at stdout
-#  data_end2_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 2 at stdout
+qsub_params                                 = ""
 
-data_lane_regex_1                           = ^(.+)_[12]_export\.txt.*$
-data_end_regex_1                            = ^.+_([12])_export\.txt.*$
-data_compress_regex_1                       = ^.+_[12]_export\.txt(.*)$
-data_converter_1                            = $(scripts_directory)/fq_all2std.pl export2std
-
-data_lane_regex_2                           = ^(.+)_[12]_concat_qseq\.txt.*$
-data_end_regex_2                            = ^.+_([12])_concat_qseq\.txt.*$
-data_compress_regex_2                       = ^.+_[12]_concat_qseq\.txt(.*)$
-data_converter_2                            = $(scripts_directory)/qseq2fastq.pl
-
-data_lane_regex_3                           = ^(.+)\.bam.*$
-data_compress_regex_3                       = ^.+\.bam(.*)$
-data_end1_converter_3                       = samtools view - | filter_sam_mate.pl 1 | sam_to_fastq.pl
-data_end2_converter_3                       = samtools view - | filter_sam_mate.pl 2 | sam_to_fastq.pl
-
-data_lane_regex_4                           = ^(.+).[12].fastq.*$
-data_end_regex_4                            = ^.+.([12]).fastq.*$
-data_compress_regex_4                       = ^.+.[12].fastq(.*)$
-data_converter_4                            = cat
 #end raw
 
-#end if
 
   </configfile>
   <configfile name="shscript">
@@ -588,29 +614,42 @@
 cp $defuse_config $config_txt
 ## make a data_dir  and ln -s the input fastq
 mkdir -p data_dir
-ln -s $left_pairendreads data_dir/reads_1.fastq
-ln -s $right_pairendreads data_dir/reads_2.fastq
+## ln -s "$left_pairendreads" data_dir/reads_1.fastq
+## ln -s "$right_pairendreads" data_dir/reads_2.fastq
+cp "$left_pairendreads" data_dir/reads_1.fastq
+cp "$right_pairendreads" data_dir/reads_2.fastq
 ## ln to output_dir in from_work_dir
 #if $defuse_out.__str__ != 'None':
-mkdir -p $defuse_out.extra_files_path
-ln -s $defuse_out.extra_files_path  output_dir
+mkdir -p $defuse_out.dataset.extra_files_path
+ln -s $defuse_out.dataset.extra_files_path  output_dir
 #else
 mkdir -p output_dir
 #end if
 ## run defuse.pl
-perl \${DEFUSE_PATH}/scripts/defuse.pl -c $defuse_config -d data_dir -o output_dir  -p 8
+perl \${DEFUSE_PATH}/scripts/defuse.pl -name "$library_name" -c $defuse_config -1 data_dir/reads_1.fastq -2 data_dir/reads_2.fastq -o output_dir  -p \$GALAXY_SLOTS
 ## copy primary results to output datasets
 if [ -e output_dir/log/defuse.log ]; then cp output_dir/log/defuse.log $defuse_log; fi
-if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi
+## if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi
 if [ -e output_dir/results.filtered.tsv ]; then cp output_dir/results.filtered.tsv $results_filtered_tsv; fi
 if [ -e output_dir/results.classify.tsv ]; then cp output_dir/results.classify.tsv $results_classify_tsv; fi
+#if $breakpoints_bam:
+if [ -e output_dir/results.filtered.tsv ] ${amp}${amp}  [ -e output_dir/breakpoints.genome.psl ]
+then
+  awk "\\$10 ~ /^(`awk '\\$1 ~ /[0-9]+/{print \\$1}' output_dir/results.filtered.tsv | tr '\n' '|'`)\\$/{print \\$0}" output_dir/breakpoints.genome.psl > breakpoints.genome.filtered.psl ${amp}${amp}
+  psl2sam.pl breakpoints.genome.filtered.psl > breakpoints.genome.filtered.sam ${amp}${amp}
+  samtools view -b -T /panfs/roc/rissdb/galaxy/genomes/NCBIM37/defuse/defuse.reference.fa -o breakpoints.genome.filtered.bam breakpoints.genome.filtered.sam ${amp}${amp}
+  samtools sort breakpoints.genome.filtered.bam breakpoints ${amp}${amp}
+  ## Steps above: keep PSL rows whose 10th field equals a numeric first-column id from results.filtered.tsv, convert PSL to SAM to BAM, then sort into breakpoints.bam. NOTE(review): the -T reference is a hard-coded site-specific path (NCBIM37) - confirm it matches the selected reference genome. (samtools index breakpoints.bam is left disabled)
+  cp breakpoints.bam $fusions_bam
+fi
+#end if
 ## create html with links for output_dir
 #if $defuse_out.__str__ != 'None':
 if [ -e $defuse_out ]
 then
   echo '${lt}html${gt}${lt}head${gt}${lt}title${gt}Defuse Output${lt}/title${gt}${lt}/head${gt}${lt}body${gt}' ${gt} $defuse_out
   echo '${lt}h2${gt}Defuse Output Files${lt}/h2${gt}${lt}ul${gt}' ${gt}${gt}  $defuse_out
-  pushd $defuse_out.extra_files_path
+  pushd $defuse_out.dataset.extra_files_path
   for f in `find -L . -maxdepth 1 -type f`; 
    do fn=`basename ${ds}f`; echo '${lt}li${gt}${lt}a href="'${ds}fn'"${gt}'${ds}fn'${lt}/a${gt}${lt}/li${gt}' ${gt}${gt}  $defuse_out; 
   done
@@ -623,8 +662,8 @@
 #if $fusion_reads.__str__ != 'None':
 if [ -e output_dir/results.filtered.tsv -a -e $fusion_reads ] 
 then
-  mkdir -p $fusion_reads.extra_files_path
-  results2html output_dir/results.filtered.tsv $fusion_reads $fusion_reads.extra_files_path
+  mkdir -p $fusion_reads.dataset.extra_files_path
+  results2html output_dir/results.filtered.tsv $fusion_reads $fusion_reads.dataset.extra_files_path
 fi
 #end if
   </configfile>
@@ -753,4 +792,5 @@
   3596	TGGGGGTTGAGGCTTCTGTTCCCAGGTTCCATGACCTCAGAGGTGGCTGGTGAGGTTATGACCTTTGCCCTCCAGCCCTGGCTTAAAACCTCAGCCCTAGGACCTGGTTAAAGGAAGGGGAGATGGAGCTTTGCCCCGACCCCCCCCCGTTCCCCTCACCTGTCAGCCCGAGCTGGGCCAGGGCCCCTAGGTGGGGAACTGGGCCGGGGGGCGGGCACAAGCGGAGGTGGTGCCCCCAAAAGGGCTCCCGGTGGGGTCTTGCTGAGAAGGTGAGGGGTTCCCGGGGCCGCAGCAGGTGGTGGTGGAGGAGCCAAGCGGCTGTAGAGCAAGGGGTGAGCAGGTTCCAGACCGTAGAGGCGGGCAGCGGCCACGGCCCCGGGTCCAGTTAGCTCCTCACCCGCCTCATAGAAGCGGGGTGGCCTTGCCAGGCGTGGGGGTGCTGCC|TTCCTTGGATGTGGTAGCCGTTTCTCAGGCTCCCTCTCCGGAATCGAACCCTGATTCCCCGTCACCCGTGGTCACCATGGTAGGCACGGCGACTACCATCGAAAGTTGATAGGGCAGACGTTCGAATGGGTCGTCGCCGCCACGGGGGGCGTGCGATCAGCCCGAGGTTATCTAGAGTCACCAAAGCCGCCGGCGCCCGCCCCCCGGCCGGGGCCGGAGAGGGGCTGACCGGGTTGGTTTTGATCTGATAAATGCACGCATCCCCCCCGCGAAGGGGGTCAGCGCCCGTCGGCATGTATTAGCTCTAGAATTACCACAGTTATCCAAGTAGGAGAGGAGCGAGCGACCAAAGGAACCATAACTGATTTAATGAGCCATTCGCAGTTTCACTGTACCGGCCGTGCGTACTTAGACATGCATGGCTTAATCTTTGAGACAAGCATATGCTACTGGCAGG	250	7.00711162298275e-72	0.00912124762512338	0.00684237452309549	N	N	3.31745197152461	3.47233119514066	3.31745197152461	splitr	7	0.0157657657657656	0	0	N	0.0135135135135136	N	N	0	0	ENSG00000156860	ENSG00000212932	-	+	16	21	30682131	48111157	coding	upstream	FBRS	RPL23AP4	30670289	48110676	+	+	0.0157657657657656	30680678	9827473	-	+	Y	-	-	N	output_dir	2	1	1.11111111111111	1	1	1	N	N	0	1	9	0.325530693397641	0.296465452915709	0.325530693397641	0.296465452915709	2	-	-	
 
  </help>
+    <expand macro="citations"/>
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/defuse_bamfastq.xml	Sun Jan 17 14:11:06 2016 -0500
@@ -0,0 +1,63 @@
+<?xml version="1.0"?>
+<tool id="defuse_bamfastq" name="Defuse BamFastq" version="@DEFUSE_VERSION@.1"> <!-- Galaxy wrapper for the bamfastq command: one BAM dataset in, two fastqsanger datasets (one per read end) out -->
+  <description>converts a bam file to fastq files.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <expand macro="defuse_requirement" />
+    </requirements>
+  <command>bamfastq
+    #if $pair == True :
+      $pair
+    #end if
+    #if $multiple == True :
+      $multiple
+    #end if
+    #if $rename == True :
+      $rename
+    #end if
+    -b $bamfile
+    -1 $fastq1
+    -2 $fastq2
+  </command> <!-- NOTE(review): each flag is emitted only when its "== True" test passes, but these booleans render as their truevalue strings (-p, -m, -r); confirm the comparison behaves as intended on the target Galaxy release. Since every falsevalue is empty, emitting the three variables directly may be the simpler form. -->
+  <inputs>
+    <param name="bamfile" type="data" format="bam" label="Bam file"/> 
+    <param name="pair" type="boolean" truevalue="-p" falsevalue="" checked="true" label="Name contains pair info as /1 /2."/>
+    <param name="multiple" type="boolean" truevalue="-m" falsevalue="" checked="true" label="Bam contains multiple mappings per read."/>
+    <param name="rename" type="boolean" truevalue="-r" falsevalue="" checked="true" label="Rename with integer IDs."/>
+  </inputs>
+  <stdio> <!-- any exit code of 1 or greater is reported as a fatal error -->
+    <exit_code range="1:" level="fatal" description="Error" />
+  </stdio>
+  <outputs>
+    <data format="fastqsanger" name="fastq1" label="fastq1"  />
+    <data format="fastqsanger" name="fastq2" label="fastq2"  />
+  </outputs>
+  <tests> <!-- one test: each output must hold only its own mate (/1 or /2) of read test_mRNA_36_146_27, and test_mRNA_150_290_0 must be absent from both -->
+    <test>
+      <param name="bamfile" ftype="bam" value="tophat_out2h.bam" />
+      <param name="pair" value="True" />
+      <param name="multiple" value="True" />
+      <param name="rename" value="True" /> <!-- NOTE(review): rename is labelled "Rename with integer IDs", yet the assertions below expect the original read name; confirm the expected output -->
+      <output name="fastq1">
+        <assert_contents>
+          <has_text text="@test_mRNA_36_146_27/1" />
+          <not_has_text text="@test_mRNA_36_146_27/2" />
+          <not_has_text text="test_mRNA_150_290_0" />
+        </assert_contents>
+      </output>
+      <output name="fastq2">
+        <assert_contents>
+          <has_text text="@test_mRNA_36_146_27/2" />
+          <not_has_text text="@test_mRNA_36_146_27/1" />
+          <not_has_text text="test_mRNA_150_290_0" />
+        </assert_contents>
+      </output>
+    </test>
+  </tests>
+  <help>
+    bamfastq converts a bam file input into a pair of fastq files that can be used as input to deFuse.
+  </help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/defuse_results_to_vcf.py	Sun Jan 17 14:11:06 2016 -0500
@@ -0,0 +1,273 @@
+#!/usr/bin/env python
+"""
+#
+#------------------------------------------------------------------------------
+#                         University of Minnesota
+#         Copyright 2012, Regents of the University of Minnesota
+#------------------------------------------------------------------------------
+# Author:
+#
+#  James E Johnson
+#  Jesse Erdmann
+#
+#------------------------------------------------------------------------------
+"""
+
+
+"""
+This tool takes the defuse results.tsv  tab-delimited file as input and creates a Variant Call Format file as output.
+"""
+
+import sys,re,os.path
+import optparse
+from optparse import OptionParser
+
+"""
+http://www.1000genomes.org/wiki/analysis/variant-call-format/vcf-variant-call-format-version-42
+
+5. INFO keys used for structural variants
+When the INFO keys reserved for encoding structural variants are used for imprecise variants, the values should be best estimates. When a key reflects a property of a single alt allele (e.g. SVLEN), then when there are multiple alt alleles there will be multiple values for the key corresponding to each alelle (e.g. SVLEN=-100,-110 for a deletion with two distinct alt alleles).
+The following INFO keys are reserved for encoding structural variants. In general, when these keys are used by imprecise variants, the values should be best estimates. When a key reflects a property of a single alt allele (e.g. SVLEN), then when there are multiple alt alleles there will be multiple values for the key corresponding to each alelle (e.g. SVLEN=-100,-110 for a deletion with two distinct alt alleles).
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=NOVEL,Number=0,Type=Flag,Description="Indicates a novel structural variation">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+For precise variants, END is POS + length of REF allele - 1, and the for imprecise variants the corresponding best estimate.
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+Value should be one of DEL, INS, DUP, INV, CNV, BND. This key can be derived from the REF/ALT fields but is useful for filtering.
+##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
+One value for each ALT allele. Longer ALT alleles (e.g. insertions) have positive values, shorter ALT alleles (e.g. deletions) have negative values.
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
+##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">
+##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">
+##INFO=<ID=BKPTID,Number=.,Type=String,Description="ID of the assembled alternate allele in the assembly file">
+For precise variants, the consensus sequence the alternate allele assembly is derivable from the REF and ALT fields. However, the alternate allele assembly file may contain additional information about the characteristics of the alt allele contigs.
+##INFO=<ID=MEINFO,Number=4,Type=String,Description="Mobile element info of the form NAME,START,END,POLARITY">
+##INFO=<ID=METRANS,Number=4,Type=String,Description="Mobile element transduction info of the form CHR,START,END,POLARITY">
+##INFO=<ID=DGVID,Number=1,Type=String,Description="ID of this element in Database of Genomic Variation">
+##INFO=<ID=DBVARID,Number=1,Type=String,Description="ID of this element in DBVAR">
+##INFO=<ID=DBRIPID,Number=1,Type=String,Description="ID of this element in DBRIP">
+##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
+##INFO=<ID=PARID,Number=1,Type=String,Description="ID of partner breakend">
+##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
+##INFO=<ID=CILEN,Number=2,Type=Integer,Description="Confidence interval around the length of the inserted material between breakends">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Read Depth of segment containing breakend">
+##INFO=<ID=DPADJ,Number=.,Type=Integer,Description="Read Depth of adjacency">
+##INFO=<ID=CN,Number=1,Type=Integer,Description="Copy number of segment containing breakend">
+##INFO=<ID=CNADJ,Number=.,Type=Integer,Description="Copy number of adjacency">
+##INFO=<ID=CICN,Number=2,Type=Integer,Description="Confidence interval around copy number for the segment">
+##INFO=<ID=CICNADJ,Number=.,Type=Integer,Description="Confidence interval around copy number for the adjacency">
+6. FORMAT keys used for structural variants
+##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events">
+##FORMAT=<ID=CNQ,Number=1,Type=Float,Description="Copy number genotype quality for imprecise events">
+##FORMAT=<ID=CNL,Number=.,Type=Float,Description="Copy number genotype likelihood for imprecise events">
+##FORMAT=<ID=NQ,Number=1,Type=Integer,Description="Phred style probability score that the variant is novel with respect to the genome's ancestor">
+##FORMAT=<ID=HAP,Number=1,Type=Integer,Description="Unique haplotype identifier">
+##FORMAT=<ID=AHAP,Number=1,Type=Integer,Description="Unique identifier of ancestral haplotype">
+These keys are analogous to GT/GQ/GL and are provided for genotyping imprecise events by copy number (either because there is an unknown number of alternate alleles or because the haplotypes cannot be determined). CN specifies the integer copy number of the variant in this sample. CNQ is encoded as a phred quality -10log_10p(copy number genotype call is wrong). CNL specifies a list of log10 likelihoods for each potential copy number, starting from zero. When possible, GT/GQ/GL should be used instead of (or in addition to) these keys.
+
+Specifying Complex Rearrangements with Breakends
+An arbitrary rearrangement event can be summarized as a set of novel adjacencies.
+Each adjacency ties together 2 breakends. The two breakends at either end of a novel adjacency are called mates.
+There is one line of VCF (i.e. one record) for each of the two breakends in a novel adjacency. A breakend record is identified with the tag "SVTYPE=BND" in the INFO field. The REF field of a breakend record indicates a base or sequence s of bases beginning at position POS, as in all VCF records. The ALT field of a breakend record indicates a replacement for s. This "breakend replacement" has three parts:
+the string t that replaces s. The string t may be an extended version of s if some novel bases are inserted during the formation of the novel adjacency.
+The position p of the mate breakend, indicated by a string of the form "chr:pos". This is the location of the first mapped base in the piece being joined at this novel adjacency.
+The direction that the joined sequence continues in, starting from p. This is indicated by the orientation of square brackets surrounding p.
+These 3 elements are combined in 4 possible ways to create the ALT. In each of the 4 cases, the assertion is that s is replaced with t, and then some piece starting at position p is joined to t. The cases are:
+REF   ALT    Meaning
+s     t[p[   piece extending to the right of p is joined after t
+s     t]p]   reverse comp piece extending left of p is joined after t
+s     ]p]t   piece extending to the left of p is joined before t
+s     [p[t   reverse comp piece extending right of p is joined before t
+
+Examples:
+#CHROM POS    ID     REF ALT           QUAL FILT INFO
+2      321681 bnd_W  G   G]17:198982]  6    PASS SVTYPE=BND;MATEID=bnd_Y
+2      321682 bnd_V  T   ]13:123456]T  6    PASS SVTYPE=BND;MATEID=bnd_U
+13     123456 bnd_U  C   C[2:321682[   6    PASS SVTYPE=BND;MATEID=bnd_V
+13     123457 bnd_X  A   [17:198983[A  6    PASS SVTYPE=BND;MATEID=bnd_Z
+17     198982 bnd_Y  A   A]2:321681]   6    PASS SVTYPE=BND;MATEID=bnd_W
+17     198983 bnd_Z  C   [13:123457[C  6    PASS SVTYPE=BND;MATEID=bnd_X
+"""
+
+vcf_header =  """\
+##fileformat=VCFv4.1
+##source=defuse
+##reference=%s
+##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=MATEID,Number=1,Type=String,Description="ID of the BND mate">
+##INFO=<ID=MATELOC,Number=1,Type=String,Description="The chrom:position of the BND mate">
+##INFO=<ID=GENESTRAND,Number=2,Type=String,Description="Strands">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Read Depth of segment containing breakend">
+##INFO=<ID=SPLITCNT,Number=1,Type=Integer,Description="number of split reads supporting the prediction">
+##INFO=<ID=SPANCNT,Number=1,Type=Integer,Description="number of spanning reads supporting the fusion">
+##INFO=<ID=HOMLEN,Number=1,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">
+##INFO=<ID=SPLICESCORE,Number=1,Type=Integer,Description="number of nucleotides similar to GTAG at fusion splice">
+##INFO=<ID=GENE,Number=2,Type=String,Description="Gene Names at each breakend">
+##INFO=<ID=GENEID,Number=2,Type=String,Description="Gene IDs at each breakend">
+##INFO=<ID=GENELOC,Number=2,Type=String,Description="location of breakpoint releative to genes">
+##INFO=<ID=EXPR,Number=2,Type=Integer,Description="expression of genes as number of concordant pairs aligned to exons">
+##INFO=<ID=ORF,Number=0,Type=Flag,Description="fusion combines genes in a way that preserves a reading frame">
+##INFO=<ID=EXONBND,Number=0,Type=Flag,Description="fusion splice at exon boundaries">
+##INFO=<ID=INTERCHROM,Number=0,Type=Flag,Description="fusion produced by an interchromosomal translocation">
+##INFO=<ID=READTHROUGH,Number=0,Type=Flag,Description="fusion involving adjacent potentially resulting from co-transcription rather than genome rearrangement">
+##INFO=<ID=ADJACENT,Number=0,Type=Flag,Description="fusion between adjacent genes">
+##INFO=<ID=ALTSPLICE,Number=0,Type=Flag,Description="fusion likely the product of alternative splicing between adjacent genes">
+##INFO=<ID=DELETION,Number=0,Type=Flag,Description="fusion produced by a genomic deletion">
+##INFO=<ID=EVERSION,Number=0,Type=Flag,Description="fusion produced by a genomic eversion">
+##INFO=<ID=INVERSION,Number=0,Type=Flag,Description="fusion produced by a genomic inversion">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO\
+"""
+
+def cmp_alphanumeric(s1,s2):
+  """cmp-style natural-order comparator (returns <0, 0 or >0): strings are split into digit runs and ASCII-letter runs (other characters are ignored); digit runs compare as integers, anything else as strings."""
+  a1 = re.findall(r"\d+|[a-zA-Z]+",s1)
+  a2 = re.findall(r"\d+|[a-zA-Z]+",s2)
+  for t1,t2 in zip(a1,a2):
+    if t1.isdigit() and t2.isdigit():
+      # Equal numbers spelled differently ('01' vs '1') must not end the comparison early; fall through to the next token.
+      if int(t1) != int(t2):
+        return int(t1) - int(t2)
+    elif t1 != t2:
+      return 1 if t1 > t2 else -1
+  # All shared tokens tie: the string with fewer tokens sorts first.
+  return len(a1) - len(a2)
+
+def __main__():
+  """
+  Command-line entry point: convert a deFuse results.tsv file to VCF.
+
+  Each data row becomes two breakend (SVTYPE=BND) records, one per side of the
+  fusion, which reference each other through MATEID/MATELOC.  Columns are
+  located by name using the header line (the line starting with 'cluster_id').
+  Records are collected in memory and written sorted by chromosome and position.
+
+  Exit status: 2 if the input cannot be opened, 3 if the output cannot be
+  opened, 1 on any error while converting.
+  """
+  # VCF functions
+  # chr_dict[chromosome][position][record id] = VCF line, so records can be sorted when written
+  chr_dict = dict()
+  def add_vcf_line(chr,pos,id,line):
+    chr_dict.setdefault(chr,dict()).setdefault(pos,dict())[id] = line
+
+  def write_vcf():
+    print >> outputFile, vcf_header % (refname)
+    for chr in sorted(chr_dict.keys(),cmp=cmp_alphanumeric):
+      for pos in sorted(chr_dict[chr].keys()):
+        for id in chr_dict[chr][pos]:
+          print >> outputFile, chr_dict[chr][pos][id]
+  #Parse Command Line
+  parser = optparse.OptionParser()
+  # files
+  parser.add_option( '-i', '--input', dest='input', help='The input defuse results.tsv file (else read from stdin)' )
+  parser.add_option( '-o', '--output', dest='output', help='The output vcf file (else write to stdout)' )
+  parser.add_option( '-r', '--reference', dest='reference', default=None, help='The genomic reference id' )
+  (options, args) = parser.parse_args()
+
+  # results.tsv input 
+  if options.input != None:
+    try:
+      inputPath = os.path.abspath(options.input)
+      inputFile = open(inputPath, 'r')
+    except Exception, e:
+      print >> sys.stderr, "failed: %s" % e
+      sys.exit(2)
+  else:
+    inputFile = sys.stdin
+  # vcf output 
+  if options.output != None:
+    try:
+      outputPath = os.path.abspath(options.output)
+      outputFile = open(outputPath, 'w')
+    except Exception, e:
+      print >> sys.stderr, "failed: %s" % e
+      sys.exit(3)
+  else:
+    outputFile = sys.stdout
+
+  refname = options.reference if options.reference else 'unknown'
+
+  svtype = 'SVTYPE=BND'
+  filt = 'PASS'
+  # (results.tsv Y/N column, INFO flag) in the order the flags are written
+  flag_columns = [('orf','ORF'),('exonboundaries','EXONBND'),('interchromosomal','INTERCHROM'),
+                  ('read_through','READTHROUGH'),('adjacent','ADJACENT'),('altsplice','ALTSPLICE'),
+                  ('deletion','DELETION'),('eversion','EVERSION'),('inversion','INVERSION')]
+  columns = []
+  try:
+    for linenum,line in enumerate(inputFile):
+      fields = line.strip().split('\t')
+      if line.startswith('cluster_id'):
+        columns = fields
+        continue
+      if not line.strip():
+        # tolerate blank lines (e.g. a trailing newline) instead of aborting the conversion
+        continue
+      if not columns:
+        raise ValueError("line %d: data row found before the 'cluster_id' header line" % (linenum + 1))
+      # look a value up by its header name (first matching column)
+      col = lambda name: fields[columns.index(name)]
+      cluster_id = col('cluster_id')
+      gene_chromosome1 = col('gene_chromosome1')
+      gene_chromosome2 = col('gene_chromosome2')
+      genomic_strand1 = col('genomic_strand1')
+      genomic_strand2 = col('genomic_strand2')
+      gene_info = 'GENEID=%s,%s' % (col('gene1'),col('gene2'))
+      gene_name_info = 'GENE=%s,%s' % (col('gene_name1'),col('gene_name2'))
+      gene_loc = 'GENELOC=%s,%s' % (col('gene_location1'),col('gene_location2'))
+      expr = 'EXPR=%d,%d' % (int(col('expression1')),int(col('expression2')))
+      genomic_break_pos1 = int(col('genomic_break_pos1'))
+      genomic_break_pos2 = int(col('genomic_break_pos2'))
+      homlen = 'HOMLEN=%s' % int(col('breakpoint_homology'))
+      span_count = int(col('span_count'))
+      splitr_count = int(col('splitr_count'))
+      splice_score = int(col('splice_score'))
+      # splitr_sequence is "<sequence before the break>|<sequence after the break>"
+      split_seqs = col('splitr_sequence').split('|')
+      mate_id1 = "bnd_%s_1" % cluster_id
+      mate_id2 = "bnd_%s_2" % cluster_id
+      # REF base of each record is the base adjacent to the break on its own side
+      ref1 = split_seqs[0][-1]
+      ref2 = split_seqs[1][0]
+      b1 = '[' if genomic_strand1 == '+' else ']'
+      b2 = '[' if genomic_strand2 == '+' else ']'
+      alt1 = "%s%s%s:%d%s" %  (ref1,b2,gene_chromosome2,genomic_break_pos2,b2) 
+      alt2 = "%s%s:%d%s%s" %  (b1,gene_chromosome1,genomic_break_pos1,b1,ref2) 
+      #TODO evaluate what should be included in the INFO field
+      info = ['DP=%d' % (span_count + splitr_count),'SPLITCNT=%d' % splitr_count,'SPANCNT=%d' % span_count,gene_name_info,gene_info,gene_loc,expr,homlen,'SPLICESCORE=%d' % splice_score]
+      info.extend([tag for (name,tag) in flag_columns if col(name) == 'Y'])
+      info1 = [svtype,'MATEID=%s;MATELOC=%s:%d' % (mate_id2,gene_chromosome2,genomic_break_pos2)] + info
+      info2 = [svtype,'MATEID=%s;MATELOC=%s:%d' % (mate_id1,gene_chromosome1,genomic_break_pos1)] + info
+      # QUAL is the deFuse probability scaled to 0..255; '.' when the column is absent.
+      # Test for membership: columns.index() raises when the column is missing and is 0 (falsy) when it is first.
+      qual = int(float(col('probability')) * 255) if 'probability' in columns else '.'
+      vcf1 = '%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s'% (gene_chromosome1,genomic_break_pos1, mate_id1, ref1, alt1, qual, filt, ';'.join(info1) )
+      vcf2 = '%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s'% (gene_chromosome2,genomic_break_pos2, mate_id2, ref2, alt2, qual, filt, ';'.join(info2) )
+      add_vcf_line(gene_chromosome1,genomic_break_pos1,mate_id1,vcf1)
+      add_vcf_line(gene_chromosome2,genomic_break_pos2,mate_id2,vcf2)
+    write_vcf()
+  except Exception, e:
+    print >> sys.stderr, "failed: %s" % e
+    sys.exit(1)
+  finally:
+    # release the files we opened; never close the process's own stdin/stdout
+    if inputFile is not sys.stdin:
+      inputFile.close()
+    if outputFile is not sys.stdout:
+      outputFile.close()
+
+if __name__ == "__main__" : __main__()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/defuse_results_to_vcf.xml	Sun Jan 17 14:11:06 2016 -0500
@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+<tool id="defuse_results_to_vcf" name="Defuse Results to VCF" version="0.6.1">
+  <description>generate a VCF from a DeFuse Results file</description>
+  <requirements>
+    <requirement type="package" version="0.6.1">defuse</requirement>
+  </requirements>
+  <!-- Dataset paths and the dbkey are quoted so the shell passes them through verbatim
+       (an unspecified dbkey such as "?" would otherwise be treated as a glob pattern). -->
+  <command interpreter="python">defuse_results_to_vcf.py  --input "$defuse_results" --reference "${defuse_results.metadata.dbkey}" --output "$vcf"
+  </command>
+  <inputs>
+    <param name="defuse_results" type="data" format="defuse.results.tsv" label="Defuse Results file"/> 
+  </inputs>
+  <stdio>
+    <exit_code range="1:" level="fatal" description="Error" />
+  </stdio>
+  <outputs>
+    <data name="vcf" metadata_source="defuse_results" format="vcf"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="defuse_results" value="mm10_results.filtered.tsv" ftype="defuse.results.tsv" dbkey="mm10"/>
+      <output name="vcf" file="mm10_results.filtered.vcf"/>
+    </test>
+  </tests>
+  <help>
+**Defuse Results to VCF**
+
+Generates a VCF_ Variant Call Format file from a DeFuse_ results.tsv file.   
+
+This program relies on the header line of the results.tsv to determine which columns to use for generating the VCF file.   
+
+.. _VCF: http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41
+.. _DeFuse: http://sourceforge.net/apps/mediawiki/defuse
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/defuse_trinity_analysis.py	Sun Jan 17 14:11:06 2016 -0500
@@ -0,0 +1,466 @@
+#!/usr/bin/env python
+"""
+#
+#------------------------------------------------------------------------------
+#                         University of Minnesota
+#         Copyright 2014, Regents of the University of Minnesota
+#------------------------------------------------------------------------------
+# Author:
+#
+#  James E Johnson
+#
+#------------------------------------------------------------------------------
+"""
+
+
+"""
+This tool takes the deFuse results.tsv tab-delimited file and the Trinity
+assembled transcripts and candidate ORFs, and creates a tabular report
+
+Would it be possible to create 2 additional files from the deFuse-Trinity comparison program.  
+One containing all the Trinity records matched to deFuse records (with the deFuse ID number), 
+and the other with the ORFs records matching back to the Trinity records in the first files?
+
+M045_Report.csv
+"","deFuse_subset.count","deFuse.gene_name1","deFuse.gene_name2","deFuse.span_count","deFuse.probability","deFuse.gene_chromosome1","deFuse.gene_location1","deFuse.gene_chromosome2","deFuse.gene_location2","deFuse_subset.type"
+"1",1,"Rps6","Dennd4c",7,0.814853504,"4","coding","4","coding","TIC  "
+
+
+
+OS03_Matched_Rev.csv
+"count","gene1","gene2","breakpoint","fusion","Trinity_transcript_ID","Trinity_transcript","ID1","protein"
+
+"","deFuse.splitr_sequence","deFuse.gene_chromosome1","deFuse.gene_chromosome2","deFuse.gene_location1","deFuse.gene_location2","deFuse.gene_name1","deFuse.gene_name2","deFuse.span_count","deFuse.probability","word1","word2","fusion_part_1","fusion_part_2","fusion_point","fusion_point_rc","count","transcript"
+
+"""
+
+import sys,re,os.path,math
+import textwrap
+import optparse
+from optparse import OptionParser
+
+def revcompl(x):
+  """
+  Return the reverse complement of the nucleotide sequence x.
+  Handles A/C/G/T/N in either case (case is preserved); any other
+  character raises KeyError.
+  """
+  pairs = {'A':'T','C':'G','G':'C','T':'A','a':'t','c':'g','g':'c','t':'a','N':'N','n':'n'}
+  complemented = [pairs[B] for B in x]
+  complemented.reverse()
+  return ''.join(complemented)
+
+# Lookup table from an uppercase RNA codon (U rather than T) to a one-letter
+# amino acid code; the stop codons UAA, UAG and UGA map to '*'.
+codon_map = {"UUU":"F", "UUC":"F", "UUA":"L", "UUG":"L",
+    "UCU":"S", "UCC":"S", "UCA":"S", "UCG":"S",
+    "UAU":"Y", "UAC":"Y", "UAA":"*", "UAG":"*",
+    "UGU":"C", "UGC":"C", "UGA":"*", "UGG":"W",
+    "CUU":"L", "CUC":"L", "CUA":"L", "CUG":"L",
+    "CCU":"P", "CCC":"P", "CCA":"P", "CCG":"P",
+    "CAU":"H", "CAC":"H", "CAA":"Q", "CAG":"Q",
+    "CGU":"R", "CGC":"R", "CGA":"R", "CGG":"R",
+    "AUU":"I", "AUC":"I", "AUA":"I", "AUG":"M",
+    "ACU":"T", "ACC":"T", "ACA":"T", "ACG":"T",
+    "AAU":"N", "AAC":"N", "AAA":"K", "AAG":"K",
+    "AGU":"S", "AGC":"S", "AGA":"R", "AGG":"R",
+    "GUU":"V", "GUC":"V", "GUA":"V", "GUG":"V",
+    "GCU":"A", "GCC":"A", "GCA":"A", "GCG":"A",
+    "GAU":"D", "GAC":"D", "GAA":"E", "GAG":"E",
+    "GGU":"G", "GGC":"G", "GGA":"G", "GGG":"G",}
+
+def translate(seq) :
+  """
+  Translate a nucleotide sequence into a one-letter amino acid string.
+  The sequence is uppercased, T is read as U, and complete codons are taken
+  from position 0 in steps of three (a trailing partial codon is dropped).
+  Codons missing from codon_map translate to 'X'; stop codons to '*'.
+  """
+  rna = seq.upper().replace('T','U')
+  return ''.join([codon_map.get(rna[pos:pos+3],'X') for pos in range(0,len(rna) - 2, 3)])
+
+def get_stop_codons(seq) :
+  """
+  Return, in order of occurrence, the stop codons (as RNA triplets) found in
+  reading frame 0 of seq.  The sequence is uppercased and T is read as U.
+  """
+  rna = seq.upper().replace('T','U')
+  triplets = [rna[pos:pos+3] for pos in range(0,len(rna) - 2, 3)]
+  return [triplet for triplet in triplets if codon_map.get(triplet,'X') == '*']
+
+def read_fasta(fp):
+    """
+    Generator over the records of a FASTA stream.
+    Yields (header, sequence) tuples, where header is the full '>' line
+    (right-stripped, '>' included) and sequence is the concatenation of the
+    right-stripped lines that follow it.  Lines before the first header are
+    discarded.
+    """
+    header = None
+    chunks = []
+    for raw in fp:
+        text = raw.rstrip()
+        if not text.startswith(">"):
+            chunks.append(text)
+            continue
+        # a new header closes the record collected so far
+        if header:
+            yield (header, ''.join(chunks))
+        header = text
+        chunks = []
+    if header:
+        yield (header, ''.join(chunks))
+
+
+def test_rcomplement(seq, target):
+  """
+  Return True if the reverse complement of seq occurs in target.
+  Best effort: a sequence that cannot be reverse-complemented (for example
+  one containing characters revcompl does not know) yields False.
+  """
+  try:
+    return revcompl(seq) in target
+  except Exception:
+    # Deliberately best effort, but no longer a bare except: that would also
+    # swallow KeyboardInterrupt and SystemExit.
+    return False
+
+def test_reverse(seq,target):
+  """
+  Return a true value if reverse-matching is enabled and the reversed (not
+  complemented) seq occurs in target.
+
+  The check is enabled only when a module-level `options` object with a true
+  `test_reverse` attribute exists.  The parsed options are a local of
+  __main__ and no such option is registered there, so the original unguarded
+  `options.test_reverse` lookup raised NameError; it is now treated as disabled.
+  """
+  opts = globals().get('options')
+  enabled = getattr(opts, 'test_reverse', False)
+  return enabled and seq and seq[::-1] in target
+
+def cmp_alphanumeric(s1,s2):
+  """
+  cmp-style comparison of two strings by their digit and letter runs.
+  Digit runs are compared as integers, everything else as strings; characters
+  that are neither digits nor ASCII letters are ignored.  If one string's runs
+  are a prefix of the other's, the shorter one sorts first.
+  """
+  if s1 == s2:
+    return 0
+  pattern = "\d+|[a-zA-Z]+"
+  parts1,parts2 = re.findall(pattern,s1),re.findall(pattern,s2)
+  for idx in range(min(len(parts1),len(parts2))):
+    x,y = parts1[idx],parts2[idx]
+    if x == y:
+      continue
+    both_numeric = x.isdigit() and y.isdigit()
+    if both_numeric:
+      return int(x) - int(y)
+    if x > y:
+      return 1
+    return -1
+  return len(parts1) - len(parts2)
+
+def parse_defuse_results(inputFile): 
+  """
+  Parse a deFuse results.tsv stream into a list of dicts, one per data row.
+
+  The header is the line starting with 'cluster_id'; rows seen before it, and
+  rows whose field count differs from the header's, are skipped.  Values are
+  keyed by column name: the columns listed in coltype_int become int, those in
+  coltype_float become float, those in coltype_yn become bool (True for 'Y'),
+  and all others stay strings.  Each dict also gets a 'flags' entry: the
+  comma-joined names of its Y/N columns that are 'Y'.
+
+  On any conversion error a message is written to stderr and the process
+  exits with status 1.
+  """
+  defuse_results = []
+  columns = []
+  coltype_int = ['expression1', 'expression2', 'gene_start1', 'gene_start2', 'gene_end1', 'gene_end2', 'genomic_break_pos1', 'genomic_break_pos2', 'breakpoint_homology', 'span_count', 'splitr_count', 'splice_score']
+  coltype_float = ['probability']
+  coltype_yn = [ 'orf', 'exonboundaries', 'read_through', 'interchromosomal', 'adjacent', 'altsplice', 'deletion', 'eversion', 'inversion']
+  try:
+    for line in inputFile:
+      fields = line.strip().split('\t')
+      if line.startswith('cluster_id'):
+        columns = fields
+        continue
+      if not columns or len(fields) != len(columns):
+        # no header yet, blank line, or a row that does not line up with the header
+        continue
+      cluster = dict()
+      flags = []
+      for name,value in zip(columns,fields):
+        if name in coltype_int:
+          cluster[name] = int(value)
+        elif name in coltype_float:
+          cluster[name] = float(value)
+        elif name in coltype_yn:
+          cluster[name] = value == 'Y'
+          if cluster[name]:
+            flags.append(name) 
+        else:
+          cluster[name] = value
+      cluster['flags'] = ','.join(flags)
+      # only fully populated records are added to the result
+      defuse_results.append(cluster)
+  except Exception, e:
+    print >> sys.stderr, "failed to read cluster_dict: %s" % e
+    # sys.exit rather than the site-provided exit(), which is not guaranteed to exist
+    sys.exit(1)
+  return defuse_results
+
+## deFuse params to the mapping application?
+
+def __main__():
+  """
+  Command-line entry point: cross-check deFuse fusion calls against Trinity output.
+
+  Reads the deFuse results.tsv (--input or stdin) and, for every fusion, builds
+  a probe of up to --nbases bases on each side of the split-read breakpoint.
+  Each Trinity transcript (--transcripts) containing the probe or its reverse
+  complement is recorded for that fusion; Trinity ORF peptides (--peptides) are
+  then linked to the matched transcripts through the transcript id in the ORF
+  header.  The tab-separated summary report is always written (--output or
+  stdout); the matched-records table (--matched) and the transcript alignment
+  file (--transcript_alignment) are written on request.
+  """
+  #Parse Command Line
+  parser = optparse.OptionParser()
+  # files
+  parser.add_option( '-i', '--input', dest='input', default=None, help='The input defuse results.tsv file (else read from stdin)' )
+  parser.add_option( '-t', '--transcripts', dest='transcripts', default=None, help='Trinity transcripts' )
+  parser.add_option( '-p', '--peptides', dest='peptides', default=None, help='Trinity ORFs' )
+  parser.add_option( '-o', '--output', dest='output', default=None, help='The output report (else write to stdout)' )
+  parser.add_option( '-m', '--matched', dest='matched', default=None, help='The output matched report' )
+  parser.add_option( '-a', '--transcript_alignment', dest='transcript_alignment', default=None, help='The output alignment file' )
+  parser.add_option( '-A', '--orf_alignment', dest='orf_alignment', default=None, help='The output ORF alignment file' )
+  parser.add_option( '-N', '--nbases', dest='nbases', type='int', default=12, help='Number of bases on either side of the fusion to compare' )
+  parser.add_option( '-L', '--min_pep_len', dest='min_pep_len', type='int', default=100, help='Minimum length of peptide to report' )
+  parser.add_option( '-T', '--ticdist', dest='ticdist', type='int', default=1000000, help='Maximum intrachromosomal distance to be classified a Transcription-induced chimera (TIC)' )
+  parser.add_option( '-P', '--prior_aa', dest='prior_aa', type='int', default=11, help='Number of protein AAs to show preceeding fusion point' )
+  parser.add_option( '-I', '--incomplete_orfs', dest='incomplete_orfs', action='store_true', default=False, help='Count incomplete ORFs'  )
+  # NOTE(review): with action='append' and a list default, optparse appends -O values
+  # to this default list rather than replacing it -- confirm that is intended.
+  parser.add_option( '-O', '--orf_type', dest='orf_type', action='append', default=['complete','5prime_partial'], choices=['complete','5prime_partial','3prime_partial','internal'], help='ORF types to report'  )
+  parser.add_option( '-r', '--readthrough', dest='readthrough', type='int', default=3, help='Number of stop_codons to read through' )
+  # min_orf_len
+  # split_na_len
+  # tic_len = 1000000
+  # prior
+  # deFuse direction reversed 
+  # in frame ?
+  # contain known protein elements
+  # what protein change
+  # trinity provides full transctipt, defuse doesn't show full
+  #parser.add_option( '-r', '--reference', dest='reference', default=None, help='The genomic reference fasta' )
+  #parser.add_option( '-g', '--gtf', dest='gtf', default=None, help='The genomic reference gtf feature file')
+  (options, args) = parser.parse_args()
+
+  # results.tsv input 
+  if options.input != None:
+    try:
+      inputPath = os.path.abspath(options.input)
+      inputFile = open(inputPath, 'r')
+    except Exception, e:
+      print >> sys.stderr, "failed: %s" % e
+      sys.exit(2)
+  else:
+    inputFile = sys.stdin
+  # report output 
+  if options.output != None:
+    try:
+      outputPath = os.path.abspath(options.output)
+      outputFile = open(outputPath, 'w')
+    except Exception, e:
+      print >> sys.stderr, "failed: %s" % e
+      sys.exit(3)
+  else:
+    outputFile = sys.stdout
+  outputTxFile = None
+  outputOrfFile = None
+  if options.transcript_alignment: 
+    try:
+      outputTxFile = open(options.transcript_alignment,'w')
+    except Exception, e:
+      print >> sys.stderr, "failed: %s" % e
+      sys.exit(3)
+  if options.orf_alignment: 
+    try:
+      outputOrfFile = open(options.orf_alignment,'w')
+    except Exception, e:
+      print >> sys.stderr, "failed: %s" % e
+      sys.exit(3)
+  # Add percent match after transcript
+  report_fields = ['cluster_id','gene_name1','gene_name2','span_count','probability','genomic_bkpt1','gene_location1','genomic_bkpt2','gene_location2','fusion_type','Transcript','coverage','Protein','flags','alignments1','alignments2']
+  # Display names for report columns; a field without an entry is reported under its own name.
+  report_colnames = {'gene_name1':'Gene 1','gene_name2':'Gene 2','span_count':'Span cnt','probability':'Probability',
+                     'gene_chromosome1':'From Chr','gene_location1':'Fusion point','gene_chromosome2':'To Chr','gene_location2':'Fusion point',
+                     'genomic_bkpt1':'From Chr','genomic_bkpt2':'To Chr',
+                     'fusion_type':'Type','coverage':'fusion%','Transcript':'Transcript?','Protein':'Protein?','flags':'descriptions','fwd_seq':'fusion'}
+
+  ## Read defuse results
+  fusions = parse_defuse_results(inputFile)
+  ## Create a field with the 12 nt before and after the fusion point. 
+  ## Create a field with the reverse complement of the 24 nt fusion point field.
+  ## Add fusion type filed (INTER, INTRA, TIC)
+  for i,fusion in enumerate(fusions):
+    fusion['ordinal'] = i + 1
+    fusion['genomic_bkpt1'] = "%s:%d" % (fusion['gene_chromosome1'], fusion['genomic_break_pos1'])
+    fusion['genomic_bkpt2'] = "%s:%d" % (fusion['gene_chromosome2'], fusion['genomic_break_pos2'])
+    fusion['alignments1'] = "%s%s%s" % (fusion['genomic_strand1'], fusion['gene_strand1'], fusion['gene_align_strand1'])
+    fusion['alignments2'] = "%s%s%s" % (fusion['genomic_strand2'], fusion['gene_strand2'], fusion['gene_align_strand2'])
+    # splitr_sequence is "<sequence before the break>|<sequence after the break>"
+    split_seqs = fusion['splitr_sequence'].split('|')
+    fusion['split_seqs'] = split_seqs
+    fusion['split_seq_lens'] = [len(split_seqs[0]),len(split_seqs[1])]
+    fusion['split_max_lens'] = [len(split_seqs[0]),len(split_seqs[1])]
+    fwd_off = min(abs(options.nbases),len(split_seqs[0]))
+    rev_off = min(abs(options.nbases),len(split_seqs[1]))
+    fusion['fwd_off'] = fwd_off
+    fusion['rev_off'] = rev_off
+    # probe = last fwd_off bases before the break + first rev_off bases after it
+    fwd_seq = split_seqs[0][-fwd_off:] + split_seqs[1][:rev_off]
+    rev_seq =  revcompl(fwd_seq)
+    fusion['fwd_seq'] = fwd_seq
+    fusion['rev_seq'] = rev_seq
+    if fusion['gene_chromosome1'] != fusion['gene_chromosome2']:
+      fusion_type = 'inter'
+    elif abs(fusion['genomic_break_pos1'] - fusion['genomic_break_pos2']) > options.ticdist:
+      fusion_type = 'intra'
+    else:
+      fusion_type = 'TIC'
+    fusion['fusion_type'] = fusion_type
+    fusion['transcripts'] = dict()
+    fusion['Transcript'] = 'No'
+    fusion['coverage'] = 0
+    fusion['Protein'] = 'No'
+  inputFile.close()
+
+  ## Process Trinity data and compare to deFuse
+  matched_transcripts = dict()
+  matched_orfs = dict()
+  transcript_orfs = dict()
+  fusions_with_transcripts = set()
+  fusions_with_orfs = set()
+  ## fusion['transcripts'][tx_id] { revcompl:?, bkpt:n, seq1: ,  seq2: , match1:n, match2:n}
+  if options.transcripts: 
+    with open(options.transcripts) as fp:
+      for tx_full_id, seq in read_fasta(fp):
+        for i,fusion in enumerate(fusions):
+          if fusion['fwd_seq'] in seq or fusion['rev_seq'] in seq:
+            fusions_with_transcripts.add(i)
+            fusion['Transcript'] = 'Yes'
+            tx_id = tx_full_id.lstrip('>').split()[0]
+            matched_transcripts[tx_full_id] = seq
+            tx = dict()
+            fusion['transcripts'][tx_id] = tx
+            tx['seq'] = seq
+            tx['full_id'] = tx_full_id
+            pos = seq.find(fusion['fwd_seq'])
+            if pos >= 0:
+              # transcript is in the same orientation as the deFuse split sequence
+              tx_bkpt = pos + fusion['fwd_off']
+              if tx_bkpt > fusion['split_max_lens'][0]:
+                fusion['split_max_lens'][0] = tx_bkpt 
+              len2 = len(seq) - tx_bkpt
+              if len2 > fusion['split_max_lens'][1]:
+                fusion['split_max_lens'][1] = len2 
+              tx['bkpt'] = tx_bkpt
+              tx['revcompl'] = False
+              tx['seq1'] = seq[:tx_bkpt]
+              tx['seq2'] = seq[tx_bkpt:]
+            else: 
+              # only the reverse complement matched: measure the flanks on the
+              # transcript as given, then store the reverse-complemented halves
+              pos = seq.find(fusion['rev_seq'])
+              tx_bkpt = pos + fusion['rev_off']
+              if tx_bkpt > fusion['split_max_lens'][1]:
+                fusion['split_max_lens'][1] = tx_bkpt
+              len2 = len(seq) - tx_bkpt
+              if len2 > fusion['split_max_lens'][0]:
+                fusion['split_max_lens'][0] = len2 
+              rseq = revcompl(seq)
+              pos = rseq.find(fusion['fwd_seq'])
+              tx_bkpt = pos + fusion['fwd_off']
+              tx['bkpt'] = tx_bkpt
+              tx['revcompl'] = True
+              tx['seq1'] = rseq[:tx_bkpt]
+              tx['seq2'] = rseq[tx_bkpt:]
+            # match1: number of identical bases walking left from the breakpoint
+            fseq = fusion['split_seqs'][0]
+            tseq = tx['seq1']
+            mlen = min(len(fseq),len(tseq))
+            tx['match1'] = mlen
+            for j in range(1,mlen+1):
+              if fseq[-j] != tseq[-j]:
+                tx['match1'] = j - 1
+                break
+            # match2: number of identical bases walking right from the breakpoint
+            fseq = fusion['split_seqs'][1]
+            tseq = tx['seq2']
+            mlen = min(len(fseq),len(tseq))
+            tx['match2'] = mlen
+            for j in range(mlen):
+              if fseq[j] != tseq[j]:
+                tx['match2'] = j
+                break
+            # percentage of the deFuse split sequence matched, truncated to one decimal
+            coverage = int((tx['match1'] + tx['match2']) * 1000. / len(fusion['split_seqs'][0]+fusion['split_seqs'][1])) * .1
+            fusion['coverage'] = max(coverage,fusion['coverage'])
+    print >> sys.stdout, "fusions_with_transcripts: %d  %s\n matched_transcripts: %d" % (len(fusions_with_transcripts),fusions_with_transcripts,len(matched_transcripts))
+    ## Process ORFs and compare to matched deFuse and Trinity data.
+    ## Proteins must be at least 100 aa long, starting at the first "M" and must end with an "*".
+    if options.peptides: 
+      with open(options.peptides) as fp:
+        for orf_full_id, seq in read_fasta(fp):
+          if len(seq) < options.min_pep_len:
+            continue
+          type_match = re.match('^.* type:(\S+) .*$',orf_full_id)
+          if not type_match:
+            # header carries no "type:" field, so the ORF type filter cannot be
+            # applied; skip the record instead of crashing on a None match
+            continue
+          orf_type = type_match.groups()[0]
+          if orf_type not in options.orf_type:
+            continue
+          for i,fusion in enumerate(fusions):
+            if len(fusion['transcripts']) > 0:
+              for tx_id in fusion['transcripts']:
+                ## >m.196252 g.196252  ORF g.196252 m.196252 type:complete len:237 (+) comp100000_c5_seq2:315-1025(+)
+                ## >m.134565 g.134565  ORF g.134565 m.134565 type:5prime_partial len:126 (-) comp98702_c1_seq21:52-429(-)
+                if tx_id+':' not in orf_full_id:
+                  continue
+                m = re.match("^.*%s:(\d+)-(\d+)[(]([+-])[)].*" % re.sub('([|.{}()$?^])','[\\1]',tx_id),orf_full_id)
+                if m:
+                  if not m.groups() or len(m.groups()) < 3 or m.groups()[0] == None:
+                    print >> sys.stderr, "Error:\n%s\n%s\n" % (tx_id,orf_full_id)
+                  orf_id = orf_full_id.lstrip('>').split()[0]
+                  if not tx_id in transcript_orfs:
+                    transcript_orfs[tx_id] = []
+                  start = seq.find('M')
+                  pep_len = len(seq)
+                  if pep_len - start < options.min_pep_len:
+                    continue
+                  orf_dict = dict()
+                  transcript_orfs[tx_id].append(orf_dict)
+                  fusions_with_orfs.add(i)
+                  matched_orfs[orf_full_id] = seq
+                  fusion['Protein'] = 'Yes'
+                  tx_start = int(m.groups()[0])
+                  tx_end = int(m.groups()[1])
+                  tx_strand = m.groups()[2]
+                  tx_bkpt = fusion['transcripts'][tx_id]['bkpt']
+                  orf_dict['orf_id'] = orf_id
+                  orf_dict['tx_start'] = tx_start
+                  orf_dict['tx_end'] = tx_end
+                  orf_dict['tx_strand'] = tx_strand
+                  orf_dict['tx_bkpt'] = tx_bkpt
+                  # residues before the first 'M' are shown in lowercase
+                  orf_dict['seq'] = seq[:start].lower() + seq[start:] if start > 0 else seq
+                  ## >m.208656 g.208656  ORF g.208656 m.208656 type:5prime_partial len:303 (+) comp100185_c2_seq9:2-910(+)
+                  ## translate(tx34[1:910])
+                  ## translate(tx34[1:2048])
+                  ## comp99273_c1_seq1 len=3146 (-2772) 
+                  ## >m.158338 g.158338  ORF g.158338 m.158338 type:complete len:785 (-) comp99273_c1_seq1:404-2758(-)
+                  ##  translate(tx[-2758:-403])
+                  ## comp100185_c2_seq9 len=2048 (904)
+                  ## novel protein sequence
+                  ## find first novel AA
+                  ## get prior n AAs
+                  ## get novel AA seq thru n stop codons 
+                  tx_seq = fusion['transcripts'][tx_id]['seq']
+                  orf_dict['tx_seq'] = tx_seq
+                  # translate from the ORF start to the end of the transcript, on the ORF's strand
+                  novel_tx_seq = tx_seq[tx_start - 1:] if tx_strand == '+' else revcompl(tx_seq[:tx_end])
+                  read_thru_pep = translate(novel_tx_seq)
+                  stop_codons = get_stop_codons(novel_tx_seq)
+                  if options.readthrough: 
+                    # keep the translation through the first options.readthrough stop codons
+                    readthrough = options.readthrough + 1
+                    read_thru_pep = '*'.join(read_thru_pep.split('*')[:readthrough])
+                    stop_codons = stop_codons[:readthrough]
+                  orf_dict['read_thru_pep'] = read_thru_pep
+                  orf_dict['stop_codons'] = ','.join(stop_codons)
+      print >> sys.stdout, "fusions_with_orfs: %d  %s\n matched_orfs: %d" % (len(fusions_with_orfs),fusions_with_orfs,len(matched_orfs))
+  ## Alignments 3 columns, seq columns padded out to longest seq, UPPERCASE_match  diffs lowercase
+  ### defuse_id		pre_split_seq		post_split_seq
+  ### trinity_id	pre_split_seq		post_split_seq
+  ## Transcripts alignment output
+  ## Peptide alignment output
+  ## Write reports
+  ## OS03_Matched_Rev.csv
+  ## "count","gene1","gene2","breakpoint","fusion","Trinity_transcript_ID","Trinity_transcript","ID1","protein"
+  if options.transcripts and options.matched: 
+    outputMatchFile = open(options.matched,'w')
+    print >> outputMatchFile, '\t'.join(["#fusion_id","cluster_id","gene1","gene2","breakpoint","fusion","Trinity_transcript_ID","Trinity_transcript","Trinity_ORF_Transcript","Trinity_ORF_ID","protein","stop_codons"])
+    for i,fusion in enumerate(fusions):
+      if len(fusion['transcripts']) > 0:
+        for tx_id in fusion['transcripts'].keys():
+          if tx_id in transcript_orfs:
+            for orf_dict in transcript_orfs[tx_id]: 
+              if 'tx_seq' not in orf_dict:
+                print >> sys.stderr, "orf_dict %s" % orf_dict
+              fields = [str(fusion['ordinal']),str(fusion['cluster_id']),fusion['gene_name1'],fusion['gene_name2'],fusion['fwd_seq'],fusion['splitr_sequence'],tx_id, fusion['transcripts'][tx_id]['seq1']+'|'+fusion['transcripts'][tx_id]['seq2'],orf_dict['tx_seq'],orf_dict['orf_id'],orf_dict['read_thru_pep'],orf_dict['stop_codons']]
+              print >> outputMatchFile, '\t'.join(fields)
+    outputMatchFile.close()
+  if options.transcripts and options.transcript_alignment: 
+    if outputTxFile:
+      fa_width = 80
+      for i,fusion in enumerate(fusions):
+        if len(fusion['transcripts']) > 0:
+          alignments = "%s%s%s %s%s%s" % (fusion['genomic_strand1'], fusion['gene_strand1'], fusion['gene_align_strand1'], fusion['genomic_strand2'], fusion['gene_strand2'], fusion['gene_align_strand2'])
+          fusion_id = "%s (%s) %s" % (i + 1,alignments,' '.join([str(fusion[x]) for x in report_fields]))
+          for tx_id in fusion['transcripts'].keys():
+            tx = fusion['transcripts'][tx_id]
+            # Left of the break: matched suffix uppercase, the rest lowercase.
+            # Slice from an explicit index: [:-m1] / [-m1:] invert when m1 == 0.
+            m1 = tx['match1']
+            f_left = fusion['split_seqs'][0]
+            t_left = tx['seq1']
+            f_seq1 = f_left[:len(f_left) - m1].lower() + f_left[len(f_left) - m1:]
+            t_seq1 = t_left[:len(t_left) - m1].lower() + t_left[len(t_left) - m1:]
+            if len(f_seq1) > len(t_seq1):
+              t_seq1 = t_seq1.rjust(len(f_seq1),'.')
+            elif len(f_seq1) < len(t_seq1):
+              f_seq1 = f_seq1.rjust(len(t_seq1),'.')
+            # Right of the break: matched prefix uppercase, the rest lowercase.
+            m2 = tx['match2']
+            f_seq2 = fusion['split_seqs'][1][:m2] +  fusion['split_seqs'][1][m2:].lower()
+            t_seq2 = tx['seq2'][:m2] + tx['seq2'][m2:].lower()
+            if len(f_seq2) > len(t_seq2):
+              t_seq2 = t_seq2.ljust(len(f_seq2),'.')
+            elif len(f_seq2) < len(t_seq2):
+              f_seq2 = f_seq2.ljust(len(t_seq2),'.')
+            print >> outputTxFile, ">%s\n%s\n%s" % (fusion_id,'\n'.join(textwrap.wrap(f_seq1,fa_width)),'\n'.join(textwrap.wrap(f_seq2,fa_width)))
+            print >> outputTxFile, "%s bkpt:%d rev_compl:%s\n%s\n%s" % (tx['full_id'],tx['bkpt'],str(tx['revcompl']),'\n'.join(textwrap.wrap(t_seq1,fa_width)),'\n'.join(textwrap.wrap(t_seq2,fa_width)))
+  """
+  if options.peptides and options.orf_alignment: 
+    pass
+  """
+  print >> outputFile,"%s\t%s" % ('#','\t'.join([report_colnames.get(x,x) for x in report_fields]))
+  for i,fusion in enumerate(fusions): 
+    print >> outputFile,"%s\t%s" % (i + 1,'\t'.join([str(fusion[x]) for x in report_fields]))
+  # release the files we opened; never close the process's own stdout
+  for handle in (outputTxFile, outputOrfFile):
+    if handle is not None:
+      handle.close()
+  if outputFile is not sys.stdout:
+    outputFile.close()
+
+if __name__ == "__main__" : __main__()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/defuse_trinity_analysis.xml	Sun Jan 17 14:11:06 2016 -0500
@@ -0,0 +1,55 @@
+<?xml version="1.0"?>
+<tool id="defuse_trinity_analysis" name="Defuse Trinity" version="0.6.1">
+  <description>verify fusions with trinity</description>
+  <stdio> <!-- any exit code of 1 or greater marks the job as failed -->
+    <exit_code range="1:" level="fatal" description="Error" />
+  </stdio>
+  <command interpreter="python">defuse_trinity_analysis.py --input "$defuse_results" --transcripts "$trinity_transcripts" --peptides "$trinity_orfs"
+  --nbases $nbases --min_pep_len $min_pep_len --ticdist $ticdist --readthrough=$readthrough
+  #if 'matched' in str($outputs).split(','):
+    --matched="$matched_output"
+  #end if
+  #if 'aligned' in str($outputs).split(','):
+    --transcript_alignment="$aligned_output"
+  #end if
+  --output "$output"
+  </command>
+  <inputs>
+    <param name="defuse_results" type="data" format="defuse.results.tsv" label="Defuse Results file"/>
+    <param name="trinity_transcripts" type="data" format="fasta" label="TrinityRNAseq: Assembled Transcripts"/>
+    <param name="trinity_orfs" type="data" format="fasta" label="transcriptsToOrfs: Candidate Peptide Sequences"/>
+    <param name="nbases" type="integer" value="12" min="1" label="Number of bases on either side of the fusion to compare"/>
+    <param name="min_pep_len" type="integer" value="100" min="0" label="Minimum length of peptide to report"/>
+    <param name="ticdist" type="integer" value="1000000" min="0" label="Maximum intrachromosomal distance to be classified a Transcription-induced chimera (TIC)"/>
+    <param name="readthrough" type="integer" value="4" min="0" label="Number of stop_codons to read through"/>
+    <param name="outputs" type="select" multiple="true" display="checkboxes" label="Additional outputs">
+      <option value="matched">Matched Fusions Trinity Transcripts and ORFs Tabular</option>
+      <option value="aligned">Aligned Fusion and Trinity Transcripts Fasta</option>
+    </param>
+  </inputs>
+  <outputs> <!-- the first two datasets are created only when selected in the "outputs" checkboxes; the Fusion Report is always produced -->
+    <data name="matched_output" metadata_source="defuse_results" format="tabular" label="${tool.name} on ${on_string}: Fusions Trinity Matched ">
+      <filter>(outputs and 'matched' in outputs)</filter>
+    </data>
+    <data name="aligned_output" metadata_source="defuse_results" format="fasta" label="${tool.name} on ${on_string}: Fusion Trinity Sequences">
+      <filter>(outputs and 'aligned' in outputs)</filter>
+    </data>
+    <data name="output" metadata_source="defuse_results" format="tabular" label="${tool.name} on ${on_string}: Fusion Report"/>
+  </outputs>
+  <tests> <!-- FIXME(review): this test cannot pass as written. It omits the required trinity_transcripts and trinity_orfs inputs and asserts an output named "vcf", which this tool does not declare (declared outputs: matched_output, aligned_output, output). It appears to have been copied from the results-to-VCF tool; Trinity fixture files are needed to write a real test. -->
+    <test>
+      <param name="defuse_results" value="mm10_results.filtered.tsv" ftype="defuse.results.tsv" dbkey="mm10"/>
+      <output name="vcf" file="mm10_results.filtered.vcf"/>
+    </test>
+  </tests>
+  <help>
+**Defuse Results**
+
+Verifies DeFuse_ fusion predictions in results.tsv with TrinityRNAseq_ assembled transcripts and ORFs.   
+
+This program relies on the header line of the results.tsv to determine which columns to use for analysis.   
+
+.. _DeFuse: http://sourceforge.net/apps/mediawiki/defuse
+.. _TrinityRNAseq: http://trinityrnaseq.github.io/
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Sun Jan 17 14:11:06 2016 -0500
@@ -0,0 +1,28 @@
+<macros>
+    <token name="@DEFUSE_VERSION@">0.6.2</token> <!-- substituted into the defuse package requirement version below -->
+    <xml name="defuse_requirement">
+        <requirement type="package" version="@DEFUSE_VERSION@">defuse</requirement>
+    </xml>
+    <xml name="mapping_requirements">
+        <requirement type="package" version="0.1.19">samtools</requirement>
+        <requirement type="package" version="1.0.0">bowtie</requirement>
+        <requirement type="package" version="2013-05-09">gmap</requirement>
+        <requirement type="package" version="35x1">blat</requirement>
+    </xml>
+    <xml name="r_requirements">
+        <requirement type="package" version="3.1.2">R</requirement>
+        <requirement type="package" version="2.0.3">ada</requirement>
+    </xml>
+    <xml name="stdio">
+        <stdio>
+            <exit_code range=":-1" level="fatal" description="Error: Cannot open file" />
+            <exit_code range="1:" level="fatal" description="Error" />
+        </stdio>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1001138</citation>
+            <yield />
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mm10_results.filtered.tsv	Sun Jan 17 14:11:06 2016 -0500
@@ -0,0 +1,46 @@
+cluster_id	splitr_sequence	splitr_count	splitr_span_pvalue	splitr_pos_pvalue	splitr_min_pvalue	adjacent	altsplice	break_adj_entropy1	break_adj_entropy2	break_adj_entropy_min	breakpoint_homology	breakseqs_estislands_percident	cdna_breakseqs_percident	deletion	est_breakseqs_percident	eversion	exonboundaries	expression1	expression2	gene1	gene2	gene_align_strand1	gene_align_strand2	gene_chromosome1	gene_chromosome2	gene_end1	gene_end2	gene_location1	gene_location2	gene_name1	gene_name2	gene_start1	gene_start2	gene_strand1	gene_strand2	genome_breakseqs_percident	genomic_break_pos1	genomic_break_pos2	genomic_strand1	genomic_strand2	interchromosomal	interrupted_index1	interrupted_index2	inversion	library_name	max_map_count	max_repeat_proportion	mean_map_count	min_map_count	num_multi_map	num_splice_variants	orf	read_through	repeat_proportion1	repeat_proportion2	span_count	span_coverage1	span_coverage2	span_coverage_max	span_coverage_min	splice_score	splicing_index1	splicing_index2	probability
+8647	GTGCTCCTGCTGCCAGGCGCAGCTGGGCGACATTGGCACGTCCTGTTACACCAAGAGCGGCATGATCCTTTGCAGAAATGACTACATTAGGT|GAAGATTGTAAAAAATTGACATCAGAAATATTTACAGAAATAGATACCTGTTTGAATAAAGTTAGAGATGAAATTTTTGCTAAACTTCAACCGAAGCTTAGATGCACATTAGGTGACATGGAAAGTCCTGTGTTTGCACTTCCTG	4	0.849232794977309	0.875860929877954	0.794775556794258	N	N	3.72551845106187	3.02448896101185	3.02448896101185	1	0.0366733649981732	0	N	0	N	Y	2482	2085	ENSMUSG00000028266	ENSMUSG00000041264	+	-	3	5	144205220	149215434	coding	coding	Lmo4	Uspl1	144188530	149184350	-	+	0	144201813	149198645	-	-	Y	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	N	0	0	5	0.831776131589327	0.982288003019777	0.982288003019777	0.831776131589327	4	-	-	0.950339354539546
+12095	CTTCCAGGGTCCCCCGAGCCTAATGGATGCCGAGACAGACGAGGGCATGGACTATACAGGCTGTAGCCCTGGAGCGGCGTCCTCAGAGTCTTCCACCATGGACCGTAGCTGTTCCAGCACCC|CTGGCCCTTGACATCTAGCACCCCTTCACCCTCTTCCTGGGGACCCAGCAGGTGGTATGTGGCCGTGGAGCCCTCCGGGCTGTGGCTGTCCTTCCCAGGAGAGGATGACGTAGACTCGTTGCTGACAGGGGAGATGTCACTGCTGC	6	0.869976758916331	0.907802910133282	0.774396849342807	Y	Y	3.64400585547602	3.28189243439864	3.28189243439864	0	0.983667499542934	0	Y	0	N	N	0	1453	ENSMUSG00000086606	ENSMUSG00000028975	-	+	4	4	148948914	149099876	intron	coding	Gm13205	Pex14	148947492	148960535	-	-	0	148948907	148961548	+	-	N	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	Y	0	0	5	0.760481034595956	1.02189639023832	1.02189639023832	0.760481034595956	4	-	-	0.976704594819493
+4068	CGACACGCCGGGGCTGGCTGAGGAAAACAAAACGAAGCCCCTGGAGACCCGGCTTATCTCAGAGACCACAGCTATTTGCAAACCAGAGCAGGTGGCCAAACAAATTGTCAAAGATGCCATA|TACCGTTCCCCAGCTGAAGAGTTCTGAATCCACGCCGGATCCTTCTCAACAGTCTGTTTTACGGGAACTTTTATTAACCACTCCTTCCCCGTGATGCAGTTCTGAATCCTCCCTGTAGCAGGGGGTCTTCACTCATGCCTGAAGATGTTTCTTTTCC	8	1	0.541521196503195	0.26082452496772	N	Y	3.46128890676658	3.81976875220098	3.46128890676658	2	0	0	N	0.794128973014604	N	N	775	35409	ENSMUSG00000009905	ENSMUSG00000022816	+	-	1	16	106759742	37836514	coding	intron	Kdsr	Fstl1	106720410	37776873	-	+	0.00358422939068104	106734547	37799068	-	-	Y	-	-	N	dataset_6344_files	4	1	1.81818181818182	1	4	1	N	N	0	1	11	0.974366325576069	1.17240826166877	1.17240826166877	0.974366325576069	4	-	-	0.913877182950088
+12868	GCGGTCTCGGCTCCAGCGGCAGTAGCAGCGGCGCCGGTCCCGTGTGCAGGAGCTCCTTTGCGGCCCAGTTTCTTGGCCATCGCCTGCTCTCCCCACAGCGCCAGGACGAGTCCCGTGCGCGTCCGTCCGCGGAGGTCTTTCTCATCTCGCTCGGCTGCGGGAAATCGGGCTGAAGCGACTGAGTCCGCGATGGAGA|AAACTTTAGAAACTGTTCCTTTGGAGAGGAAAAAGAGAGAAAAGGAAAACTT	69	1	0.439895614069599	0.332872660216425	N	Y	3.30124852419771	2.96787791690762	2.96787791690762	0	0.0997837503861599	0.00401606425702794	N	0.540315106580167	N	N	41631	0	ENSMUSG00000004980	ENSMUSG00000085456	+	+	6	10	51469894	73327027	coding	intron	Hnrnpa2b1	Gm15398	51460434	73201399	-	-	0.0997837503861599	51467295	73201702	-	-	Y	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	N	0	0	41	1.14864322933764	0.720872647377417	1.14864322933764	0.720872647377417	1	-	-	0.520098627306064
+5160	TACGGATTCATTCAGTGTTCAGAACGGCAAGCTAGACTTTTCTTCCACTGTTCACAATATAATGGCAACCTCCAAGACTTAAAAGTAGGAGATGATGTTGAATTTGAAGTATCATCTGACCGGAGGACTGGGAAACCTATTGCTATTAAATTGGTGAAGATAAAACCAGAAATACATCCTGAAGAACGAATGAACGGAC|AAGAAGTATTTTATCTGACTTACACCCCTGAAGATGTGGAAGGGAAAGTTCAGC	33	1	0.767323324767952	0.471361574015707	N	Y	3.3022815939676	3.55553920790569	3.3022815939676	91	0.944663167104112	0.00393700787401585	N	0.944663167104112	N	N	10681	0	ENSMUSG00000068823	ENSMUSG00000087940	+	-	3	18	103058189	28309891	coding	upstream	Csde1	SNORA17	103020546	28309760	+	+	0	103040040	28188917	+	-	Y	-	-	N	dataset_6344_files	2	0	1.03333333333333	1	1	1	N	N	0	0	30	1.1011131646754	0.67334258271517	1.1011131646754	0.67334258271517	3	-	-	0.835626866413202
+8748	GGGGTAGATCACCTTCCGAGGGTCTCCATGGGTCCAGGCTATGATGCCCACAGCCACATAGCCTACGATGGCCAGGAAGAGCAACACACAGCAAATGACATCTGTGCATCCCCTGTTGTAAATGGGTCCTTTGAAGGTGGGGTCGTATTTCTGAGGCGTCCC|ATAGACTGCGTCCTTCCGATCGTCCTCCATGGCCTCAACCGAGGAGAGCTGAGTCCGAAGCCAGCGCGACCCCAACCCAAGCGGGCGGGAGACACCGCGCGCTGCTGGCCCCGCC	51	1	0.844316325604616	0.871821260113135	Y	Y	3.29447165365238	3.73336824417043	3.29447165365238	2	0	0	Y	0.991335627150454	N	N	6343	2	ENSMUSG00000057193	ENSMUSG00000003309	-	-	9	9	21355025	21312333	coding	upstream	Slc44a2	Ap1m2	21337828	21295457	+	-	0	21337962	21320850	-	+	N	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	Y	0	0	52	1.3070767782118	0.958522970688653	1.3070767782118	0.958522970688653	2	-	-	0.74403558965543
+11169	CCACATCTGACAGAACTTGCCACTGTGCCTGCAACCTTGTCTGAGAGGAACCCTTCTCTG|AGGATGGACACTTCTCACACTACAAAGTCCTGTTTGCTGATTCTTCTTGTGGCCCTACTGTGTGCAGAAAGAGCTCAGGGACTGGAGTGTTACCAGTGCTATGGAGTCCCATTTGAGACTTCTTGCCCATCAATTACCTGCCCCTACCCTGATGGAGTCTGTGTTACTCAGGAGGCAGCAGTTATTGTGGATTCTCAAACAAGGAAAGTAAAGAACAATCTTTGCTTACCC	410	1	0.87136611677351	0.978590093310325	Y	Y	3.45293525254824	3.55976477294109	3.45293525254824	0	0	0.152910958904109	Y	0.966780821917808	N	Y	1416	5049	ENSMUSG00000079018	ENSMUSG00000075602	+	-	15	15	75048837	74997634	utr5p	utr5p	Ly6c1	Ly6a	75045017	74994878	-	-	0	75048442	74996568	-	+	N	-	-	N	dataset_6344_files	2	0	1.53082191780822	1	155	2	Y	Y	0	0	292	0.728794324821125	1.49719703686079	1.49719703686079	0.728794324821125	4	-	-	0.93076564160702
+12600	CTAAAATCGCCAAGCCTGTCAAGTTTGAGCTTTCTGGCTGCACCAGTGTGAAGACATACAGGGCTAAGTTCTGCGGGGTGTGCACAGACGGCCGCTGCTGCACACCGCACAGAACCACCACTCTGCCAGTGGAGTTCAAATGCCCCGATGGCGAGATCATGAAAAAGAATATGATGTTCATCAAG|CCCTGCCCCTGCCATTACCACTGTTCTGGGGTCCCTGGCATGCTCCCATTAC	1	1	0.820229055260519	0.873212995139997	N	N	3.28548615724827	3.17936818462789	3.17936818462789	0	0.482950872656755	0.482950872656755	N	0.482950872656755	N	N	7298	2735	ENSMUSG00000019997	ENSMUSG00000015133	+	+	10	7	24598683	66388350	coding	intron	Ctgf	Lrrk1	24595442	66226912	+	-	0.482950872656755	24597524	66355922	+	-	Y	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	N	0	0	5	1.02981806768202	0.41192722707281	1.02981806768202	0.41192722707281	3	-	-	0.58275607356487
+1855	CCTGGTCACACAGCTCTCCTAGGAAGCGCGTCTCCAATGACTCGTTTGTCAACCCCAGGTGTCTTCCACGTTGTGGTTTCAACTTCATAATTCTCTGAAGTCTTATTCAT|CTCTTTGTCAAGCCCACGGCGTCAGCCTCTGACACGAGGGCGGGCGTCCTCGCCTCCGGGGTGAAAGAGGGGCGCAGCGGGCCGCCCCTCCCCCCCGCCCCCCTACGACACGCGGGGCCCTGCCTCGGGCGACCTGTTGGCAGGGCGCGTCACGTGACGCGGCGGGCCGCGCCGTCCCC	4	0.769054178023969	0.159736089157399	0.42467594723228	Y	Y	3.40528677690917	3.51935717616377	3.40528677690917	0	0.981880877742947	0	Y	0.0487460815047023	N	N	0	8476	ENSMUSG00000097162	ENSMUSG00000039361	-	-	7	7	90129474	90209447	intron	upstream	AC130210.1	Picalm	90124858	90130232	-	+	0	90125032	90129872	+	-	N	-	-	N	dataset_6344_files	1	0.23489932885906	1	1	0	1	N	Y	0	0.23489932885906	9	0.887227873695282	1.18032993911247	1.18032993911247	0.887227873695282	4	-	-	0.949188379753168
+1870	CTGGGTCAAGAGCCGGAGGGACAGGACCAGAGCACCCCTTACGCCAGAACTAGCTCTCCTTGTTCCTACTGGGTGACCTCATCTCGCCACGCCTCCTCAGGTGAACACCCGGGCTGGTAACGTCACTTCCTGC|CAGGGTTTCACTATGTAGACCCTGGTCGGCCTGGAACTCTATAGACCAGACTGGCCTCGAGCTCAGATCCGTCCCCCTCTGCTGTCCCAGCACGGGGATTAAGAACGCGCCACCACTACAGCTGACCGGA	2	0.788660184540219	0.550050944563454	0.182990959521483	Y	Y	3.63552422018169	3.63193049733206	3.63193049733206	4	0	0	Y	0	N	N	1543	697	ENSMUSG00000044786	ENSMUSG00000003444	-	+	7	7	28379255	28392708	downstream	intron	Zfp36	Med29	28376784	28386146	-	-	0	28376683	28392166	+	-	N	-	-	N	dataset_6344_files	1	0.869158878504673	1	1	0	1	N	Y	0	0.869158878504673	8	1.08526980978798	0.847619486476743	1.08526980978798	0.847619486476743	1	-	-	0.841318537051835
+3153	CAGAGCTATGTAGAAAGACCCTGTCTGGTAAGTAAATAAAAACATAGCCAGGCATGGTGGCAATCAGCAGGTAGATTGGAGTTTGAGGTCATCCTGGTCTGGAGAGTGAGTTCCAGGAGAGCCAAGATTACACAAACCC|TGTCTTTTTCTTTCTTTCTGTTGTTGTTGTTGCTGTTCCTGCTGCTGCTGCTGTTTTGCTTTTCATGACAGGGTTTCTCTGTGTCTCTGTGCAACTTTGGCTATCCTGGAATTCACACTGTAGACCAGGCTGGCCTTGAATTCACACAGATCCATCTG	16	1	0.656277930324132	0.671834466829468	Y	Y	3.40740634961144	2.41484814640267	2.41484814640267	0	0.9928298971561	0	Y	0.985659794312201	N	N	0	30	ENSMUSG00000097379	ENSMUSG00000047786	-	-	17	17	17395303	17459387	intron	intron	AC154200.1	Lix1	17389943	17402672	-	+	0	17395298	17411818	+	-	N	-	-	N	dataset_6344_files	1	0.709459459459459	1	1	0	1	N	Y	0.685314685314685	0.709459459459459	25	1.13279987445023	1.17240826166877	1.17240826166877	1.13279987445023	1	-	-	0.81041504882724
+11596	GCCGGAGCAGATCAGGCTGAAAGTTGGTGGTGTGGACCCAAAGCAGCTAGCCGTCTATGAAGAGTTTGCACGAAATGTGCCTGGCTTCTTACCTACAAATGACTTAAGTCAGCCTACAGGGTTTTTAGCTCAGCCCATGAA|GTTTCTGGATCAAGATGTGAGCTTTCAGCTGTTGCTTGAGCATCATGCCTGCCTGCCTGCCACCACGCTCTCGGCCAGGATAGTGATGGATTCCTTCCCTCGGACTATAAGCCCCAAATGAACCCTTCCTTCTATGAGTTGCCT	8	0.974101244860941	0.684001003266195	0.530048089286902	N	Y	3.49640736351508	3.32783191248938	3.32783191248938	0	0	0	Y	0	N	N	6004	544	ENSMUSG00000036550	ENSMUSG00000036564	+	+	8	8	95807462	95715119	coding	intron	Cnot1	Ndrg4	95719451	95676980	-	+	0	95739809	95682960	-	+	N	-	-	N	dataset_6344_files	1	1	1	1	0	1	N	N	0	1	16	1.12487819700652	1.17240826166877	1.17240826166877	1.12487819700652	1	-	-	0.873952352979217
+3783	CTCTGTTCGTGCATCCCTGGGATATGCAGATGGATGGACGAATGGCCAAATACTGGCTGCCTTGGCTCGTAGGTTTGTGCTGGACTAGGGTTGGGAACACAGCACCAACTCTTGGGTTTTTTCTGTTATCCATGGAATTCTGTTCT|TTTCTTTCCGAGTGACCCCCACTTTACGGCGTCTCCCAATGACAAGAGGCGCAGAGCATTGTGGTCCCGCGGGTCCGAGA	2	0.867750583125795	0.425502304916607	0.692249690893423	Y	Y	3.22804973003685	3.42048008240514	3.22804973003685	0	0	0	Y	0.987555066079295	N	N	0	752	ENSMUSG00000026348	ENSMUSG00000026349	+	-	1	1	127767564	127808061	intron	upstream	Acmsd	Ccnt2	127729413	127774164	+	+	0	127753955	127773930	+	-	N	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	Y	0	0	6	1.02189639023832	0.649577550384046	1.02189639023832	0.649577550384046	3	-	-	0.711778687837589
+4912	TAACAGAACAAGAGCAATGTGCTAGAATAGAAGACCAGAAAATGAAATGGTGGAGTTTGA|GGCTGGATAGACAGTTTGAAAGGTAAGTATTGAAAAACACTTGAATTTGGGTCAGTACAAAGGGACATGCAGAGACTTTGAATCATCAAAACTCCAGCATGCATTGTCTTACGGATGTTTAGATAGGTGTGTTTTGGACAACACTCTGGGTTCTTGTAATGATGTTGATCAAATGTCTGAG	15	1	0.895469718833858	0.922600375157455	N	Y	3.29868787111796	3.36447795254773	3.29868787111796	0	0.933608815426997	0	N	0.933608815426997	N	N	0	0	ENSMUSG00000088422	ENSMUSG00000053332	+	-	5	1	79639109	161038539	downstream	intron	7SK	Gas5	79638815	161034422	-	+	0.933608815426997	79638354	161036831	-	-	Y	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	N	0	0	5	0.459457291735057	0.808011099258204	0.808011099258204	0.459457291735057	1	-	-	0.592482164323654
+5141	TCTTATCTCTGTGTTGGTGTGCTTCTCTGTTTGATGACAGAGCAGGGTCTTGCTGAACTGCACCTACTGGAGTAGGCAACTGTTTACCAATCCAGTCATATTCATAATCAAACATATACCCTTTTCGATCAAACAAGTCAGTAAAAAGCTTTCTTAGGTAATCATAGTCTGGCTTTTCAAAAAAATCCAGCCTTCGTACGTAACGGAGAT|ACGTTGCCATTTCTGGGAAGTTCTCACACAACACTTCTATTGGTGTGGCTCGTTT	9	1	0.853164221793603	0.997416520640376	N	Y	3.60831833727406	3.59847695253922	3.59847695253922	266	0.981886534518113	0.981886534518113	N	0.981886534518113	N	N	2227	0	ENSMUSG00000073563	ENSMUSG00000083798	-	+	18	X	53955684	53418967	coding	intron	Csnk1g3	Gm14584	53862113	53418331	+	+	0	53932206	53418862	-	+	Y	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	N	0	0	17	1.2040949714436	0.990209680463485	1.2040949714436	0.990209680463485	2	-	-	0.788432447586541
+1886	CGCGGGGCGCCCGCCGGCTCACACTCCCGCGCTCTCTCTCCGGGTTTGGCGGCCGCCAGGAGGAGGAAGAGGAGGAGGAGGAGAAGAAGAAGGAGGAGGAGTGGAGCGAGCGCAGAAG|TAGCTGCTGCTGCTGGTGGTGACAATGTCAAATAACGGCGTAGACATCCAGGACAAACCCCCAGCCCCTCCGATGAGAAACACCAGCACTATGATTGGAGCCGGCAGCAAAGACACTGGAACCCTAAACCACGGCTCCAAACCTCTGCCTCCAAACCCAGAGGAG	18	1	0.708948431161152	0.783431002546196	Y	Y	2.65454686157692	3.52101274054962	2.65454686157692	0	0.983110527572213	0	Y	0.983110527572213	N	N	16	2476	ENSMUSG00000042797	ENSMUSG00000030774	-	-	7	7	97738247	97912381	upstream	utr5p	Aqp11	Pak1	97726379	97842935	-	+	0	97788974	97854436	+	-	N	-	-	N	dataset_6344_files	1	0.366666666666667	1	1	0	1	N	Y	0.366666666666667	0	18	0.950601293244945	1.29915510076809	1.29915510076809	0.950601293244945	4	-	-	0.970255256454431
+9297	GTGCTGGGCAGGAAGTCCCGGGCCAGGCAGCCCATGGCCACCAGATTCTTATCAGACAGGGGGCTCTCGCAGGAGACGAGGGGGAAGACATTTGGGAAGGACTGACTCT|CATTTGCGGTGCCTGGTTTCGGAGAGGTCCAGAGTCTTTGTGTGGAATTGTTCCTTCAAAGCCACCGAGGCTGGCTGGTCCATGAGCAGCCAGGTGGATGGGTGGCAGAAGCC	20	0.318065745640931	0.64026046146514	0.56228459372928	N	Y	3.05166433869594	3.43614916339873	3.05166433869594	0	0	0	Y	0.990866828485621	N	N	1488	0	ENSMUSG00000076617	ENSMUSG00000092748	-	+	12	12	113422730	113425227	coding	upstream	Ighm	AC073553.3	113418950	113425150	-	-	0	113422730	113426655	+	-	N	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	N	0	0	6	0.665420905271462	0.879306196251575	0.879306196251575	0.665420905271462	4	-	-	0.831715391282697
+5326	GTACACTGTAGCTGTCTTCAGACACACCAGAAGAGGGAGTCAGATCTTGTTACGGATGGTTGTGAGCCACCATGTGGTTGCTGGGATTTGAACTCTGGACCTTCGGAAGAGCAGTCGGGTGCTCTTACCCACTGAGCCATCTCACCAGCCCC|GAGTTACACAGTTTTAATGACTTCCTTACTTCTCATGATCTCTATACTGTTTAACTTTCCTCGGTGTGTTTTTAAGTCTTTGCTATCTTGTTCTAGTTTTTTGCAAGAATCACGAAAATGATTCTTGGATTTCCAGACTCTTCCTTTTGAAAGCTG	7	1	0.607242467201396	0.755062684986217	N	Y	3.49744043328497	3.49528100778935	3.49528100778935	9	0.960654062340317	0	N	0.967211718616931	N	N	2963	0	ENSMUSG00000039671	ENSMUSG00000073647	+	+	2	18	165899016	4198969	utr3p	downstream	Zmynd8	Gm10557	165784152	4198216	-	-	0.00979390223130638	165827729	4198107	-	-	Y	-	-	N	dataset_6344_files	476	0.993464052287582	298.461538461538	168	13	1	N	N	0.993464052287582	0	13	1.21201664888731	1.34668516543034	1.34668516543034	1.21201664888731	1	-	-	0.536685629613134
+1721	CGTACCTCCCTCCCAGCAACCGGCCTGGCGGCAGCGCGGCTACAAAACTGAGGAGGCGGAGCCGAGACGGAGTCGGTACTGCGCTCTGACTCCTAGACCAGGTTTAAGTTTTTGAAGTTGAAGTAGGTCTACACAGTAGGAACCCATGTCTTTTCTTGTAAGTAAACCAGAGCGCATTA|GGGCCAATGAGGCGAGCTCAGAGTCCATAGCATTGTTCTCCAAACCA	8	0.903915350299452	0.697638501162383	0.785699237375327	N	Y	3.65384724021086	3.68527868815389	3.65384724021086	142	0.936451401255975	0.343331146311745	N	0.936451401255975	N	N	800	0	ENSMUSG00000069631	ENSMUSG00000087034	+	+	11	5	106202168	60232198	coding	upstream	Strada	Cbfa2t2-ps1	106163330	60231482	-	+	0	106187103	60176541	-	+	Y	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	N	0	0	5	0.879306196251575	0.475300646622473	0.879306196251575	0.475300646622473	1	-	-	0.738431070020728
+1027	GCGGCAGCCGGTTCGGGCGGGCGGCATCATGGACGAGAAGTTGTTCACCAAGGAGCTGGACCAGTGGATCGAGCAGCTGAACGAGTGCAAGCAGCTCTCCGAGTCCCAGGTCAAGAGCCTCTGCGAGAAG|TGCTGCCTTTGACAGAGATGACATGCTTATACCATGCGGGTGGCACGAAGCTGTGAAGTGGTGATGACGGGGATGAGCTTGGACATCCTACGAGAGTGGCAAAGGTGAAGCAAGCCCAGGTGCTGGCTGTGCAAGG	2	1	0.148625497007208	0.21082086276253	N	N	3.33524062522047	3.43039601505348	3.33524062522047	3	0	0	N	0	N	N	10063	1785	ENSMUSG00000020349	ENSMUSG00000035021	+	+	11	12	52127778	55014348	coding	intron	Ppp2ca	Baz1a	52098681	54892989	+	-	0	52099150	54980379	+	-	Y	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	N	0	0	10	0.942679615801238	1.07734813234427	1.07734813234427	0.942679615801238	4	-	-	0.83438414122479
+7746	AAAGGGAGTCGAGACTGCCTTCTGCGCGCGCCCGGCTTTGCGCGCCTCCGCCACCAGATGTGGGGGGATGGGAGGCCCCCTCCGCGGCCCCTTCCCCACCCAGCCCAGAAAGCTGAACTGGCAAG|AGGCAATTTGAAACAAGCCACTCCAACCTCTTTTTCAAAGTTATAGGAGGTTCCCTGTCTTGAAGTCTCCTGCCTTGGATTTTCTGAGGTGCTGCTATTCTGGGCAACTATCAAAATCCTACCTGTTAAAACATGATGGATTAGAGAAAAAAAACAACCCAC	26	1	0.707412664952061	0.861438628736582	Y	Y	3.14574680596571	3.22961716614005	3.14574680596571	0	0	0	Y	0	N	N	0	0	ENSMUSG00000087658	ENSMUSG00000087626	+	-	6	6	52162119	52174059	intron	intron	Gm15051	Gm15050	52156902	52172881	+	+	0	52158684	52173634	+	-	N	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	Y	0	0	20	1.02189639023832	1.23578168121843	1.23578168121843	1.02189639023832	2	-	-	0.746044848941682
+8762	GGGGGCGGGGCTGAGCGCGGCCGCAGCCATTTTGGTGGAAGAGAAACAATAGGACGGAAGCGTCGCGGGACTGGGCTGTGGCCGCAG|AGTGTCCTGGCCTGCACAAACCGAGGAGCTGAGATCAAACAGGTGGCTGTAAGGACAGACAGTGAACGGAGGGCAAGCCGGCCTCTGGACCCCGCTGCCTCCCCTTTCTCCCTGCTGCTCGTGTCCAGAGGATGAGCCCAGCCTTCAGGACCATGGACGTGGAGCCCCGCACCAAGGGCATCC	28	0.498252441818171	0.890426951441059	0.30351938174741	Y	Y	3.55041790174621	3.58424216889964	3.55041790174621	0	0.977096322687365	0.0380455528693218	Y	0.977096322687365	N	N	0	714	ENSMUSG00000088626	ENSMUSG00000032599	-	-	9	9	108795388	108806333	downstream	utr5p	SNORA28	Ip6k2	108795243	108795994	-	+	0	108783870	108796064	+	-	N	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	Y	0	0	8	0.705029292490001	1.27539006843697	1.27539006843697	0.705029292490001	4	-	-	0.953988239876003
+13800	GATTGTTAGGGATGGGTCCTTGGTCAGCTGTCCAGAATGCTAGAGCTTCGTCCTCCTGGGAGATGGTTTCAGTCCTTACCCCCAGGACTCTGATGAGATCCTGAGGGAATCCACCCTCTGCTGATGGCCCAGTGAAAGCC|AGGCTCCGTGGCTTTACAGTGGATGGGATCTGGGAGAGTAGAGGGAAGGTTCTAGAACCCTGAAACCAGACCACTCTATTAGCGAACTCACAGCTGCCTTGTGGTGTAGAC	3	0.978409273957491	0.969552944230377	0.898574822628826	N	Y	3.329991337985	3.51679652308403	3.329991337985	0	0	0	Y	0	N	N	575	443	ENSMUSG00000053799	ENSMUSG00000024987	+	-	19	19	37683245	37701528	intron	upstream	Exoc6	Cyp26a1	37550418	37697808	+	+	0	37678397	37696729	+	-	N	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	N	0	0	12	0.839697809033035	0.855541163920451	0.855541163920451	0.839697809033035	1	-	-	0.80116231284735
+5983	CCATGTCAAACCACCATCCACGGCTGTCGTCCTCGAAGTGCAGGTAGTCCATCCTGTGAGCGCCGTGCGGGAACTGCCTCCGTGTCACTGGGGCGGCGCGCCTGTGGAAT|CCTCCGAAGAGATGGAATCCTTTCCTGCAGCTCGGCAAGGGCCACTTCGCAGAGCTGGATTTCTGAAAGCTTTGCTTGATTTTCAAATATTCTTTAGTAAAGAATGTCTTTGTGGCATTGTTC	1	1	1	0.945022483912395	Y	Y	3.49015970162987	3.55334927718654	3.49015970162987	0	0.990947940947941	0.00427350427350426	Y	0.990947940947941	N	N	0	60	ENSMUSG00000021718	ENSMUSG00000021720	+	+	13	13	105121782	105271039	downstream	intron	4933425L06Rik	Rnf180	105082122	105149352	+	-	0	105131562	105167527	+	-	N	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	Y	0	0	5	0.78424606692708	0.974366325576069	0.974366325576069	0.78424606692708	1	-	-	0.813708699275449
+12092	TGCCCAATGAGCTGCTGGCACTCAGCACAGGTGTTGGCGAAGGTGTTGTCATAGCACGGAACGCAGTAGGGGCCACTGTCTGTCTGGATGTATTTGCGGCCGTACAAGGACTCGTTGCATTTTGCACAGTCAAATGCCTCGCTCATGGTGGCGGTGCCCAGTGAGC|CCTCAAACTCAAGAAGCCCCATCTCAGTCGGTCTTCTTACTTTGCAAGAGTTTTCAAAGGACTGCGCTGGGTCTCTTCTGCCAAGCGGCTCATGGCTTCCTCTGGGTGCTGAATCATTCTCTGGTGCCTGCAACCACAAGACCTCCTTCC	2	1	0.16313785091306	0.240986946418107	Y	Y	3.43784458057793	3.48805370929674	3.43784458057793	0	0.99335436382755	0.00315457413249232	Y	0	N	N	2072	0	ENSMUSG00000032643	ENSMUSG00000088067	-	-	4	4	124708611	124697607	utr5p	downstream	Fhl3	U6	124700701	124697505	+	-	0	124705614	124696071	-	+	N	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	Y	0	0	9	1.21993832633101	1.18825161655618	1.21993832633101	1.18825161655618	1	-	-	0.736022708835765
+11605	CTCCACACCACAGCGTCACCGATGTCCAGCGCTTTCGAGCCGGCACACAGCTGTTGCAGAAGACTACAGGGGTGGGATCCGAGCTGGGAATGTAGAAGGAGGAACAGCGGCCAAAACAGAGATGATTAAGGACCCGGGCACTTGTGCAACCAGGCCTGGAGATCACCTGC|GCCGTACGGACTAAAGCAGCGCGGCGCTCCTCCGCTCCCCGGCCGGAGGCCCCCGGTGTTTCCGCCGCGCAGGCAGCGCCGTAGCCAGCCCCGCTGCCGCGAGGACCCACAGCCAAG	11	0.982340211125991	0.568397069242315	0.586452494205497	Y	Y	3.57167229721571	3.28292550416853	3.28292550416853	0	0	0	Y	0.770032051282051	N	N	167	2428	ENSMUSG00000053226	ENSMUSG00000033751	-	-	8	8	84822823	84835482	intron	utr5p	Dand5	Gadd45gip1	84815405	84831522	-	+	0	84816537	84832174	+	-	N	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	Y	0	0	12	1.26746839099326	0.990209680463485	1.26746839099326	0.990209680463485	2	-	-	0.758045795414889
+7456	AAATTCTGGAGTCAGTTCTGGAGACATAGATAGCTCCCAAATTATAACCAACCCTCTTCCTCCCGTGGCCTCCCCTCCTCCTGCATCTAAAGCAAAGGAAGTTTCCGATGGGGAAAATCTCGAGCAAGATCTATGTACGTTCTTGATATCAAGAGCCTGTAAGAACTCAACACTGGCTAATTATTTATACTGG|TCACTTTTACTCTGCTATTCCCCTATCTATGCTCCGAAAATCATTCTGACTCCTGCAATGACCCCTATTACCCAGATTTTGAAGATACTCCCTGCTATCTACTCAGCTCTGAAGACTTG	9	1	0.674019218743678	0.827309111200933	N	N	3.64400585547602	3.3012485241977	3.3012485241977	0	0	0	N	0	N	N	1791	0	ENSMUSG00000033628	ENSMUSG00000056822	+	-	18	16	30348126	19487830	coding	downstream	Pik3c3	Olfr166	30272747	19486793	+	+	0	30311220	19493776	+	-	Y	-	-	N	dataset_6344_files	1	1	1	1	0	1	N	N	0	1	10	1.37837187520517	0.942679615801238	1.37837187520517	0.942679615801238	1	-	-	0.745013975183866
+2385	CGGGACGGGGCGGGGCCGGCGAACTTCTGTGCCTCACTGTCCCCGGACACTGAGGGACACCGGGCAGGCAGCTGGCACCATGAAGATCTGGACTTCGGAGCACGTCTTTGAC|TACATCCGGTTTTCCCAGATCTGTGCAAAAGCAGTGAGGGATGCCCTGAAGACCGAGTTCAAAGCGAACGCTGAGAAGACTTCGGGCAGCAGCATAAAAATTGTGAAAGTCTCGAAGAAGGAGTAGCTGAATCTGAAGCCTGAAGTGCTGAGTCTTGAA	7	1	0.168340481582645	0.162692865850249	Y	Y	3.69784855983781	3.60831833727406	3.60831833727406	0	0	0	Y	0	N	Y	3085	3617	ENSMUSG00000016257	ENSMUSG00000016252	+	-	2	2	174473081	174464105	coding	coding	Slmo2	Atp5e	174465067	174461072	-	-	0	174472832	174462629	-	+	N	-	-	N	dataset_6344_files	2	0.190909090909091	1.2	1	1	1	Y	Y	0.190909090909091	0	5	0.871384518807867	1.29915510076809	1.29915510076809	0.871384518807867	4	-	-	0.7622459420951
+13923	AGACTGTTGAGAAGGATTCAACTGCCGAATTCAGAACTCATCAGCTGGGGAACGACGGTGATAAAGGTTCCCGTAAAGCAGACTGTTGAGAAGGATTCAACTGCCGAATTCAGAACTCATCAG|CCAGAGTCGGCGCTCTCCGGCGAGCTATCCCCTTCTCACCACACTCTGAGAACGGAGCTTGGTGCCGGCTCGGCCGCCTCCGCCAATTCCGGGTCCCTCTTCA	4	0.0503617839630236	0.900541946891923	0.647988756014038	N	Y	3.6988816296077	3.47080361183081	3.47080361183081	0	0.980668063812497	0	N	0.980668063812497	N	N	590	367	ENSMUSG00000046079	ENSMUSG00000051671	-	-	5	8	105832436	126425434	intron	upstream	Lrrc8d	1810063B05Rik	105699969	126422501	+	+	0	105728723	126422269	-	-	Y	-	-	N	dataset_6344_files	10	0.968503937007874	8	4	7	1	N	N	0.968503937007874	0	7	0.974366325576069	0.617890840609215	0.974366325576069	0.617890840609215	4	-	-	0.812231785796686
+11177	ATCTGACAGAACTTGCCACTGTGCCTGCAACCTTGTCTGAGAGGAA|CCCTTCTCTGAGGATGGACACTTCTCACACTACAAAGTCCTGTTTGCTGATTCTTCTTGTGGCCCTACTGTGTGCAGAAAGAGCTCAGGGACTGGAGTGTTACCAGTGCTATGGAGTCCCATTTGAGACTTCTTGCCCATCAATTACCTGCCCCTACCCTGATGGAGTCTGTGTTACTCAGGAGGCAGCAGTTATTGTGGATTCTCAAACAAGGA	434	0.873359569934578	0.121885088610831	0.552181129473186	Y	Y	3.55081912933034	3.43784458057793	3.43784458057793	0	0	0.00381679389312994	Y	0.956687686691006	N	Y	1416	5049	ENSMUSG00000079018	ENSMUSG00000075602	+	-	15	15	75048837	74997634	utr5p	utr5p	Ly6c1	Ly6a	75045017	74994878	-	-	0	75048442	74996568	-	+	N	-	-	N	dataset_6344_files	2	0	1.65079365079365	1	41	2	Y	Y	0	0	63	0.665420905271462	1.36252852031776	1.36252852031776	0.665420905271462	4	-	-	0.566839236883877
+3839	GTCACAGCCACCAATGTGTCAGCCCATGGAAGCCAAGCTAACTCGCCCTCTACTCCCAACTCAGCGGGTGGATACCCTTCGCCATGTTATCAGCCAGACAGGAGGATACAGTGACGGACTCGCAGCCAGTCAGATGTACAGTCCGCAGGGCATCAGT|GATATGTCAAGAACCTACTGATCCTCACAAGAACCTACTGTCTCTCTTCTCTTGACTGAAAACAAAAGTCTTCTTCTACCTGACCCGTGGCCTGACTTCTGGAAGAATGCATATGGACTTTCGAAGAAGTCAGAGGATATCTGCTGGCCACTTGTATCAGACAAAACAAGGCTGGTGAGCA	13	1	0.804385830976137	0.0961220231684031	Y	Y	3.42779550918039	3.49697642496855	3.42779550918039	0	0	0	Y	0.993649362117881	N	N	3895	0	ENSMUSG00000052534	ENSMUSG00000093538	+	+	1	1	168432270	168122465	coding	intron	Pbx1	Gm20711	168153527	168115244	-	+	0	168183530	168121480	-	+	N	-	-	N	dataset_6344_files	1	0.745098039215686	1	1	0	1	N	Y	0	0.745098039215686	16	1.29915510076809	1.21201664888731	1.29915510076809	1.21201664888731	4	-	-	0.97679698937665
+66	CTGAGAACGAGGAGCAGGAAGAACACACCAGCATGGGCGCGTTCAACGATCCGTTCCTGGCTCAGCCCCCCGATGAAGATTCACATTCCAGTTTTCCTGATGGTGAACAAATAGACCCTGAAAATCTCCACTTCAACCCTGATGAAGGAGGTGGA|AGACTGCTTGTTCTTGGAACCCAGCAGCCATACTGTGAGGAAGTCCAAACCAGCCAACCTGGAGAGACGGCATGCACAGGGTCCCACGGATA	13	0.719595391393931	0.399734668051015	0.808813953308524	Y	Y	3.39487633072086	3.67849247003875	3.39487633072086	0	0	0	Y	0.989174263674614	N	N	1514	627	ENSMUSG00000018548	ENSMUSG00000046442	+	+	11	11	87220683	87359023	coding	intron	Trim37	Ppm1e	87127077	87226906	+	-	0	87218328	87250589	+	-	N	-	-	N	dataset_6344_files	1	1	1	1	0	1	N	Y	0	1	11	0.863462841364159	0.689185937602586	0.863462841364159	0.689185937602586	1	-	-	0.615916282680748
+11601	GGCACTGGGAACCCGAGCGCAGCTTGGACAAGTGGACTGCACGCAGCGCCTGCCTGGTCTTTCACACTTCTCTGGGGACCTCAGGAGAGGAAGGAAGACTTTTGGATATGG|GGTGCTTCGAGTGCTGCATTAAATGCCTGGGAGGTATTCCCTATGCTTCTCTGATTGCAACCATCCTGCTGTATGCAGGCGTTGCCCTGTTCTGTGGCTGTGGCCATGAAGCCCTTTCTGGAACAGTCAACATTCTGCAGACCTACTTTGAGTTGGCAAGGACTGCTG	45	1	0.903984782676282	0.890308208832683	Y	Y	3.25993583872649	3.64553343878587	3.25993583872649	0	0	0	Y	0.991023166023166	N	N	195	2438	ENSMUSG00000039375	ENSMUSG00000031517	-	-	8	8	54887184	55060877	intron	coding	Wdr17	Gpm6a	54629055	54954728	-	+	0	54779650	55037328	+	-	N	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	Y	0	0	33	0.887227873695282	1.25162503610584	1.25162503610584	0.887227873695282	4	-	-	0.961094875436964
+11202	CAAACACGTCCTGGAGCAAGGCCTCCACAGCCACGGGGTCCCCATCCAGGGTCCTGAGTGGTTTTGAGTCAGCCATCAGGATGAAATCTTCCAGCTCAGGAGGGCTCAGAGAAGGATCAGACGAGCTGGTGACAAGGTACACAATGGACTTTTCAGAATAGGG|CTGGACTGCTTATTCCTTTAGAAGCAAGGTCTCTTCCCCAACCTGGGGCTCTAATTTTCTCAGTTAAGGTGGAATCCAGCAAGTCCAGGGGATTCTCCAGTCTCCACCTTTGTCAGAGCAGGCTTGTTAACC	21	1	0.362314658679907	0.288186846563607	Y	Y	3.57167229721571	3.6035982586987	3.57167229721571	0	0	0	Y	0.984899672399672	N	N	1306	3566	ENSMUSG00000023044	ENSMUSG00000046897	-	-	15	15	102189043	102215606	utr5p	upstream	Csad	Zfp740	102176998	102203648	-	+	0	102188962	102202444	+	-	N	-	-	N	dataset_6344_files	1	0.961240310077519	1	1	0	3	N	Y	0	0.961240310077519	10	1.21993832633101	1.02189639023832	1.21993832633101	1.02189639023832	4	-	-	0.973142113883806
+9292	GGAGGTATCAAAGGACTTTTCAAAGGCGGTGATATGTCTAAGAATGTGAGTCAGTCACAGATGGCAAAATTAAACCAACAAATGGCCAAAATGATGGACCCACGAGTTCTTCATCACATG|GGAGGAGGAGGAGGAAGAAGAAAATAGGATGTCAGAAGAAGCAGAAAGACAATACCAACAAAACAAGCTGCAGGCCGATTCCATTGTACAGACAGACCAACCAGAAACAGTGTCGTCCAGCTTTGTAAATATTAATTTTGAAATGGAGGAAGACTGTGAAGCAATTAAG	13	0.627587070077396	0.568913321228002	0.464690965079815	N	Y	3.57233464462502	2.71992636785275	2.71992636785275	0	0	0	Y	0.368850574712644	N	Y	1463	391	ENSMUSG00000073079	ENSMUSG00000094103	+	-	12	12	55112891	55214076	coding	intron	Srp54c	1700047I17Rik2	55089202	55199533	+	+	0	55111181	55213840	+	-	N	-	-	N	dataset_6344_files	9	0	9	9	9	1	N	N	0	0	9	0.93475793835753	1.17240826166877	1.17240826166877	0.93475793835753	4	-	-	0.529926984288339
+8690	CTGGAGGAAAGCACCGCAGGTCTGAGCAGCCCTGAGCCGGGCAGGGTGGGGGCAGTGGCTAAGGCCTAGCTGGGGACGATTTAAAGGTATCGCGCCACCCAGCCACACCCCACAGGCCAGGCGAGGGTGCCACCCCCGGAGATCAGAGGTCATTGCTGGCGTTCAGA|GCCTAGGAAGTGGGCTGCGTTTCAGGGGGAAGTCCATGATCACCACGTGGCAACATGCAAGCGGGTGCTG	4	0.685704457350918	0.684620189710687	0.966832507460471	N	Y	3.58424216889964	3.53768019619294	3.53768019619294	39	0.98577430972389	0.260264105642257	N	0.260264105642257	N	N	4781	556	ENSMUSG00000032000	ENSMUSG00000016409	+	-	9	X	7873186	37150746	utr5p	intron	Birc3	Nkap	7848699	37126795	-	+	0	7872842	37143984	-	-	Y	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	N	0	0	5	0.831776131589327	0.712950969933709	0.831776131589327	0.712950969933709	1	-	-	0.520824161374586
+11497	CCTTTCCAGCGAGGTTCCAAGTTCTTAGTCTGGTGCCGGCGTACCCACACGGCGTCACCGACACGGAAGGGGTGTGGTATCACTGGCTGATCTAGCTGGTCCTGATAAGCAGCGGCCAGCGGCTTCCAGACCTCTCGTTGTACTGCTTGGAGGGCCTGTAAGTGAG|CTTCTCTTCTGGAAGTCGGACCAATTCACCTCAAGCACCAGAGCTTGAATTCATGATCATCCTGGACACAGCACTCATCAGGACCGCGCCGCGGCTGA	67	0.883499713768307	0.731092666993054	0.679758623906517	N	Y	3.66336194527509	3.6508225788147	3.6508225788147	0	0.0444358875625721	0	N	0.94917212167886	N	N	0	1604	ENSMUSG00000096832	ENSMUSG00000039007	-	+	5	15	23711061	33594552	downstream	intron	SNORD93	Pgcp	23710991	33083129	-	+	0	23703360	33221484	+	+	Y	-	-	N	dataset_6344_files	76	1	38.7730061349693	5	163	1	N	N	1	0	163	1.34668516543034	0.760481034595956	1.34668516543034	0.760481034595956	4	-	-	0.903135577741999
+8726	GAGCAAGCTGACAGCTGAGCAGAAGCTGAGCATGGACACCTTCAGATCCAACTCAGCGGACATCATTCTTTCTGCAGGGCGGCAAGAGCTCAAGAGCAAGCCAAGGCTGATAAGCATGAAGAGGATGGAGATGAGGAACAAGGCTCTGGAGTACTGGAGATAAT|CATGTCAGTTCGGCCGCGCAGGCGGGCTGCGTCGTCCTCGACGAGGCCTTTCGACGCTACCGTAACCTGCTCTTCGGTTCCGGCTCTTGGCCCCGACCCAGCTTCTCAAGTGAGTCACTGCCC	3	0.70117526311829	0.321035960603558	0.664396670389762	Y	Y	3.42949092635959	3.29652844562235	3.29652844562235	0	0	0	Y	0	N	N	44	8769	ENSMUSG00000049526	ENSMUSG00000025232	-	-	9	9	59525501	59565105	coding	coding	Tmem202	Hexa	59518686	59539667	-	+	0	59520238	59539868	+	-	N	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	Y	0	0	5	1.02981806768202	0.744637679708541	1.02981806768202	0.744637679708541	4	-	-	0.931447658206354
+5020	CAAACCATCATTTGTATTTTTCAAACTGTCTATCGAGCCTCAAACTCC|ACCATTTCATTTTCTGGTCTTCTATTCTAGCACCTACAACAAACAAATAGTTAAAGCTGCCCGGTTAGGCCCCTGACTCAGCACTTAAGATGGGGGAACCAGAGTCCACCAAATAGGGGGTAAATGCGTTAACGACCACTAGCTCCCACATATCAAAAACCAGCCTCCTCAGATACGCAGAAACAATGCCCTGACTTCAGAC	27	0.772639018637575	0.542772921402035	0.769655490407199	N	Y	3.43152237077921	3.41592783769912	3.41592783769912	0	0.89624833997344	0	N	0.584993359893758	N	N	0	0	ENSMUSG00000088422	ENSMUSG00000053332	-	+	5	1	79639109	161038539	downstream	intron	7SK	Gas5	79638815	161034422	-	+	0.89624833997344	79638362	161036581	+	+	Y	-	-	N	dataset_6344_files	1	0	1	1	0	3	N	N	0	0	9	0.52283071128472	1.06942645490056	1.06942645490056	0.52283071128472	1	-	-	0.818668251983994
+3184	CATTGGAGCTGTGGTGGCTTTTGTGATGAAGAGAAGGAGAAACACAGGTGGAAAAGGAGGGGACTATGCTCTGGCTCCAG|AGAACAGCGCCTGATGTTCCCTGTGAGCCTATGGGCTCAATGTGAAGAATTGTGGAGCCCAGCCTTCGCCTACACACCAGGACCCTGTCTCTGCATTGCCCTGTGTTCCCTTCCACCGCCAACCTTCCGGGTCTGCAGTGGAAACTAAGGGTTCTTTGGAAAGTCGG	40	1	0.843266171585359	0.786568876550746	N	Y	3.49424793801946	3.684245618384	3.49424793801946	0	0	0.551814516129032	Y	0.950201612903226	N	Y	23545	2145	ENSMUSG00000073411	ENSMUSG00000035929	+	-	17	17	35267499	35385290	coding	utr3p	H2-D1	H2-Q4	35262730	35379617	+	+	0	35266514	35383995	+	-	N	-	-	N	dataset_6344_files	4	0.22972972972973	2.57142857142857	2	7	1	N	N	0	0.22972972972973	7	0.562439098503259	1.03773974512573	1.03773974512573	0.562439098503259	4	-	-	0.85501289117385
+9158	CCGAATTTCAACCTCCTTATCAACAGTGGGATCTTCAAAGAGTTGTACCCTGAAGTTGCTCTTTCTCAGTGGAGTCCCACACTCAGGACAGTTTCCAGCTCCTCTTACAAACAGTAAGTCCACACAACTCTCACAC|CTTCCCCACCAGCCTGGTCCGGCTGCCCACCTCTCCCCGCCCCCCACCTCGCTTCCCTACCGGGGTGGTAGGGGGGACGACGGTGGCAACGAGCGGGCGGGGGATCCTCCC	5	1	0.770112168741176	0.964592797110167	Y	Y	3.1490579347374	3.11232376639641	3.11232376639641	0	0	0	Y	0	N	N	1882	677	ENSMUSG00000021103	ENSMUSG00000034460	-	-	12	12	73273988	73114037	coding	intron	Mnat1	Six4	73123717	73099609	+	-	0	73168000	73112254	-	+	N	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	Y	0	0	5	1.0060530353509	0.78424606692708	1.0060530353509	0.78424606692708	4	-	-	0.934413835802699
+1851	GCTGCAGGTGGGCTTATTCTACCATTGCTACTGTTCTGTCTTTGAAGATGTTCTTTATAGCATACTGAACACATGCCATTTGTACGAGGGTTTCCATAAAATCCGCAGCCAGTGGAGCAAAGCATAGGTGCTTGGCTGTGATTAGTTTCTTGAGCCATGTTCTTCTGTTGCA|CACCTGGCGGCGGCCCTCTCCGCCGGACGCGCTGCCGCCGCCGCCTCTCGCCGCCGCTGAGAGTGAGGACAGGTGAGGCCGCCAAACCCCCACTCGCTCCCGGCCCGCCGCCGCCGGCCCTCCGTCCGC	21	0.826443478467522	0.547250352165575	0.543160934834641	Y	Y	3.38908332958226	2.95732075160099	2.95732075160099	0	0	0	Y	0.992273730684327	N	N	1944	1	ENSMUSG00000030629	ENSMUSG00000085236	-	-	7	7	84679361	84776549	utr5p	intron	Zfand6	2610206C17Rik	84615054	84689640	-	+	0	84634406	84689743	+	-	N	-	-	N	dataset_6344_files	2	0.159420289855072	1.04878048780488	1	2	1	N	Y	0	0.159420289855072	41	1.14864322933764	1.09319148723169	1.14864322933764	1.09319148723169	1	-	-	0.715005473321084
+9962	CTGCGGCCCGCCGGGTCCCGGAGCCCACTGCCCCAGCACCCCGCGCTCGGCGCCCGCAGACGGCGCGGACCTCAGCGCGCACTTATGGGCTCGTTACCAGGACATGCGGAGGCTGGTGCACG|ACCTTCTGCCCCCTGAGGTCTGCAGCCTCCTAAACCCAGCAGCTATTTATGCCAACAATGAGATCAGCCTGAGTGACGTCGAAGTCTATGGCTTTGACTACGACTACACGCTGGCCCAGTATGCGGATGCACTGCACCCTGAGATCTTCAATGCTGCCCGGGACATCTTGATAGAGC	93	0.0971360740885943	0.213119196903586	0.467950871740144	Y	Y	3.71720464963688	3.27448372166755	3.27448372166755	0	0.983661202185792	0	Y	0.991830601092896	N	N	211	4122	ENSMUSG00000058351	ENSMUSG00000071547	-	-	14	14	31128930	31139121	upstream	upstream	2010107H07Rik	Nt5dc2	31088869	31134853	-	+	0	31131376	31134739	+	-	N	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	Y	0	0	33	1.1011131646754	0.808011099258204	1.1011131646754	0.808011099258204	4	-	-	0.743419539119423
+12549	TTGGAGATGCCAGTACCATGAGATGACCACCAAGAGCAGCAGCAGCAGTGGAGTACAGGCATCTGGAGCCTAGAGGATGACACATGTGCTACAAAGGGCCTGGCTGGAGAAGTGACCCAAGCCCTTGGAGGAGCCCAGAAGATC|GTCCATCCTGATAAAAATCACCATCCCCGGGCTGAGGAGGCCTTCAAAATTTTGCGGGCAGCTTGGGACATTGTCAGCAACCCAGAGAGGCGGAAGGAATATGAGATGAAACGGAT	18	1	0.96278392593137	0.394484707065412	N	Y	3.2808195118354	3.46448140203209	3.2808195118354	1	0.965649359228432	0	N	0.974237019421324	N	Y	1732	2437	ENSMUSG00000039307	ENSMUSG00000025354	+	-	11	10	121222655	128819446	utr5p	coding	Hexdc	Dnajc14	121204433	128805676	+	+	0	121206748	128814038	+	-	Y	-	-	N	dataset_6344_files	3	0.993103448275862	1.72222222222222	1	11	1	N	N	0.993103448275862	0	18	1.14864322933764	0.879306196251575	1.14864322933764	0.879306196251575	4	-	-	0.927135541379163
+3179	CTGGAGCAGTCCCCGTGACGCCGGGTGGCGACTGGCTCCCGGGTCTGAGGGGCTTCTGCTTGTCAGGTTCT|AGATATGTGCTGACTAGCAGGCTCACGTGCACAGTGTGGAGGATAAGCTATATCTTACAAAATGGGATTTGGGAGTGACCTGAAGAACTCACAGGAAGCTGTGTTAAAGTTGCAAGACTGGGAACTACGGTTGCTGGAGACAGTGAAGAAATTTATGGCTCTGAG	10	1	0.557528435226891	0.364482194613623	Y	Y	3.15741158895574	3.54759612884129	3.15741158895574	0	0.985974921257503	0	Y	0.873774291317525	N	N	10	1325	ENSMUSG00000045506	ENSMUSG00000000127	-	-	17	17	63863791	64139494	upstream	upstream	A930002H24Rik	Fer	63863300	63896018	-	+	0	63864053	63896016	+	-	N	-	-	N	dataset_6344_files	1	0	1	1	0	1	N	Y	0	0	7	0.499065678953596	1.12487819700652	1.12487819700652	0.499065678953596	3	-	-	0.767822892174251
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mm10_results.filtered.vcf	Sun Jan 17 14:11:06 2016 -0500
@@ -0,0 +1,115 @@
+##fileformat=VCFv4.1
+##source=defuse
+##reference=mm10
+##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=MATEID,Number=1,Type=String,Description="ID of the BND mate">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Read Depth of segment containing breakend">
+##INFO=<ID=SPLITCNT,Number=1,Type=Integer,Description="number of split reads supporting the prediction">
+##INFO=<ID=SPANCNT,Number=1,Type=Integer,Description="number of spanning reads supporting the fusion">
+##INFO=<ID=HOMLEN,Number=1,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">
+##INFO=<ID=SPLICESCORE,Number=1,Type=Integer,Description="number of nucleotides similar to GTAG at fusion splice">
+##INFO=<ID=GENE,Number=2,Type=String,Description="Gene Names at each breakend">
+##INFO=<ID=GENEID,Number=2,Type=String,Description="Gene IDs at each breakend">
+##INFO=<ID=GENELOC,Number=2,Type=String,Description="location of breakpoint releative to genes">
+##INFO=<ID=EXPR,Number=2,Type=Integer,Description="expression of genes as number of concordant pairs aligned to exons">
+##INFO=<ID=ORF,Number=0,Type=Flag,Description="fusion combines genes in a way that preserves a reading frame">
+##INFO=<ID=EXONBND,Number=0,Type=Flag,Description="fusion splice at exon boundaries">
+##INFO=<ID=INTERCHROM,Number=0,Type=Flag,Description="fusion produced by an interchromosomal translocation">
+##INFO=<ID=READTHROUGH,Number=0,Type=Flag,Description="fusion involving adjacent potentially resulting from co-transcription rather than genome rearrangement">
+##INFO=<ID=ADJACENT,Number=0,Type=Flag,Description="fusion between adjacent genes">
+##INFO=<ID=ALTSPLICE,Number=0,Type=Flag,Description="fusion likely the product of alternative splicing between adjacent genes">
+##INFO=<ID=DELETION,Number=0,Type=Flag,Description="fusion produced by a genomic deletion">
+##INFO=<ID=EVERSION,Number=0,Type=Flag,Description="fusion produced by a genomic eversion">
+##INFO=<ID=INVERSION,Number=0,Type=Flag,Description="fusion produced by a genomic inversion">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+1	106734547	bnd_4068_1	A	A]16:37799068]	233	PASS	SVTYPE=BND;MATEID=bnd_4068_2;DP=19;SPLITCNT=8;SPANCNT=11;GENE=Kdsr,Fstl1;GENEID=ENSMUSG00000009905,ENSMUSG00000022816;GENELOC=coding,intron;EXPR=775,35409;HOMLEN=2;SPLICESCORE=4;INTERCHROM;ALTSPLICE
+1	127753955	bnd_3783_1	T	T]1:127773930]	181	PASS	SVTYPE=BND;MATEID=bnd_3783_2;DP=8;SPLITCNT=2;SPANCNT=6;GENE=Acmsd,Ccnt2;GENEID=ENSMUSG00000026348,ENSMUSG00000026349;GENELOC=intron,upstream;EXPR=0,752;HOMLEN=0;SPLICESCORE=3;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+1	127773930	bnd_3783_2	T	[1:127753955[T	181	PASS	SVTYPE=BND;MATEID=bnd_3783_1;DP=8;SPLITCNT=2;SPANCNT=6;GENE=Acmsd,Ccnt2;GENEID=ENSMUSG00000026348,ENSMUSG00000026349;GENELOC=intron,upstream;EXPR=0,752;HOMLEN=0;SPLICESCORE=3;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+1	161036581	bnd_5020_2	A	[5:79638362[A	208	PASS	SVTYPE=BND;MATEID=bnd_5020_1;DP=36;SPLITCNT=27;SPANCNT=9;GENE=7SK,Gas5;GENEID=ENSMUSG00000088422,ENSMUSG00000053332;GENELOC=downstream,intron;EXPR=0,0;HOMLEN=0;SPLICESCORE=1;INTERCHROM;ALTSPLICE
+1	161036831	bnd_4912_2	G	]5:79638354]G	151	PASS	SVTYPE=BND;MATEID=bnd_4912_1;DP=20;SPLITCNT=15;SPANCNT=5;GENE=7SK,Gas5;GENEID=ENSMUSG00000088422,ENSMUSG00000053332;GENELOC=downstream,intron;EXPR=0,0;HOMLEN=0;SPLICESCORE=1;INTERCHROM;ALTSPLICE
+1	168121480	bnd_3839_2	G	]1:168183530]G	249	PASS	SVTYPE=BND;MATEID=bnd_3839_1;DP=29;SPLITCNT=13;SPANCNT=16;GENE=Pbx1,Gm20711;GENEID=ENSMUSG00000052534,ENSMUSG00000093538;GENELOC=coding,intron;EXPR=3895,0;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+1	168183530	bnd_3839_1	T	T[1:168121480[	249	PASS	SVTYPE=BND;MATEID=bnd_3839_2;DP=29;SPLITCNT=13;SPANCNT=16;GENE=Pbx1,Gm20711;GENEID=ENSMUSG00000052534,ENSMUSG00000093538;GENELOC=coding,intron;EXPR=3895,0;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+2	165827729	bnd_5326_1	C	C]18:4198107]	136	PASS	SVTYPE=BND;MATEID=bnd_5326_2;DP=20;SPLITCNT=7;SPANCNT=13;GENE=Zmynd8,Gm10557;GENEID=ENSMUSG00000039671,ENSMUSG00000073647;GENELOC=utr3p,downstream;EXPR=2963,0;HOMLEN=9;SPLICESCORE=1;INTERCHROM;ALTSPLICE
+2	174462629	bnd_2385_2	T	]2:174472832]T	194	PASS	SVTYPE=BND;MATEID=bnd_2385_1;DP=12;SPLITCNT=7;SPANCNT=5;GENE=Slmo2,Atp5e;GENEID=ENSMUSG00000016257,ENSMUSG00000016252;GENELOC=coding,coding;EXPR=3085,3617;HOMLEN=0;SPLICESCORE=4;ORF;EXONBND;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+2	174472832	bnd_2385_1	C	C[2:174462629[	194	PASS	SVTYPE=BND;MATEID=bnd_2385_2;DP=12;SPLITCNT=7;SPANCNT=5;GENE=Slmo2,Atp5e;GENEID=ENSMUSG00000016257,ENSMUSG00000016252;GENELOC=coding,coding;EXPR=3085,3617;HOMLEN=0;SPLICESCORE=4;ORF;EXONBND;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+3	103040040	bnd_5160_1	C	C]18:28188917]	213	PASS	SVTYPE=BND;MATEID=bnd_5160_2;DP=63;SPLITCNT=33;SPANCNT=30;GENE=Csde1,SNORA17;GENEID=ENSMUSG00000068823,ENSMUSG00000087940;GENELOC=coding,upstream;EXPR=10681,0;HOMLEN=91;SPLICESCORE=3;INTERCHROM;ALTSPLICE
+3	144201813	bnd_8647_1	T	T]5:149198645]	242	PASS	SVTYPE=BND;MATEID=bnd_8647_2;DP=9;SPLITCNT=4;SPANCNT=5;GENE=Lmo4,Uspl1;GENEID=ENSMUSG00000028266,ENSMUSG00000041264;GENELOC=coding,coding;EXPR=2482,2085;HOMLEN=1;SPLICESCORE=4;EXONBND;INTERCHROM
+4	124696071	bnd_12092_2	C	]4:124705614]C	187	PASS	SVTYPE=BND;MATEID=bnd_12092_1;DP=11;SPLITCNT=2;SPANCNT=9;GENE=Fhl3,U6;GENEID=ENSMUSG00000032643,ENSMUSG00000088067;GENELOC=utr5p,downstream;EXPR=2072,0;HOMLEN=0;SPLICESCORE=1;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+4	124705614	bnd_12092_1	C	C[4:124696071[	187	PASS	SVTYPE=BND;MATEID=bnd_12092_2;DP=11;SPLITCNT=2;SPANCNT=9;GENE=Fhl3,U6;GENEID=ENSMUSG00000032643,ENSMUSG00000088067;GENELOC=utr5p,downstream;EXPR=2072,0;HOMLEN=0;SPLICESCORE=1;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+4	148948907	bnd_12095_1	C	C]4:148961548]	249	PASS	SVTYPE=BND;MATEID=bnd_12095_2;DP=11;SPLITCNT=6;SPANCNT=5;GENE=Gm13205,Pex14;GENEID=ENSMUSG00000086606,ENSMUSG00000028975;GENELOC=intron,coding;EXPR=0,1453;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+4	148961548	bnd_12095_2	C	[4:148948907[C	249	PASS	SVTYPE=BND;MATEID=bnd_12095_1;DP=11;SPLITCNT=6;SPANCNT=5;GENE=Gm13205,Pex14;GENEID=ENSMUSG00000086606,ENSMUSG00000028975;GENELOC=intron,coding;EXPR=0,1453;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+5	23703360	bnd_11497_1	G	G[15:33221484[	230	PASS	SVTYPE=BND;MATEID=bnd_11497_2;DP=230;SPLITCNT=67;SPANCNT=163;GENE=SNORD93,Pgcp;GENEID=ENSMUSG00000096832,ENSMUSG00000039007;GENELOC=downstream,intron;EXPR=0,1604;HOMLEN=0;SPLICESCORE=4;INTERCHROM;ALTSPLICE
+5	60176541	bnd_1721_2	G	]11:106187103]G	188	PASS	SVTYPE=BND;MATEID=bnd_1721_1;DP=13;SPLITCNT=8;SPANCNT=5;GENE=Strada,Cbfa2t2-ps1;GENEID=ENSMUSG00000069631,ENSMUSG00000087034;GENELOC=coding,upstream;EXPR=800,0;HOMLEN=142;SPLICESCORE=1;INTERCHROM;ALTSPLICE
+5	79638354	bnd_4912_1	A	A]1:161036831]	151	PASS	SVTYPE=BND;MATEID=bnd_4912_2;DP=20;SPLITCNT=15;SPANCNT=5;GENE=7SK,Gas5;GENEID=ENSMUSG00000088422,ENSMUSG00000053332;GENELOC=downstream,intron;EXPR=0,0;HOMLEN=0;SPLICESCORE=1;INTERCHROM;ALTSPLICE
+5	79638362	bnd_5020_1	C	C[1:161036581[	208	PASS	SVTYPE=BND;MATEID=bnd_5020_2;DP=36;SPLITCNT=27;SPANCNT=9;GENE=7SK,Gas5;GENEID=ENSMUSG00000088422,ENSMUSG00000053332;GENELOC=downstream,intron;EXPR=0,0;HOMLEN=0;SPLICESCORE=1;INTERCHROM;ALTSPLICE
+5	105728723	bnd_13923_1	G	G]8:126422269]	207	PASS	SVTYPE=BND;MATEID=bnd_13923_2;DP=11;SPLITCNT=4;SPANCNT=7;GENE=Lrrc8d,1810063B05Rik;GENEID=ENSMUSG00000046079,ENSMUSG00000051671;GENELOC=intron,upstream;EXPR=590,367;HOMLEN=0;SPLICESCORE=4;INTERCHROM;ALTSPLICE
+5	149198645	bnd_8647_2	G	]3:144201813]G	242	PASS	SVTYPE=BND;MATEID=bnd_8647_1;DP=9;SPLITCNT=4;SPANCNT=5;GENE=Lmo4,Uspl1;GENEID=ENSMUSG00000028266,ENSMUSG00000041264;GENELOC=coding,coding;EXPR=2482,2085;HOMLEN=1;SPLICESCORE=4;EXONBND;INTERCHROM
+6	51467295	bnd_12868_1	A	A]10:73201702]	132	PASS	SVTYPE=BND;MATEID=bnd_12868_2;DP=110;SPLITCNT=69;SPANCNT=41;GENE=Hnrnpa2b1,Gm15398;GENEID=ENSMUSG00000004980,ENSMUSG00000085456;GENELOC=coding,intron;EXPR=41631,0;HOMLEN=0;SPLICESCORE=1;INTERCHROM;ALTSPLICE
+6	52158684	bnd_7746_1	G	G]6:52173634]	190	PASS	SVTYPE=BND;MATEID=bnd_7746_2;DP=46;SPLITCNT=26;SPANCNT=20;GENE=Gm15051,Gm15050;GENEID=ENSMUSG00000087658,ENSMUSG00000087626;GENELOC=intron,intron;EXPR=0,0;HOMLEN=0;SPLICESCORE=2;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+6	52173634	bnd_7746_2	A	[6:52158684[A	190	PASS	SVTYPE=BND;MATEID=bnd_7746_1;DP=46;SPLITCNT=26;SPANCNT=20;GENE=Gm15051,Gm15050;GENEID=ENSMUSG00000087658,ENSMUSG00000087626;GENELOC=intron,intron;EXPR=0,0;HOMLEN=0;SPLICESCORE=2;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+7	28376683	bnd_1870_1	C	C]7:28392166]	214	PASS	SVTYPE=BND;MATEID=bnd_1870_2;DP=10;SPLITCNT=2;SPANCNT=8;GENE=Zfp36,Med29;GENEID=ENSMUSG00000044786,ENSMUSG00000003444;GENELOC=downstream,intron;EXPR=1543,697;HOMLEN=4;SPLICESCORE=1;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+7	28392166	bnd_1870_2	C	[7:28376683[C	214	PASS	SVTYPE=BND;MATEID=bnd_1870_1;DP=10;SPLITCNT=2;SPANCNT=8;GENE=Zfp36,Med29;GENEID=ENSMUSG00000044786,ENSMUSG00000003444;GENELOC=downstream,intron;EXPR=1543,697;HOMLEN=4;SPLICESCORE=1;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+7	66355922	bnd_12600_2	C	[10:24597524[C	148	PASS	SVTYPE=BND;MATEID=bnd_12600_1;DP=6;SPLITCNT=1;SPANCNT=5;GENE=Ctgf,Lrrk1;GENEID=ENSMUSG00000019997,ENSMUSG00000015133;GENELOC=coding,intron;EXPR=7298,2735;HOMLEN=0;SPLICESCORE=3;INTERCHROM
+7	84634406	bnd_1851_1	A	A]7:84689743]	182	PASS	SVTYPE=BND;MATEID=bnd_1851_2;DP=62;SPLITCNT=21;SPANCNT=41;GENE=Zfand6,2610206C17Rik;GENEID=ENSMUSG00000030629,ENSMUSG00000085236;GENELOC=utr5p,intron;EXPR=1944,1;HOMLEN=0;SPLICESCORE=1;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+7	84689743	bnd_1851_2	C	[7:84634406[C	182	PASS	SVTYPE=BND;MATEID=bnd_1851_1;DP=62;SPLITCNT=21;SPANCNT=41;GENE=Zfand6,2610206C17Rik;GENEID=ENSMUSG00000030629,ENSMUSG00000085236;GENELOC=utr5p,intron;EXPR=1944,1;HOMLEN=0;SPLICESCORE=1;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+7	90125032	bnd_1855_1	T	T]7:90129872]	242	PASS	SVTYPE=BND;MATEID=bnd_1855_2;DP=13;SPLITCNT=4;SPANCNT=9;GENE=AC130210.1,Picalm;GENEID=ENSMUSG00000097162,ENSMUSG00000039361;GENELOC=intron,upstream;EXPR=0,8476;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+7	90129872	bnd_1855_2	C	[7:90125032[C	242	PASS	SVTYPE=BND;MATEID=bnd_1855_1;DP=13;SPLITCNT=4;SPANCNT=9;GENE=AC130210.1,Picalm;GENEID=ENSMUSG00000097162,ENSMUSG00000039361;GENELOC=intron,upstream;EXPR=0,8476;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+7	97788974	bnd_1886_1	G	G]7:97854436]	247	PASS	SVTYPE=BND;MATEID=bnd_1886_2;DP=36;SPLITCNT=18;SPANCNT=18;GENE=Aqp11,Pak1;GENEID=ENSMUSG00000042797,ENSMUSG00000030774;GENELOC=upstream,utr5p;EXPR=16,2476;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+7	97854436	bnd_1886_2	T	[7:97788974[T	247	PASS	SVTYPE=BND;MATEID=bnd_1886_1;DP=36;SPLITCNT=18;SPANCNT=18;GENE=Aqp11,Pak1;GENEID=ENSMUSG00000042797,ENSMUSG00000030774;GENELOC=upstream,utr5p;EXPR=16,2476;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+8	54779650	bnd_11601_1	G	G]8:55037328]	245	PASS	SVTYPE=BND;MATEID=bnd_11601_2;DP=78;SPLITCNT=45;SPANCNT=33;GENE=Wdr17,Gpm6a;GENEID=ENSMUSG00000039375,ENSMUSG00000031517;GENELOC=intron,coding;EXPR=195,2438;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+8	55037328	bnd_11601_2	G	[8:54779650[G	245	PASS	SVTYPE=BND;MATEID=bnd_11601_1;DP=78;SPLITCNT=45;SPANCNT=33;GENE=Wdr17,Gpm6a;GENEID=ENSMUSG00000039375,ENSMUSG00000031517;GENELOC=intron,coding;EXPR=195,2438;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+8	84816537	bnd_11605_1	C	C]8:84832174]	193	PASS	SVTYPE=BND;MATEID=bnd_11605_2;DP=23;SPLITCNT=11;SPANCNT=12;GENE=Dand5,Gadd45gip1;GENEID=ENSMUSG00000053226,ENSMUSG00000033751;GENELOC=intron,utr5p;EXPR=167,2428;HOMLEN=0;SPLICESCORE=2;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+8	84832174	bnd_11605_2	G	[8:84816537[G	193	PASS	SVTYPE=BND;MATEID=bnd_11605_1;DP=23;SPLITCNT=11;SPANCNT=12;GENE=Dand5,Gadd45gip1;GENEID=ENSMUSG00000053226,ENSMUSG00000033751;GENELOC=intron,utr5p;EXPR=167,2428;HOMLEN=0;SPLICESCORE=2;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+8	95682960	bnd_11596_2	G	]8:95739809]G	222	PASS	SVTYPE=BND;MATEID=bnd_11596_1;DP=24;SPLITCNT=8;SPANCNT=16;GENE=Cnot1,Ndrg4;GENEID=ENSMUSG00000036550,ENSMUSG00000036564;GENELOC=coding,intron;EXPR=6004,544;HOMLEN=0;SPLICESCORE=1;ALTSPLICE;DELETION
+8	95739809	bnd_11596_1	A	A[8:95682960[	222	PASS	SVTYPE=BND;MATEID=bnd_11596_2;DP=24;SPLITCNT=8;SPANCNT=16;GENE=Cnot1,Ndrg4;GENEID=ENSMUSG00000036550,ENSMUSG00000036564;GENELOC=coding,intron;EXPR=6004,544;HOMLEN=0;SPLICESCORE=1;ALTSPLICE;DELETION
+8	126422269	bnd_13923_2	C	]5:105728723]C	207	PASS	SVTYPE=BND;MATEID=bnd_13923_1;DP=11;SPLITCNT=4;SPANCNT=7;GENE=Lrrc8d,1810063B05Rik;GENEID=ENSMUSG00000046079,ENSMUSG00000051671;GENELOC=intron,upstream;EXPR=590,367;HOMLEN=0;SPLICESCORE=4;INTERCHROM;ALTSPLICE
+9	7872842	bnd_8690_1	A	A]X:37143984]	132	PASS	SVTYPE=BND;MATEID=bnd_8690_2;DP=9;SPLITCNT=4;SPANCNT=5;GENE=Birc3,Nkap;GENEID=ENSMUSG00000032000,ENSMUSG00000016409;GENELOC=utr5p,intron;EXPR=4781,556;HOMLEN=39;SPLICESCORE=1;INTERCHROM;ALTSPLICE
+9	21320850	bnd_8748_2	A	]9:21337962]A	189	PASS	SVTYPE=BND;MATEID=bnd_8748_1;DP=103;SPLITCNT=51;SPANCNT=52;GENE=Slc44a2,Ap1m2;GENEID=ENSMUSG00000057193,ENSMUSG00000003309;GENELOC=coding,upstream;EXPR=6343,2;HOMLEN=2;SPLICESCORE=2;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+9	21337962	bnd_8748_1	C	C[9:21320850[	189	PASS	SVTYPE=BND;MATEID=bnd_8748_2;DP=103;SPLITCNT=51;SPANCNT=52;GENE=Slc44a2,Ap1m2;GENEID=ENSMUSG00000057193,ENSMUSG00000003309;GENELOC=coding,upstream;EXPR=6343,2;HOMLEN=2;SPLICESCORE=2;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+9	59520238	bnd_8726_1	T	T]9:59539868]	237	PASS	SVTYPE=BND;MATEID=bnd_8726_2;DP=8;SPLITCNT=3;SPANCNT=5;GENE=Tmem202,Hexa;GENEID=ENSMUSG00000049526,ENSMUSG00000025232;GENELOC=coding,coding;EXPR=44,8769;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+9	59539868	bnd_8726_2	C	[9:59520238[C	237	PASS	SVTYPE=BND;MATEID=bnd_8726_1;DP=8;SPLITCNT=3;SPANCNT=5;GENE=Tmem202,Hexa;GENEID=ENSMUSG00000049526,ENSMUSG00000025232;GENELOC=coding,coding;EXPR=44,8769;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+9	108783870	bnd_8762_1	G	G]9:108796064]	243	PASS	SVTYPE=BND;MATEID=bnd_8762_2;DP=36;SPLITCNT=28;SPANCNT=8;GENE=SNORA28,Ip6k2;GENEID=ENSMUSG00000088626,ENSMUSG00000032599;GENELOC=downstream,utr5p;EXPR=0,714;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+9	108796064	bnd_8762_2	A	[9:108783870[A	243	PASS	SVTYPE=BND;MATEID=bnd_8762_1;DP=36;SPLITCNT=28;SPANCNT=8;GENE=SNORA28,Ip6k2;GENEID=ENSMUSG00000088626,ENSMUSG00000032599;GENELOC=downstream,utr5p;EXPR=0,714;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+10	24597524	bnd_12600_1	G	G]7:66355922]	148	PASS	SVTYPE=BND;MATEID=bnd_12600_2;DP=6;SPLITCNT=1;SPANCNT=5;GENE=Ctgf,Lrrk1;GENEID=ENSMUSG00000019997,ENSMUSG00000015133;GENELOC=coding,intron;EXPR=7298,2735;HOMLEN=0;SPLICESCORE=3;INTERCHROM
+10	73201702	bnd_12868_2	A	]6:51467295]A	132	PASS	SVTYPE=BND;MATEID=bnd_12868_1;DP=110;SPLITCNT=69;SPANCNT=41;GENE=Hnrnpa2b1,Gm15398;GENEID=ENSMUSG00000004980,ENSMUSG00000085456;GENELOC=coding,intron;EXPR=41631,0;HOMLEN=0;SPLICESCORE=1;INTERCHROM;ALTSPLICE
+10	128814038	bnd_12549_2	G	[11:121206748[G	236	PASS	SVTYPE=BND;MATEID=bnd_12549_1;DP=36;SPLITCNT=18;SPANCNT=18;GENE=Hexdc,Dnajc14;GENEID=ENSMUSG00000039307,ENSMUSG00000025354;GENELOC=utr5p,coding;EXPR=1732,2437;HOMLEN=1;SPLICESCORE=4;EXONBND;INTERCHROM;ALTSPLICE
+11	52099150	bnd_1027_1	G	G]12:54980379]	212	PASS	SVTYPE=BND;MATEID=bnd_1027_2;DP=12;SPLITCNT=2;SPANCNT=10;GENE=Ppp2ca,Baz1a;GENEID=ENSMUSG00000020349,ENSMUSG00000035021;GENELOC=coding,intron;EXPR=10063,1785;HOMLEN=3;SPLICESCORE=4;INTERCHROM
+11	87218328	bnd_66_1	A	A]11:87250589]	157	PASS	SVTYPE=BND;MATEID=bnd_66_2;DP=24;SPLITCNT=13;SPANCNT=11;GENE=Trim37,Ppm1e;GENEID=ENSMUSG00000018548,ENSMUSG00000046442;GENELOC=coding,intron;EXPR=1514,627;HOMLEN=0;SPLICESCORE=1;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+11	87250589	bnd_66_2	A	[11:87218328[A	157	PASS	SVTYPE=BND;MATEID=bnd_66_1;DP=24;SPLITCNT=13;SPANCNT=11;GENE=Trim37,Ppm1e;GENEID=ENSMUSG00000018548,ENSMUSG00000046442;GENELOC=coding,intron;EXPR=1514,627;HOMLEN=0;SPLICESCORE=1;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+11	106187103	bnd_1721_1	A	A[5:60176541[	188	PASS	SVTYPE=BND;MATEID=bnd_1721_2;DP=13;SPLITCNT=8;SPANCNT=5;GENE=Strada,Cbfa2t2-ps1;GENEID=ENSMUSG00000069631,ENSMUSG00000087034;GENELOC=coding,upstream;EXPR=800,0;HOMLEN=142;SPLICESCORE=1;INTERCHROM;ALTSPLICE
+11	121206748	bnd_12549_1	C	C]10:128814038]	236	PASS	SVTYPE=BND;MATEID=bnd_12549_2;DP=36;SPLITCNT=18;SPANCNT=18;GENE=Hexdc,Dnajc14;GENEID=ENSMUSG00000039307,ENSMUSG00000025354;GENELOC=utr5p,coding;EXPR=1732,2437;HOMLEN=1;SPLICESCORE=4;EXONBND;INTERCHROM;ALTSPLICE
+12	54980379	bnd_1027_2	T	[11:52099150[T	212	PASS	SVTYPE=BND;MATEID=bnd_1027_1;DP=12;SPLITCNT=2;SPANCNT=10;GENE=Ppp2ca,Baz1a;GENEID=ENSMUSG00000020349,ENSMUSG00000035021;GENELOC=coding,intron;EXPR=10063,1785;HOMLEN=3;SPLICESCORE=4;INTERCHROM
+12	55111181	bnd_9292_1	G	G]12:55213840]	135	PASS	SVTYPE=BND;MATEID=bnd_9292_2;DP=22;SPLITCNT=13;SPANCNT=9;GENE=Srp54c,1700047I17Rik2;GENEID=ENSMUSG00000073079,ENSMUSG00000094103;GENELOC=coding,intron;EXPR=1463,391;HOMLEN=0;SPLICESCORE=4;EXONBND;ALTSPLICE;DELETION
+12	55213840	bnd_9292_2	G	[12:55111181[G	135	PASS	SVTYPE=BND;MATEID=bnd_9292_1;DP=22;SPLITCNT=13;SPANCNT=9;GENE=Srp54c,1700047I17Rik2;GENEID=ENSMUSG00000073079,ENSMUSG00000094103;GENELOC=coding,intron;EXPR=1463,391;HOMLEN=0;SPLICESCORE=4;EXONBND;ALTSPLICE;DELETION
+12	73112254	bnd_9158_2	C	]12:73168000]C	238	PASS	SVTYPE=BND;MATEID=bnd_9158_1;DP=10;SPLITCNT=5;SPANCNT=5;GENE=Mnat1,Six4;GENEID=ENSMUSG00000021103,ENSMUSG00000034460;GENELOC=coding,intron;EXPR=1882,677;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+12	73168000	bnd_9158_1	C	C[12:73112254[	238	PASS	SVTYPE=BND;MATEID=bnd_9158_2;DP=10;SPLITCNT=5;SPANCNT=5;GENE=Mnat1,Six4;GENEID=ENSMUSG00000021103,ENSMUSG00000034460;GENELOC=coding,intron;EXPR=1882,677;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+12	113422730	bnd_9297_1	T	T]12:113426655]	212	PASS	SVTYPE=BND;MATEID=bnd_9297_2;DP=26;SPLITCNT=20;SPANCNT=6;GENE=Ighm,AC073553.3;GENEID=ENSMUSG00000076617,ENSMUSG00000092748;GENELOC=coding,upstream;EXPR=1488,0;HOMLEN=0;SPLICESCORE=4;ALTSPLICE;DELETION
+12	113426655	bnd_9297_2	C	[12:113422730[C	212	PASS	SVTYPE=BND;MATEID=bnd_9297_1;DP=26;SPLITCNT=20;SPANCNT=6;GENE=Ighm,AC073553.3;GENEID=ENSMUSG00000076617,ENSMUSG00000092748;GENELOC=coding,upstream;EXPR=1488,0;HOMLEN=0;SPLICESCORE=4;ALTSPLICE;DELETION
+13	105131562	bnd_5983_1	T	T]13:105167527]	207	PASS	SVTYPE=BND;MATEID=bnd_5983_2;DP=6;SPLITCNT=1;SPANCNT=5;GENE=4933425L06Rik,Rnf180;GENEID=ENSMUSG00000021718,ENSMUSG00000021720;GENELOC=downstream,intron;EXPR=0,60;HOMLEN=0;SPLICESCORE=1;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+13	105167527	bnd_5983_2	C	[13:105131562[C	207	PASS	SVTYPE=BND;MATEID=bnd_5983_1;DP=6;SPLITCNT=1;SPANCNT=5;GENE=4933425L06Rik,Rnf180;GENEID=ENSMUSG00000021718,ENSMUSG00000021720;GENELOC=downstream,intron;EXPR=0,60;HOMLEN=0;SPLICESCORE=1;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+14	31131376	bnd_9962_1	G	G]14:31134739]	189	PASS	SVTYPE=BND;MATEID=bnd_9962_2;DP=126;SPLITCNT=93;SPANCNT=33;GENE=2010107H07Rik,Nt5dc2;GENEID=ENSMUSG00000058351,ENSMUSG00000071547;GENELOC=upstream,upstream;EXPR=211,4122;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+14	31134739	bnd_9962_2	A	[14:31131376[A	189	PASS	SVTYPE=BND;MATEID=bnd_9962_1;DP=126;SPLITCNT=93;SPANCNT=33;GENE=2010107H07Rik,Nt5dc2;GENEID=ENSMUSG00000058351,ENSMUSG00000071547;GENELOC=upstream,upstream;EXPR=211,4122;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+15	33221484	bnd_11497_2	C	[5:23703360[C	230	PASS	SVTYPE=BND;MATEID=bnd_11497_1;DP=230;SPLITCNT=67;SPANCNT=163;GENE=SNORD93,Pgcp;GENEID=ENSMUSG00000096832,ENSMUSG00000039007;GENELOC=downstream,intron;EXPR=0,1604;HOMLEN=0;SPLICESCORE=4;INTERCHROM;ALTSPLICE
+15	74996568	bnd_11177_2	C	]15:75048442]C	144	PASS	SVTYPE=BND;MATEID=bnd_11177_1;DP=497;SPLITCNT=434;SPANCNT=63;GENE=Ly6c1,Ly6a;GENEID=ENSMUSG00000079018,ENSMUSG00000075602;GENELOC=utr5p,utr5p;EXPR=1416,5049;HOMLEN=0;SPLICESCORE=4;ORF;EXONBND;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+15	74996568	bnd_11169_2	A	]15:75048442]A	237	PASS	SVTYPE=BND;MATEID=bnd_11169_1;DP=702;SPLITCNT=410;SPANCNT=292;GENE=Ly6c1,Ly6a;GENEID=ENSMUSG00000079018,ENSMUSG00000075602;GENELOC=utr5p,utr5p;EXPR=1416,5049;HOMLEN=0;SPLICESCORE=4;ORF;EXONBND;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+15	75048442	bnd_11177_1	A	A[15:74996568[	144	PASS	SVTYPE=BND;MATEID=bnd_11177_2;DP=497;SPLITCNT=434;SPANCNT=63;GENE=Ly6c1,Ly6a;GENEID=ENSMUSG00000079018,ENSMUSG00000075602;GENELOC=utr5p,utr5p;EXPR=1416,5049;HOMLEN=0;SPLICESCORE=4;ORF;EXONBND;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+15	75048442	bnd_11169_1	G	G[15:74996568[	237	PASS	SVTYPE=BND;MATEID=bnd_11169_2;DP=702;SPLITCNT=410;SPANCNT=292;GENE=Ly6c1,Ly6a;GENEID=ENSMUSG00000079018,ENSMUSG00000075602;GENELOC=utr5p,utr5p;EXPR=1416,5049;HOMLEN=0;SPLICESCORE=4;ORF;EXONBND;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+15	102188962	bnd_11202_1	G	G]15:102202444]	248	PASS	SVTYPE=BND;MATEID=bnd_11202_2;DP=31;SPLITCNT=21;SPANCNT=10;GENE=Csad,Zfp740;GENEID=ENSMUSG00000023044,ENSMUSG00000046897;GENELOC=utr5p,upstream;EXPR=1306,3566;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+15	102202444	bnd_11202_2	C	[15:102188962[C	248	PASS	SVTYPE=BND;MATEID=bnd_11202_1;DP=31;SPLITCNT=21;SPANCNT=10;GENE=Csad,Zfp740;GENEID=ENSMUSG00000023044,ENSMUSG00000046897;GENELOC=utr5p,upstream;EXPR=1306,3566;HOMLEN=0;SPLICESCORE=4;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+16	19493776	bnd_7456_2	T	[18:30311220[T	189	PASS	SVTYPE=BND;MATEID=bnd_7456_1;DP=19;SPLITCNT=9;SPANCNT=10;GENE=Pik3c3,Olfr166;GENEID=ENSMUSG00000033628,ENSMUSG00000056822;GENELOC=coding,downstream;EXPR=1791,0;HOMLEN=0;SPLICESCORE=1;INTERCHROM
+16	37799068	bnd_4068_2	T	]1:106734547]T	233	PASS	SVTYPE=BND;MATEID=bnd_4068_1;DP=19;SPLITCNT=8;SPANCNT=11;GENE=Kdsr,Fstl1;GENEID=ENSMUSG00000009905,ENSMUSG00000022816;GENELOC=coding,intron;EXPR=775,35409;HOMLEN=2;SPLICESCORE=4;INTERCHROM;ALTSPLICE
+17	17395298	bnd_3153_1	C	C]17:17411818]	206	PASS	SVTYPE=BND;MATEID=bnd_3153_2;DP=41;SPLITCNT=16;SPANCNT=25;GENE=AC154200.1,Lix1;GENEID=ENSMUSG00000097379,ENSMUSG00000047786;GENELOC=intron,intron;EXPR=0,30;HOMLEN=0;SPLICESCORE=1;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+17	17411818	bnd_3153_2	T	[17:17395298[T	206	PASS	SVTYPE=BND;MATEID=bnd_3153_1;DP=41;SPLITCNT=16;SPANCNT=25;GENE=AC154200.1,Lix1;GENEID=ENSMUSG00000097379,ENSMUSG00000047786;GENELOC=intron,intron;EXPR=0,30;HOMLEN=0;SPLICESCORE=1;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+17	35266514	bnd_3184_1	G	G]17:35383995]	218	PASS	SVTYPE=BND;MATEID=bnd_3184_2;DP=47;SPLITCNT=40;SPANCNT=7;GENE=H2-D1,H2-Q4;GENEID=ENSMUSG00000073411,ENSMUSG00000035929;GENELOC=coding,utr3p;EXPR=23545,2145;HOMLEN=0;SPLICESCORE=4;EXONBND;ALTSPLICE;DELETION
+17	35383995	bnd_3184_2	A	[17:35266514[A	218	PASS	SVTYPE=BND;MATEID=bnd_3184_1;DP=47;SPLITCNT=40;SPANCNT=7;GENE=H2-D1,H2-Q4;GENEID=ENSMUSG00000073411,ENSMUSG00000035929;GENELOC=coding,utr3p;EXPR=23545,2145;HOMLEN=0;SPLICESCORE=4;EXONBND;ALTSPLICE;DELETION
+17	63864053	bnd_3179_1	T	T]17:63896016]	195	PASS	SVTYPE=BND;MATEID=bnd_3179_2;DP=17;SPLITCNT=10;SPANCNT=7;GENE=A930002H24Rik,Fer;GENEID=ENSMUSG00000045506,ENSMUSG00000000127;GENELOC=upstream,upstream;EXPR=10,1325;HOMLEN=0;SPLICESCORE=3;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+17	63896016	bnd_3179_2	A	[17:63864053[A	195	PASS	SVTYPE=BND;MATEID=bnd_3179_1;DP=17;SPLITCNT=10;SPANCNT=7;GENE=A930002H24Rik,Fer;GENEID=ENSMUSG00000045506,ENSMUSG00000000127;GENELOC=upstream,upstream;EXPR=10,1325;HOMLEN=0;SPLICESCORE=3;READTHROUGH;ADJACENT;ALTSPLICE;DELETION
+18	4198107	bnd_5326_2	G	]2:165827729]G	136	PASS	SVTYPE=BND;MATEID=bnd_5326_1;DP=20;SPLITCNT=7;SPANCNT=13;GENE=Zmynd8,Gm10557;GENEID=ENSMUSG00000039671,ENSMUSG00000073647;GENELOC=utr3p,downstream;EXPR=2963,0;HOMLEN=9;SPLICESCORE=1;INTERCHROM;ALTSPLICE
+18	28188917	bnd_5160_2	A	[3:103040040[A	213	PASS	SVTYPE=BND;MATEID=bnd_5160_1;DP=63;SPLITCNT=33;SPANCNT=30;GENE=Csde1,SNORA17;GENEID=ENSMUSG00000068823,ENSMUSG00000087940;GENELOC=coding,upstream;EXPR=10681,0;HOMLEN=91;SPLICESCORE=3;INTERCHROM;ALTSPLICE
+18	30311220	bnd_7456_1	G	G]16:19493776]	189	PASS	SVTYPE=BND;MATEID=bnd_7456_2;DP=19;SPLITCNT=9;SPANCNT=10;GENE=Pik3c3,Olfr166;GENEID=ENSMUSG00000033628,ENSMUSG00000056822;GENELOC=coding,downstream;EXPR=1791,0;HOMLEN=0;SPLICESCORE=1;INTERCHROM
+18	53932206	bnd_5141_1	T	T[X:53418862[	201	PASS	SVTYPE=BND;MATEID=bnd_5141_2;DP=26;SPLITCNT=9;SPANCNT=17;GENE=Csnk1g3,Gm14584;GENEID=ENSMUSG00000073563,ENSMUSG00000083798;GENELOC=coding,intron;EXPR=2227,0;HOMLEN=266;SPLICESCORE=2;INTERCHROM;ALTSPLICE
+19	37678397	bnd_13800_1	C	C]19:37696729]	204	PASS	SVTYPE=BND;MATEID=bnd_13800_2;DP=15;SPLITCNT=3;SPANCNT=12;GENE=Exoc6,Cyp26a1;GENEID=ENSMUSG00000053799,ENSMUSG00000024987;GENELOC=intron,upstream;EXPR=575,443;HOMLEN=0;SPLICESCORE=1;ALTSPLICE;DELETION
+19	37696729	bnd_13800_2	A	[19:37678397[A	204	PASS	SVTYPE=BND;MATEID=bnd_13800_1;DP=15;SPLITCNT=3;SPANCNT=12;GENE=Exoc6,Cyp26a1;GENEID=ENSMUSG00000053799,ENSMUSG00000024987;GENELOC=intron,upstream;EXPR=575,443;HOMLEN=0;SPLICESCORE=1;ALTSPLICE;DELETION
+X	37143984	bnd_8690_2	G	]9:7872842]G	132	PASS	SVTYPE=BND;MATEID=bnd_8690_1;DP=9;SPLITCNT=4;SPANCNT=5;GENE=Birc3,Nkap;GENEID=ENSMUSG00000032000,ENSMUSG00000016409;GENELOC=utr5p,intron;EXPR=4781,556;HOMLEN=39;SPLICESCORE=1;INTERCHROM;ALTSPLICE
+X	53418862	bnd_5141_2	A	]18:53932206]A	201	PASS	SVTYPE=BND;MATEID=bnd_5141_1;DP=26;SPLITCNT=9;SPANCNT=17;GENE=Csnk1g3,Gm14584;GENEID=ENSMUSG00000073563,ENSMUSG00000083798;GENELOC=coding,intron;EXPR=2227,0;HOMLEN=266;SPLICESCORE=2;INTERCHROM;ALTSPLICE
Binary file test-data/tophat_out2h.bam has changed
--- a/tool-data/defuse.loc.sample	Mon Jan 14 12:24:28 2013 -0600
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-## Configurstion info for prepared data references for DeFuse
-## http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2
-## 3 columns separated by the TAB character
-## The 3rd column has dictionary values that will be substituted in the config file for defuse
-## It should likely contain keys:   dataset_directory gene_models genome_fasta repeats_filename est_fasta est_alignments unigene_fasta
-## If this is not a Homo_sapiens reference also need keys:  gene_id_pattern transcript_id_pattern chromosomes
-
-#db_key	name	{'config_key':'config_value'}
-#hg19	GRCh37(hg19)	{'gene_id_pattern':'ENSG\d+', 'transcript_id_pattern':'ENST\d+', 'dataset_directory':'/data/genomes/Hsapiens/hg19/defuse', 'gene_models':'$(dataset_directory)/Homo_sapiens.GRCh37.62.gtf', 'genome_fasta':'$(dataset_directory)/Homo_sapiens.GRCh37.62.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Hs.seq.uniq', 'chromosomes':'1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'}
-#mm9	NCBIM37(mm9)	{'gene_id_pattern':'ENSMUSG\d+', 'transcript_id_pattern':'ENSMUST\d+', 'dataset_directory':'/data/genomes/Mmusculus/mm9/defuse', 'gene_models':'$(dataset_directory)/Mus_musculus.NCBIM37.63.gtf', 'genome_fasta':'$(dataset_directory)/Mus_musculus.NCBIM37.63.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Mm.seq.uniq', 'chromosomes':'1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'}
-#mm8	NCBIM36(mm8)	{'gene_id_pattern':'ENSMUSG\d+', 'transcript_id_pattern':'ENSMUST\d+', 'dataset_directory':'/data/genomes/Mmusculus/mm9/defuse', 'gene_models':'$(dataset_directory)/Mus_musculus.NCBIM36.46.gtf', 'genome_fasta':'$(dataset_directory)/Mus_musculus.NCBIM36.46.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Mm.seq.uniq', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/defuse_reference.loc.sample	Sun Jan 17 14:11:06 2016 -0500
@@ -0,0 +1,7 @@
+## Configuration info for prepared data references for DeFuse
+## http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.6.0
+## 4 columns separated by the TAB character
+## The 4th column has the path to the defuse config.txt file; that file needs to have dataset_directory set to the directory path where the defuse reference data resides.
+## The defuse galaxy tool will substitute the directory path of config.txt if the dataset_directory property is not set (i.e. it still has the placeholder value '__DATASET_DIRECTORY__')
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
+GRCh37	GRCh37	Human GRCh37 (hg19)	/depot/GRCh37/defuse/GRCh37.config
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Sun Jan 17 14:11:06 2016 -0500
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of the deFuse reference config files -->
+    <table name="defuse_reference" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/defuse_reference.loc" />
+    </table>
+</tables>
--- a/tool_dependencies.xml	Mon Jan 14 12:24:28 2013 -0600
+++ b/tool_dependencies.xml	Sun Jan 17 14:11:06 2016 -0500
@@ -1,180 +1,24 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="defuse" version="0.6.0">
-        <install version="1.0">
-            <actions>
-                <action type="download_by_url">http://sourceforge.net/projects/defuse/files/defuse/0.6/defuse-0.6.0.tar.gz</action>
-                <action type="shell_command">cd tools &amp;&amp; make</action>
-                <action type="move_directory_files">
-                    <source_directory>.</source_directory>
-                    <destination_directory>$INSTALL_DIR</destination_directory>
-                </action>
-                <action type="set_environment">
-                    <environment_variable name="DEFUSE_PATH" action="set_to">$INSTALL_DIR</environment_variable>
-                </action>
-            </actions>
-        </install>
-        <readme>
-deFuse code
-To build the deFuse toolset you must have the boost c++ development libraries installed. If they are not installed on your system you can download them from the boost website. A full install of boost is not required. The easiest thing to do is to download the latest boost source tar.gz, extract it, then add the extracted path to the CPLUS_INCLUDE_PATH environment variable (in bash, `export CPLUS_INCLUDE_PATH=/boost/directory/:$CPLUS_INCLUDE_PATH`)
-        </readme>
+    <package name="defuse" version="0.6.2">
+        <repository changeset_revision="5a4237bbe6bf" name="package_defuse_0_6_2" owner="jjohnson" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
-
-    <package name="samtools" version="0.1.18">
-        <install version="1.0">
-            <actions>
-                <action type="download_by_url">http://sourceforge.net/projects/samtools/files/samtools/0.1.18/samtools-0.1.18.tar.bz2</action>
-                <action type="shell_command">sed -i.bak -e 's/-lcurses/-lncurses/g' Makefile</action>
-                <action type="shell_command">make</action>
-                <action type="move_file">
-                    <source>samtools</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="move_file">
-                    <source>misc/maq2sam-long</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="set_environment">
-                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
-                </action>
-            </actions>
-        </install>
-        <readme>
-Compiling SAMtools requires the ncurses and zlib development libraries.
-        </readme>
+    <package name="samtools" version="0.1.19">
+        <repository changeset_revision="96aab723499f" name="package_samtools_0_1_19" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
-
-
-    <package name="bowtie" version="0.12.7">
-        <install version="1.0">
-            <actions>
-                <action type="download_by_url">http://downloads.sourceforge.net/project/bowtie-bio/bowtie/0.12.7/bowtie-0.12.7-src.zip</action>
-                <action type="shell_command">make</action>
-                <action type="move_file">
-                    <source>bowtie</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="move_file">
-                    <source>bowtie-build</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="move_file">
-                    <source>bowtie-inspect</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="set_environment">
-                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
-            </action>
-            </actions>
-        </install>
-        <readme>
-            Compiling Bowtie requires libpthread to be present on your system.
-        </readme>
+    <package name="bowtie" version="1.0.0">
+        <repository changeset_revision="9fcaaedbbfd6" name="package_bowtie_1_0_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
-
-    <package name="gmap" version="2012-07-20">
-        <install version="1.0">
-            <actions>
-	        <action type="download_by_url" target_filename="gmap-2012-07-20.tar.gz">http://research-pub.gene.com/gmap/src/gmap-gsnap-2012-07-20.v2.tar.gz</action>
-                <action type="shell_command">./configure</action>
-                <action type="shell_command">make</action>
-                <action type="move_file">
-                    <source>src/gmap</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="move_file">
-                    <source>src/gmapindex</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="move_file">
-                    <source>src/gsnap</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="move_file">
-                    <source>src/uniqscan</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="move_file">
-                    <source>src/iit_store</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="move_file">
-                    <source>src/iit_get</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="move_file">
-                    <source>src/atoiindex</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="move_file">
-                    <source>src/snpindex</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="move_file">
-                    <source>src/cmetindex</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="move_file">
-                    <source>src/get-genome</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="move_directory_files">
-                    <source_directory>util</source_directory>
-                    <destination_directory>$INSTALL_DIR/bin</destination_directory>
-                </action>
-                <action type="set_environment">
-                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
-                </action>
-            </actions>
-        </install>
-        <readme>
-        </readme>
+    <package name="gmap" version="2013-05-09">
+        <repository changeset_revision="953f5eb53593" name="package_gmap_2013_05_09" owner="jjohnson" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="blat" version="35x1">
+        <repository changeset_revision="cc0f4b49b6f1" name="package_blat_35x1" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
-
-    <package name="blat" version="34x10">
-        <install version="1.0">
-            <actions>
-                <action type="download_by_url">http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/blat/blat</action>
-                <action type="shell_command">chmod 755 blat</action>
-                <action type="move_file">
-                    <source>blat</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="set_environment">
-                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
-                </action>
-            </actions>
-        </install>
-        <readme>
-This only handles blat for a non-commercial linux system.
-
-Please note that the Blat source and executables are freely available for
-academic, nonprofit and personal use. Commercial licensing information is
-available on the Kent Informatics website (http://www.kentinformatics.com/).
-        </readme>
+    <package name="R" version="3.1.2">
+        <repository changeset_revision="c987143177d4" name="package_r_3_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
-
-    <package name="fatotwobit" version="34x10">
-        <install version="1.0">
-            <actions>
-                <action type="download_by_url">http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/faToTwoBit</action>
-                <action type="shell_command">chmod 755 faToTwoBit</action>
-                <action type="move_file">
-                    <source>faToTwoBit</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="set_environment">
-                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
-                </action>
-            </actions>
-        </install>
-        <readme>
-This only handles faToTwoBit for a non-commercial linux system.
-
-Please note that the source and executables are freely available for
-academic, nonprofit and personal use. Commercial licensing information is
-available on the Kent Informatics website (http://www.kentinformatics.com/).
-        </readme>
+    <package name="ada" version="2.0.3">
+        <repository changeset_revision="f0e6af8a95e5" name="package_r_ada_2_0_3" owner="jjohnson" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
-
 </tool_dependency>