changeset 2:4245c2b047de

Changes for defuse-0.4.3: modifications for non-human genomes are no longer required; defuse.xml now searches for the location of scripts/defuse.pl
author Jim Johnson <jj@umn.edu>
date Tue, 08 Nov 2011 13:56:35 -0600
parents 36306d8086fa
children c90022a13c7c
files README defuse-0.4.2.tar.gz defuse-0.4.3.tar.gz defuse.xml modified_scripts.tgz tool-data/defuse.loc.sample
diffstat 6 files changed, 34 insertions(+), 40 deletions(-) [+]
line wrap: on
line diff
--- a/README	Fri Sep 16 12:41:37 2011 -0500
+++ b/README	Tue Nov 08 13:56:35 2011 -0600
@@ -1,33 +1,29 @@
-The DeFuse galaxy tool is based on DeFuse_Version_0.4.2
+The DeFuse galaxy tool is based on DeFuse_Version_0.4.3
   http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page
 
 DeFuse is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion.
 
 
 Manual:
-  http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2
+  http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.3
 
 
-The included defuse source code is from: http://sourceforge.net/projects/defuse/files/defuse/0.4/defuse-0.4.2.tar.gz/download 
-(without the defuse-0.4.2 dir level)
-tar zxf defuse-0.4.2.tar.gz
-cd tool 
+The included defuse source code is from:   http://sourceforge.net/projects/defuse/files/defuse/0.4/defuse-0.4.3.tar.gz/download 
+(to install, uncompress the defuse source, change into the defuse-0.4.3/tool dir, and type make)
+tar zxf defuse-0.4.3.tar.gz
+cd defuse-0.4.3/tool 
 make
-cd ..
+cd ../..
 
-To use with non human genome references:
-tar zxf modified_scripts.tgz
-Defuse source was modified to include 2 extra parameters for non human references: gene_id_pattern and transcript_id_pattern
-	scripts/alignjob.pl
-	scripts/annotate_fusions.pl
-	scripts/calculate_expression_simple.pl
-	scripts/filter_bulk_fusion_reads.pl
-	scripts/filter_sam_genes.pl
-	scripts/find_concordant_ensembl.pl
-	scripts/find_gene_clusters.pl
+Generate Reference Datasets as described in the Manual: 
+
+  The manual has detailed instructions on how to set up reference datasets for Human hg19 and hg18.  
+  We were able to follow the same basic procedures to set up a reference for Mouse mm9.
+
+  These datasets should be referenced in the tool-data/defuse.loc file.   
 
 
 The defuse.xml galaxy tool wrapper will generate a defuse config.txt using values from tool-data/defuse.loc
-and call scripts/defuse.pl
+and call defuse-0.4.3/scripts/defuse.pl
 
 
Binary file defuse-0.4.2.tar.gz has changed
Binary file defuse-0.4.3.tar.gz has changed
--- a/defuse.xml	Fri Sep 16 12:41:37 2011 -0500
+++ b/defuse.xml	Tue Nov 08 13:56:35 2011 -0600
@@ -1,10 +1,14 @@
-<tool id="defuse" name="DeFuse" version="1.0">
+<tool id="defuse" name="DeFuse" version="1.1">
  <description>identify fusion transcripts</description>
  <requirements>
   <requirement type="binary"></requirement>
  </requirements>
  <command interpreter="perl">
-  scripts/defuse.pl
+  ## Find the defuse.pl in the galaxy tool path
+  #import Cheetah.FileUtils
+  #set $toolpath = '/'.join([$__root_dir__,'tools','defuse'])
+  #set $defuse = $Cheetah.FileUtils.findFiles($toolpath,['defuse.pl'],[],['tools','external','include','em','data'])[0]
+  $defuse
   -c `cp $defuse_config $config_txt; echo $defuse_config`
   -d `mkdir -p data_dir; ln -s $left_pairendreads data_dir/reads_1.fastq; ln -s $right_pairendreads data_dir/reads_2.fastq; echo data_dir`
   -o  output_dir -p 8
@@ -86,20 +90,7 @@
 #
 # At a minimum, change all values enclused by []
 #
-# Gene/Transcript id pattern
-gene_id_pattern = #slurp
-#try
-$ref_dict['gene_id_pattern']
-transcript_id_pattern = #slurp
-#except
-ENSG\d+
-#end try
-#try
-$ref_dict['transcript_id_pattern']
-#except
-ENST\d+
-#end try
-                      
+
 # Directory where the defuse code was unpacked
 ## Default location in the tool/defuse directory  
 # source_directory = ${__root_dir__}/tools/defuse
@@ -107,7 +98,15 @@
 #try
 $ref_dict['source_directory']
 #except
-${__root_dir__}/tools/defuse
+#try
+## Try to find the defuse source dir in the galaxy tool path
+#import Cheetah.FileUtils
+#set $toolpath = '/'.join([$__root_dir__,'tools','defuse'])
+#set $defuse = $Cheetah.FileUtils.findFiles($toolpath,['defuse.pl'],[],['tools','external','include','em','data'])[0]
+$defuse.replace('/scripts/defuse.pl','')
+#except
+${__root_dir__}/tools/defuse/defuse
+#end try
 #end try
 
 # Directory where you want your dataset
Binary file modified_scripts.tgz has changed
--- a/tool-data/defuse.loc.sample	Fri Sep 16 12:41:37 2011 -0500
+++ b/tool-data/defuse.loc.sample	Tue Nov 08 13:56:35 2011 -0600
@@ -1,10 +1,9 @@
-## Configurstion info for prepared data references for DeFuse
+## Configuration info for prepared data references for DeFuse Version 0.4.3
+#  http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page
 ## http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2
 ## 3 columns separated by the TAB character
 ## The 3rd column has dictionary values that will be substituted in the config file for defuse
 ## It should likely contain keys:   dataset_directory gene_models genome_fasta repeats_filename est_fasta est_alignments unigene_fasta
-## If this is not a Homo_sapiens reference also need keys:  gene_id_pattern transcript_id_pattern chromosomes
-
 #db_key	name	{'config_key':'config_value'}
-hg19	GRCh37(hg19)	{'samtools_bin':'/soft/samtools/0.1.12a/bin/samtools', 'gene_id_pattern':'ENSG\d+', 'transcript_id_pattern':'ENST\d+', 'dataset_directory':'/project/db/genomes/Hsapiens/hg19/defuse', 'gene_models':'$(dataset_directory)/Homo_sapiens.GRCh37.62.gtf', 'genome_fasta':'$(dataset_directory)/Homo_sapiens.GRCh37.62.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Hs.seq.uniq', 'chromosomes':'1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'}
-mm9	NCBIM37(mm9)	{'samtools_bin':'/soft/samtools/0.1.12a/bin/samtools', 'gene_id_pattern':'ENSMUSG\d+', 'transcript_id_pattern':'ENSMUST\d+', 'dataset_directory':'/project/db/genomes/Mmusculus/mm9/defuse', 'gene_models':'$(dataset_directory)/Mus_musculus.NCBIM37.63.gtf', 'genome_fasta':'$(dataset_directory)/Mus_musculus.NCBIM37.63.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Mm.seq.uniq', 'chromosomes':'1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'}
+hg19	GRCh37(hg19)	{'samtools_bin':'/soft/samtools/0.1.8/bin/samtools', 'dataset_directory':'/project/db/genomes/Hsapiens/hg19/defuse', 'gene_models':'$(dataset_directory)/Homo_sapiens.GRCh37.62.gtf', 'genome_fasta':'$(dataset_directory)/Homo_sapiens.GRCh37.62.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Hs.seq.uniq', 'chromosomes':'1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'}
+mm9	NCBIM37(mm9)	{'samtools_bin':'/soft/samtools/0.1.8/bin/samtools', 'dataset_directory':'/project/db/genomes/Mmusculus/mm9/defuse', 'gene_models':'$(dataset_directory)/Mus_musculus.NCBIM37.63.gtf', 'genome_fasta':'$(dataset_directory)/Mus_musculus.NCBIM37.63.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Mm.seq.uniq', 'chromosomes':'1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'}