# HG changeset patch # User Jim Johnson # Date 1357327743 21600 # Node ID 679a5c7b12943158f9f4e6b008f47b9823f9be41 # Parent c90022a13c7cee1416afdcfc09ed4d4183f04fc0 deFuse version 0.5.0 - Use tool_dependencies.xml diff -r c90022a13c7c -r 679a5c7b1294 README --- a/README Fri Jan 06 16:06:17 2012 -0600 +++ b/README Fri Jan 04 13:29:03 2013 -0600 @@ -1,29 +1,35 @@ -The DeFuse galaxy tool is based on DeFuse_Version_0.4.3 - http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page +The DeFuse galaxy tool is based on DeFuse_Version_0.5.0 +http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page DeFuse is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion. Manual: - http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.3 +http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2 + +The included tool_dependencies.xml will download and install the defuse code. +It will set the environment variable: "DEFUSE_PATH" to the location of the defuse install. -The included defuse source code is from: http://sourceforge.net/projects/defuse/files/defuse/0.4/defuse-0.4.3.tar.gz/download -(to install uncompress the defuse source and navigate to the tools dir and type make) -tar zxf defuse-0.4.3.tar.gz -cd defuse-0.4.3/tool -make -cd ../.. +The defuse.pl command relies on a configuration file to specify options, the location of reference data, and other applications that it depends upon: bowtie, bowtie-build, samtools, blat, fatotwobit, R, and Rscript. 
+ +The DeFuse galaxy tool can either construct the config.txt file that is mentioned in the defuse manual, or select an existing config.txt file in the user's history. +When constructing the config.txt file, the DeFuse tool uses the values selected in: tool-data/defuse.loc +The dictionary field in the tool-data/defuse.loc can be used to set fields in the config.txt file, including the site specific location of reference data and the paths to the other application binaries. +The "Defuse parameter settings" are used to alter options in the config.txt file. + +The DeFuse galaxy tool also generates a bash script to run defuse. +That script will attempt to edit the config.txt file to specify any unset paths to applications that defuse relies upon: +bowtie, bowtie-build, samtools, blat, fatotwobit, R, and Rscript +The script uses the shell "which" command to discover the application path, so the required applications should be in the PATH environment variable. + Generate Reference Datasets as described in the Manual: - The manual has detailed instructions on how to set up reference datasets for Human hg19 and hg18. - We were able to follow the same basic procedures to set up a reference for Mouse mm9. +The manual has detailed instructions on how to set up reference datasets for Human hg19 and hg18. +We were able to follow the same basic procedures to set up a reference for Mouse mm9. - These datasets should be referenced in the tool-data/defuse.loc file. +These datasets should be referenced in the tool-data/defuse.loc file. 
-The defuse.xml galaxy tool wrapper will generate a defuse config.txt using values from tool-data/defuse.loc -and call defuse-0.4.3/scripts/defuse.pl - diff -r c90022a13c7c -r 679a5c7b1294 defuse-0.4.3.tar.gz Binary file defuse-0.4.3.tar.gz has changed diff -r c90022a13c7c -r 679a5c7b1294 defuse.xml --- a/defuse.xml Fri Jan 06 16:06:17 2012 -0600 +++ b/defuse.xml Fri Jan 04 13:29:03 2013 -0600 @@ -1,7 +1,10 @@ - + identify fusion transcripts - + defuse + bowtie + blat + fatotwobit /bin/bash $shscript @@ -69,7 +72,7 @@ - + @@ -91,15 +94,7 @@ #try $ref_dict['source_directory'] #except -#try -## Try to find the defuse source dir in the galaxy tool path -#import Cheetah.FileUtils -#set $toolpath = '/'.join([$__root_dir__,'tools','defuse']) -#set $defuse = $Cheetah.FileUtils.findFiles($toolpath,['defuse.pl'],[],['tools','external','include','em','data'])[0] -$defuse.replace('/scripts/defuse.pl','') -#except -${__root_dir__}/tools/defuse/defuse -#end try +__DEFUSE_PATH__ #end try # Directory where you want your dataset @@ -159,37 +154,37 @@ #try $ref_dict['bowtie_bin'] #except -/soft/bowtie/0.12.7/bowtie +__BOWTIE_BIN__ #end try bowtie_build_bin = #slurp #try $ref_dict['bowtie_build_bin'] #except -/soft/bowtie/0.12.7/bowtie-build +__BOWTIE_BUILD_BIN__ #end try blat_bin = #slurp #try $ref_dict['blat_bin'] #except -/soft/blat/34/bin/blat +__BLAT_BIN__ #end try fatotwobit_bin = #slurp #try $ref_dict['fatotwobit_bin'] #except -/soft/blat/34/bin/faToTwoBit +__FATOTWOBIT_BIN__ #end try r_bin = #slurp #try $ref_dict['r_bin'] #except -/project/sdml-sles11-weblocal/R-2.12.1/bin/R +__R_BIN__ #end try rscript_bin = #slurp #try $ref_dict['rscript_bin'] #except -/project/sdml-sles11-weblocal/R-2.12.1/bin/Rscript +__RSCRIPT_BIN__ #end try #raw @@ -499,14 +494,12 @@ #!/bin/bash ## define some things for cheetah proccessing #set $ds = chr(36) +#set $amp = chr(38) #set $gt = chr(62) #set $lt = chr(60) #set $echo_cmd = 'echo' ## Find the defuse.pl in the galaxy tool path #import 
Cheetah.FileUtils -#set $toolpath = '/'.join([$__root_dir__,'tools','defuse']) -#set $defuse = $Cheetah.FileUtils.findFiles($toolpath,['defuse.pl'],[],['tools','external','include','em','data'])[0] -#set $get_reads = $Cheetah.FileUtils.findFiles($toolpath,['get_reads.pl'],[],['tools','external','include','em','data'])[0] ## declare a bash function for converting a results tsv into html with links to the get_reads output files results2html() { rlts=${ds}1 @@ -531,10 +524,21 @@ for i in `awk '${ds}1 ~ /[1-9][0-9]*/{print ${ds}1}' ${ds}rlts`; do fn=cluster_${ds}{i}_reads.txt; pn=${ds}_EFP/${ds}fn; - perl $get_reads -c $defuse_config -o output_dir -i ${ds}i ${gt} ${ds}pn; + perl \${DEFUSE_PATH}/scripts/get_reads.pl -c $defuse_config -o output_dir -i ${ds}i ${gt} ${ds}pn; done fi } +## substitute pathnames into config file +if `grep __DEFUSE_PATH__ $defuse_config ${gt} /dev/null`;then sed -i'.tmp' "s#__DEFUSE_PATH__#\${DEFUSE_PATH}#" $defuse_config; fi +if `grep __SAMTOOLS_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} SAMTOOLS_BIN=`which samtools`;then sed -i'.tmp' "s#__SAMTOOLS_BIN__#\${SAMTOOLS_BIN}#" $defuse_config; fi +if `grep __BOWTIE_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BIN=`which bowtie`;then sed -i'.tmp' "s#__BOWTIE_BIN__#\${BOWTIE_BIN}#" $defuse_config; fi +if `grep __BOWTIE_BUILD_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BUILD_BIN=`which bowtie-build`;then sed -i'.tmp' "s#__BOWTIE_BUILD_BIN__#\${BOWTIE_BUILD_BIN}#" $defuse_config; fi +if `grep __BLAT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BLAT_BIN=`which blat`;then sed -i'.tmp' "s#__BLAT_BIN__#\${BLAT_BIN}#" $defuse_config; fi +if `grep __FATOTWOBIT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} FATOTWOBIT_BIN=`which faToTwoBit`;then sed -i'.tmp' "s#__FATOTWOBIT_BIN__#\${FATOTWOBIT_BIN}#" $defuse_config; fi +if `grep __R_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} R_BIN=`which R`;then sed -i'.tmp' "s#__R_BIN__#\${R_BIN}#" $defuse_config; fi +if 
`grep __RSCRIPT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} RSCRIPT_BIN=`which Rscript`;then sed -i'.tmp' "s#__RSCRIPT_BIN__#\${RSCRIPT_BIN}#" $defuse_config; fi + + ## copy config to output cp $defuse_config $config_txt ## make a data_dir and ln -s the input fastq @@ -549,7 +553,7 @@ mkdir -p output_dir #end if ## run defuse.pl -perl $defuse -c $defuse_config -d data_dir -o output_dir -p 8 +perl \${DEFUSE_PATH}/scripts/defuse.pl -c $defuse_config -d data_dir -o output_dir -p 8 ## copy primary results to output datasets if [ -e output_dir/log/defuse.log ]; then cp output_dir/log/defuse.log $defuse_log; fi if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi diff -r c90022a13c7c -r 679a5c7b1294 tool-data/defuse.loc.sample --- a/tool-data/defuse.loc.sample Fri Jan 06 16:06:17 2012 -0600 +++ b/tool-data/defuse.loc.sample Fri Jan 04 13:29:03 2013 -0600 @@ -1,9 +1,11 @@ -## Configurstion info for prepared data references for DeFuse Version 0.4.3 -# http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page +## Configurstion info for prepared data references for DeFuse ## http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2 ## 3 columns separated by the TAB character ## The 3rd column has dictionary values that will be substituted in the config file for defuse ## It should likely contain keys: dataset_directory gene_models genome_fasta repeats_filename est_fasta est_alignments unigene_fasta +## If this is not a Homo_sapiens reference also need keys: gene_id_pattern transcript_id_pattern chromosomes + #db_key name {'config_key':'config_value'} -hg19 GRCh37(hg19) {'samtools_bin':'/soft/samtools/0.1.8/bin/samtools', 'dataset_directory':'/project/db/genomes/Hsapiens/hg19/defuse', 'gene_models':'$(dataset_directory)/Homo_sapiens.GRCh37.62.gtf', 'genome_fasta':'$(dataset_directory)/Homo_sapiens.GRCh37.62.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 
'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Hs.seq.uniq', 'chromosomes':'1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'} -mm9 NCBIM37(mm9) {'samtools_bin':'/soft/samtools/0.1.8/bin/samtools', 'dataset_directory':'/project/db/genomes/Mmusculus/mm9/defuse', 'gene_models':'$(dataset_directory)/Mus_musculus.NCBIM37.63.gtf', 'genome_fasta':'$(dataset_directory)/Mus_musculus.NCBIM37.63.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Mm.seq.uniq', 'chromosomes':'1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'} +#hg19 GRCh37(hg19) {'gene_id_pattern':'ENSG\d+', 'transcript_id_pattern':'ENST\d+', 'dataset_directory':'/data/genomes/Hsapiens/hg19/defuse', 'gene_models':'$(dataset_directory)/Homo_sapiens.GRCh37.62.gtf', 'genome_fasta':'$(dataset_directory)/Homo_sapiens.GRCh37.62.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Hs.seq.uniq', 'chromosomes':'1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 
'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'} +#mm9 NCBIM37(mm9) {'gene_id_pattern':'ENSMUSG\d+', 'transcript_id_pattern':'ENSMUST\d+', 'dataset_directory':'/data/genomes/Mmusculus/mm9/defuse', 'gene_models':'$(dataset_directory)/Mus_musculus.NCBIM37.63.gtf', 'genome_fasta':'$(dataset_directory)/Mus_musculus.NCBIM37.63.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Mm.seq.uniq', 'chromosomes':'1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'} +#mm8 NCBIM36(mm8) {'gene_id_pattern':'ENSMUSG\d+', 'transcript_id_pattern':'ENSMUST\d+', 'dataset_directory':'/data/genomes/Mmusculus/mm9/defuse', 'gene_models':'$(dataset_directory)/Mus_musculus.NCBIM36.46.gtf', 'genome_fasta':'$(dataset_directory)/Mus_musculus.NCBIM36.46.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Mm.seq.uniq', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'} diff -r c90022a13c7c -r 679a5c7b1294 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Fri Jan 04 13:29:03 2013 -0600 @@ -0,0 +1,21 @@ + + + + + + http://sourceforge.net/projects/defuse/files/defuse/0.5/defuse-0.5.0.tar.gz + cd tools && make + + . 
+ $INSTALL_DIR + + + $INSTALL_DIR + + + + + + + +