Mercurial > repos > jjohnson > defuse

diff defuse.xml @ 11:b22f8634ff84 draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/defuse commit 23b94b5747c6956360cd2eca0a07a669929ea141-dirty
author: jjohnson
date: Sun, 17 Jan 2016 14:11:06 -0500
parents: f65857c1b92e
children: 4fe2e80d4ae1
--- a/defuse.xml	Mon Jan 14 12:24:28 2013 -0600
+++ b/defuse.xml	Sun Jan 17 14:11:06 2016 -0500
@@ -1,103 +1,150 @@
-<tool id="defuse" name="DeFuse" version="1.6">
- <description>identify fusion transcripts</description>
- <requirements>
-  <requirement type="package" version="0.6.0">defuse</requirement>
-  <requirement type="package" version="0.1.18">samtools</requirement>
-  <requirement type="package" version="0.12.7">bowtie</requirement>
-  <requirement type="package" version="2012-07-20">gmap</requirement>
-  <requirement type="package" version="34x10">blat</requirement>
-  <requirement type="package" version="34x10">fatotwobit</requirement>
- </requirements>
+<tool id="defuse" name="DeFuse" version="@DEFUSE_VERSION@.1">
+    <description>identify fusion transcripts</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <expand macro="defuse_requirement" />
+        <expand macro="mapping_requirements" />
+        <expand macro="r_requirements" />
+    </requirements>
   <command interpreter="command"> /bin/bash $shscript </command>
  <inputs>
   <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads.  (FASTQ interlacer will pair reads and remove the unpaired.   FASTQ de-interlacer will separate the result into left and right reads.)"/>
   <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/>
+  <param name="library_name" type="text" value="unknown" label="library name" help="Value to put in the results library_name column">
+    <validator type="length" min="1"/>
+  </param>
   <conditional name="refGenomeSource">
-      <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help="">
-        <option value="indexed">Use a built-in DeFuse Reference Dataset</option>
-        <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option>
+    <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help="">
+      <option value="indexed">Use a built-in DeFuse Reference Dataset</option>
+      <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option>
+    </param>
+    <when value="indexed">
+      <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team">
+        <options from_file="defuse_reference.loc">
+          <column name="name" index="1"/>
+          <column name="value" index="3"/>
+          <filter type="sort_by" column="0" />
+          <validator type="no_options" message="No indexes are available" />
+        </options>
+      </param>
+    </when>
+    <when value="history">
+      <param name="config" type="data" format="defuse.conf" label="Defuse Config file" help=""/>
+    </when>  <!-- history -->
+  </conditional>  <!-- refGenomeSource -->
+  <conditional name="defuse_param">
+    <param name="settings" type="select" label="Defuse parameter settings" help="">
+      <option value="preSet">Default settings</option>
+      <option value="full">Full parameter list</option>
+    </param>
+    <when value="preSet" />
+    <when value="full">
+      <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" />
+      <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" />
+      <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" />
+      <param name="calculate_extra_annotations" type="select" label="Calculate extra annotations, fusion splice index and interrupted index" help="">
+        <option value="">Use Default</option>
+        <option value="no">no</option>
+        <option value="yes">yes</option>
       </param>
-      <when value="indexed">
-        <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team">
-          <options from_file="defuse.loc">
-            <column name="name" index="1"/>
-            <column name="value" index="2"/>
-            <filter type="sort_by" column="0" />
-            <validator type="no_options" message="No indexes are available" />
-          </options>
-        </param>
-        <conditional name="defuse_param">
-          <param name="settings" type="select" label="Defuse parameter settings" help="">
-            <option value="preSet">Default settings</option>
-            <option value="full">Full parameter list</option>
-          </param>
-          <when value="preSet" />
-          <when value="full">
-            <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" />
-            <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" />
-            <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" />
-            <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision">
-              <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
-            </param>
-            <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" />
-            <param name="split_count_threshold" type="integer" value="3" optional="true" label="Filter split_count_threshold" />
-            <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold">
-              <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
-            </param>
-            <param name="max_dist_pos" type="integer" value="600" optional="true" label="Filter max_dist_pos" />
-            <param name="num_dist_genes" type="integer" value="500" optional="true" label="Filter num_dist_genes" />
-            <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" />
-            <param name="max_concordant_ratio" type="float" value="0.1" optional="true" label="Filter max_concordant_ratio">
-              <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
-            </param>
-            <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" />
-            <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold">
-              <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
-            </param>
-            <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density">
-              <help>Position density when calculating covariance</help>
-              <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
-            </param>
-            <param name="denovo_assembly" type="select" label="denovo_assembly" help="">
-              <option value="">Use Default</option>
-              <option value="no">no</option>
-              <option value="yes">yes</option>
-            </param>
-            <!--
-              <param name="positive_controls" type="data" format="txt" optional=true label="Defuse positive_controls" help=""/>
-            -->
-          </when> <!-- full -->
-        </conditional>  <!-- defuse_param -->
-      </when>
-      <when value="history">
-        <param name="config" type="data" format="txt" label="Defuse Config file" help=""/>
-      </when>  <!-- history -->
-  </conditional>  <!-- refGenomeSource -->
+      <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision">
+        <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
+      </param>
+      <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" />
+      <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold">
+        <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
+      </param>
+      <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" />
+      <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" />
+      <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold">
+        <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
+      </param>
+      <param name="multi_exon_transcripts_stats" type="select" label="Use multiple exon transcripts for stats calculations" help="should be enabled for very small libraries">
+        <option value="no" select="true">no</option>
+        <option value="yes">yes</option>
+      </param>
+      <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density">
+        <help>Position density when calculating covariance</help>
+        <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
+      </param>
+      <param name="max_paired_alignments" type="integer" value="10" optional="true" label="max_paired_alignments">
+        <help>Maximum number of alignments for a read pair, Pairs with more alignments are filtered, default is 10</help>
+        <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="1" max="100"/>
+      </param>
+      <param name="denovo_assembly" type="select" label="denovo_assembly" help="">
+        <option value="">Use Default</option>
+        <option value="no">no</option>
+        <option value="yes">yes</option>
+      </param>
+      <!--
+        <param name="positive_controls" type="data" format="txt" optional=true label="Defuse positive_controls" help=""/>
+      -->
+      <param name="reads_per_job" type="integer" value="1000000" optional="true" label="Number of reads for each job in split" />
+    </when> <!-- full -->
+  </conditional>  <!-- defuse_param -->
+  <param name="breakpoints_bam" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Generate a Bam file for the fusions"/>
   <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files" 
          help="The defuse output working directory can be helpful for determining errors that may have occurred during the run, 
                but they require considerable diskspace, and should be deleted and purged when no longer needed."/>
-  <param name="do_get_reads" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/>
+  <param name="do_get_reads" type="boolean" checked="false" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/>
  </inputs>
+ <stdio>
+   <exit_code range="1:"  level="fatal" description="Error Running Defuse" />
+ </stdio>
  <outputs>
   <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/>
   <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" />
   <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output (purge when no longer needed)">
     <filter>keep_output == True</filter>
   </data>
-  <data format="tabular" name="results_tsv" label="${tool.name} on ${on_string}: results.tsv" />
-  <data format="tabular" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" />
-  <data format="tabular" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" />
+  <data format="defuse.results.tsv" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" />
+  <data format="defuse.results.tsv" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" />
   <data format="html" name="fusion_reads" label="${tool.name} on ${on_string}: fusion_reads">
     <filter>do_get_reads == True</filter>
   </data>
+  <data format="bam" name="fusions_bam" label="${tool.name} on ${on_string}: fusions.bam">
+    <filter>breakpoints_bam == True</filter>
+  </data>
+  <!--
+   expression_plot
+   circos plot
+  -->
  </outputs>
  <configfiles>
   <configfile name="defuse_config">
-#import ast
+#import re
+#set $ds = chr(36)
 #if $refGenomeSource.genomeSource == "history":
-#include raw $refGenomeSource.config.__str__
+#set config_file = $refGenomeSource.config.__str__
 #else 
-#set $ref_dict = dict($ast.literal_eval($refGenomeSource.index.value))
+#set config_file = $refGenomeSource.index.value
+#end if
+#set pat = '^\s*([^#=][^=]*?)\s*=\s*(.*?)\s*$'
+#set fh = open($config_file)
+#set keys = ['dataset_directory','ensembl_organism','ensembl_prefix','ensembl_version','ensembl_genome_version','ucsc_genome_version','ncbi_organism','ncbi_prefix','chromosomes','mt_chromosome','gene_sources','ig_gene_sources','rrna_gene_sources']
+#set kv = []
+#for $line in $fh:
+  #set m = $re.match($pat,$line)
+  #if $m and len($m.groups()) == 2:
+    ## #echo $line
+    #if $m.groups()[0] in keys:
+      #set k = $m.groups()[0]
+      #if k == 'dataset_directory' and $refGenomeSource.genomeSource == "indexed":
+        ## The DataManager is conifgured to place the config file in the same directory as the defuse_data: dataset_directory
+        #set v = $os.path.dirname($config_file)
+      #else:
+        #set v = $m.groups()[1]
+      #end if
+      #set kv = $kv + [[$k, $v]]
+    #end if
+  #end if
+#end for
+## #echo $kv
+#set ref_dict = dict($kv)
+## #echo $ref_dict
+## include raw $refGenomeSource.config.__str__
 #
 # Configuration file for defuse
 #
@@ -107,12 +154,7 @@
 # Directory where the defuse code was unpacked
 ## Default location in the tool/defuse directory  
 # source_directory = ${__root_dir__}/tools/defuse
-source_directory = #slurp
-#try
-$ref_dict['source_directory']
-#except
-__DEFUSE_PATH__
-#end try
+source_directory = __DEFUSE_PATH__
 
 # Directory where you want your dataset
 dataset_directory = #slurp
@@ -122,18 +164,68 @@
 /project/db/genomes/Hsapiens/hg19/defuse
 #end try
 
+# Organism IDs
+ensembl_organism = #slurp
+#try
+$ref_dict['ensembl_organism']
+#except
+homo_sapiens
+#end try
+
+ensembl_prefix = #slurp
+#try
+$ref_dict['ensembl_prefix']
+#except
+Homo_sapiens
+#end try
+
+ensembl_version = #slurp
+#try
+$ref_dict['ensembl_version']
+#except
+71
+#end try
+
+ensembl_genome_version = #slurp
+#try
+$ref_dict['ensembl_genome_version']
+#except
+GRCh37
+#end try
+
+ucsc_genome_version = #slurp
+#try
+$ref_dict['ucsc_genome_version']
+#except
+hg19
+#end try
+
+ncbi_organism = #slurp
+#try
+$ref_dict['ncbi_organism']
+#except
+Homo_sapiens
+#end try
+
+ncbi_prefix = #slurp
+#try
+$ref_dict['ncbi_prefix']
+#except
+Hs
+#end try
+
 # Input genome and gene models
 gene_models = #slurp
 #try
 $ref_dict['gene_models']
 #except
-\$(dataset_directory)/Homo_sapiens.GRCh37.62.gtf
+\$(dataset_directory)/\$(ensembl_prefix).\$(ensembl_genome_version).\$(ensembl_version).gtf
 #end try
 genome_fasta = #slurp
 #try
 $ref_dict['genome_fasta']
 #except
-\$(dataset_directory)/Homo_sapiens.GRCh37.62.dna.chromosome.fa
+\$(dataset_directory)/\$(ensembl_prefix).\$(ensembl_genome_version).\$(ensembl_version).dna.chromosomes.fa
 #end try
 
 # Repeat table from ucsc genome browser
@@ -163,71 +255,28 @@
 #try
 $ref_dict['unigene_fasta']
 #except
-\$(dataset_directory)/Hs.seq.uniq
+\$(dataset_directory)/\$(ncbi_prefix).seq.uniq
 #end try
 
 # Paths to external tools
-bowtie_bin = #slurp
-#try
-$ref_dict['bowtie_bin']
-#except
-__BOWTIE_BIN__
-#end try
-bowtie_build_bin = #slurp
-#try
-$ref_dict['bowtie_build_bin']
-#except
-__BOWTIE_BUILD_BIN__
-#end try
-blat_bin = #slurp
-#try
-$ref_dict['blat_bin']
-#except
-__BLAT_BIN__
-#end try
-fatotwobit_bin = #slurp
-#try
-$ref_dict['fatotwobit_bin']
-#except
-__FATOTWOBIT_BIN__
-#end try
-gmap_bin = #slurp
-#try
-$ref_dict['gmap_bin']
-#except
-__GMAP_BIN__
-#end try
-gmap_bin = #slurp
-#try
-$ref_dict['gmap_bin']
-#except
-__GMAP_BIN__
-#end try
-gmap_setup_bin = #slurp
-#try
-$ref_dict['gmap_setup_bin']
-#except
-__GMAP_SETUP_BIN__
-#end try
-r_bin = #slurp
-#try
-$ref_dict['r_bin']
-#except
-__R_BIN__
-#end try
-rscript_bin = #slurp
-#try
-$ref_dict['rscript_bin']
-#except
-__RSCRIPT_BIN__
-#end try
+bowtie_bin = __BOWTIE_BIN__
+bowtie_build_bin = __BOWTIE_BUILD_BIN__
+blat_bin = __BLAT_BIN__
+fatotwobit_bin = __FATOTWOBIT_BIN__
+gmap_bin = __GMAP_BIN__
+gmap_bin = __GMAP_BIN__
+gmap_setup_bin = __GMAP_SETUP_BIN__
+r_bin = __R_BIN__
+rscript_bin = __RSCRIPT_BIN__
 
 # Directory where you want your dataset
 gmap_index_directory = #slurp
 #try
 $ref_dict['gmap_index_directory']
 #except
-\$(dataset_directory)/gmap
+#raw
+$(dataset_directory)/gmap
+#end raw
 #end try
 
 #raw
@@ -282,9 +331,15 @@
 #except
 --phred33-quals
 #end try
+bowtie_params = #slurp
+#try
+$ref_dict['bowtie_params']
+#except
+--chunkmbs 200
+#end try
 max_insert_size = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_insert_size.__str__ != "":
-$refGenomeSource.defuse_param.max_insert_size
+#if $defuse_param.settings == "full" and $defuse_param.max_insert_size.__str__ != "":
+$defuse_param.max_insert_size
 #else
 #try
 $ref_dict['max_insert_size']
@@ -335,8 +390,8 @@
 
 # Minimum gene fusion range
 dna_concordant_length = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.dna_concordant_length.__str__ != "":
-$refGenomeSource.defuse_param.dna_concordant_length
+#if $defuse_param.settings == "full" and $defuse_param.dna_concordant_length.__str__ != "":
+$defuse_param.dna_concordant_length
 #else
 #try
 $ref_dict['dna_concordant_length']
@@ -347,8 +402,8 @@
 
 # Trim length for discordant reads (split reads are not trimmed)
 discord_read_trim = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.discord_read_trim.__str__ != "":
-$refGenomeSource.defuse_param.discord_read_trim
+#if $defuse_param.settings == "full" and $defuse_param.discord_read_trim.__str__ != "":
+$defuse_param.discord_read_trim
 #else
 #try
 $ref_dict['discord_read_trim']
@@ -356,11 +411,21 @@
 50
 #end try
 #end if
-
+# Calculate extra annotations, fusion splice index and interrupted index
+calculate_extra_annotations = #slurp
+#if $defuse_param.settings == "full" and $defuse_param.calculate_extra_annotations.__str__ != "":
+$defuse_param.calculate_extra_annotations
+#else
+#try
+$ref_dict['calculate_extra_annotations']
+#except
+no
+#end try
+#end if
 # Filtering parameters
 clustering_precision = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.clustering_precision.__str__ != ""
-$refGenomeSource.defuse_param.clustering_precision
+#if $defuse_param.settings == "full" and $defuse_param.clustering_precision.__str__ != ""
+$defuse_param.clustering_precision
 #else
 #try
 $ref_dict['clustering_precision']
@@ -369,8 +434,8 @@
 #end try
 #end if
 span_count_threshold = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.span_count_threshold.__str__ != ""
-$refGenomeSource.defuse_param.span_count_threshold
+#if $defuse_param.settings == "full" and $defuse_param.span_count_threshold.__str__ != ""
+$defuse_param.span_count_threshold
 #else
 #try
 $ref_dict['span_count_threshold']
@@ -378,19 +443,9 @@
 5
 #end try
 #end if
-split_count_threshold = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.split_count_threshold.__str__ != ""
-$refGenomeSource.defuse_param.split_count_threshold
-#else
-#try
-$ref_dict['split_count_threshold']
-#except
-3
-#end try
-#end if
 percent_identity_threshold = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.percent_identity_threshold.__str__ != ""
-$refGenomeSource.defuse_param.percent_identity_threshold
+#if $defuse_param.settings == "full" and $defuse_param.percent_identity_threshold.__str__ != ""
+$defuse_param.percent_identity_threshold
 #else
 #try
 $ref_dict['percent_identity_threshold']
@@ -398,29 +453,9 @@
 0.90
 #end try
 #end if
-max_dist_pos = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_dist_pos.__str__ != ""
-$refGenomeSource.defuse_param.max_dist_pos
-#else
-#try
-$ref_dict['max_dist_pos']
-#except
-600
-#end try
-#end if
-num_dist_genes = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.num_dist_genes.__str__ != ""
-$refGenomeSource.defuse_param.num_dist_genes
-#else
-#try
-$ref_dict['num_dist_genes']
-#except
-500
-#end try
-#end if
 split_min_anchor = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.split_min_anchor.__str__ != ""
-$refGenomeSource.defuse_param.split_min_anchor
+#if $defuse_param.settings == "full" and $defuse_param.split_min_anchor.__str__ != ""
+$defuse_param.split_min_anchor
 #else
 #try
 $ref_dict['split_min_anchor']
@@ -428,19 +463,9 @@
 4
 #end try
 #end if
-max_concordant_ratio = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_concordant_ratio.__str__ != ""
-$refGenomeSource.defuse_param.max_concordant_ratio
-#else
-#try
-$ref_dict['max_concordant_ratio']
-#except
-0.1
-#end try
-#end if
 splice_bias = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.splice_bias.__str__ != ""
-$refGenomeSource.defuse_param.splice_bias
+#if $defuse_param.settings == "full" and $defuse_param.splice_bias.__str__ != ""
+$defuse_param.splice_bias
 #else
 #try
 $ref_dict['splice_bias']
@@ -449,8 +474,8 @@
 #end try
 #end if
 denovo_assembly = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.denovo_assembly.__str__ != ""
-$refGenomeSource.defuse_param.denovo_assembly
+#if $defuse_param.settings == "full" and $defuse_param.denovo_assembly.__str__ != ""
+$defuse_param.denovo_assembly
 #else
 #try
 $ref_dict['denovo_assembly']
@@ -459,8 +484,8 @@
 #end try
 #end if
 probability_threshold = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.probability_threshold.__str__ != ""
-$refGenomeSource.defuse_param.probability_threshold
+#if $defuse_param.settings == "full" and $defuse_param.probability_threshold.__str__ != ""
+$defuse_param.probability_threshold
 #else
 #try
 $ref_dict['probability_threshold']
@@ -470,10 +495,23 @@
 #end if
 positive_controls                           = \$(data_directory)/controls.txt
 
+# Use multiple exon transcripts for stats calculations (yes/no)
+# should be enabled for very small libraries
+multi_exon_transcripts_stats = #slurp
+#if $defuse_param.settings == "full" and $defuse_param.multi_exon_transcripts_stats.__str__ != ""
+$defuse_param.multi_exon_transcripts_stats
+#else
+#try
+$ref_dict['multi_exon_transcripts_stats']
+#except
+no
+#end try
+#end if
+
 # Position density when calculating covariance
 covariance_sampling_density = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.covariance_sampling_density.__str__ != ""
-$refGenomeSource.defuse_param.covariance_sampling_density
+#if $defuse_param.settings == "full" and $defuse_param.covariance_sampling_density.__str__ != ""
+$defuse_param.covariance_sampling_density
 #else
 #try
 $ref_dict['covariance_sampling_density']
@@ -482,12 +520,30 @@
 #end try
 #end if
 
+# Maximum number of alignments for a read pair
+# Pairs with more alignments are filtered
+max_paired_alignments = #slurp
+#if $defuse_param.settings == "full" and $defuse_param.max_paired_alignments.__str__ != ""
+$defuse_param.max_paired_alignments
+#else
+#try
+$ref_dict['max_paired_alignments']
+#except
+10
+#end try
+#end if
 
 # Number of reads for each job in split
-reads_per_job                               = 1000000
-
-# Number of regions for each breakpoint sequence job in split
-regions_per_job                             = 20
+reads_per_job = #slurp
+#if $defuse_param.settings == "full" and $defuse_param.reads_per_job.__str__ != ""
+$defuse_param.reads_per_job
+#else
+#try
+$ref_dict['reads_per_job']
+#except
+1000000
+#end try
+#end if
 
 #raw
 # If you have command line 'mail' and wish to be notified
@@ -497,40 +553,10 @@
 remove_job_files                            = yes
 remove_job_temp_files                       = yes
 
-# Converting to fastq
-# Fastq converter config format 1 for reads stored in separate files for each end
-#  data_lane_rexex_N is a perl regex which stores the lane id in $1
-#  data_end_regex_N is a perl regex which stores the end, 1 or 2, in $1
-#  data_compress_regex_N is a perl regex which stores the compression extension in $1
-#  data_convert_N is the associated conversion utility that takes data at stdin and outputs fastq at stdout
-# Fastq converter config format 2 for reads stored in separate files for each end
-#  data_lane_regex_N is a perl regex which stores the lane id in $1
-#  data_compress_regex_N is a perl regex which stores the compression extension in $1
-#  data_end1_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 1 at stdout
-#  data_end2_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 2 at stdout
+qsub_params                                 = ""
 
-data_lane_regex_1                           = ^(.+)_[12]_export\.txt.*$
-data_end_regex_1                            = ^.+_([12])_export\.txt.*$
-data_compress_regex_1                       = ^.+_[12]_export\.txt(.*)$
-data_converter_1                            = $(scripts_directory)/fq_all2std.pl export2std
-
-data_lane_regex_2                           = ^(.+)_[12]_concat_qseq\.txt.*$
-data_end_regex_2                            = ^.+_([12])_concat_qseq\.txt.*$
-data_compress_regex_2                       = ^.+_[12]_concat_qseq\.txt(.*)$
-data_converter_2                            = $(scripts_directory)/qseq2fastq.pl
-
-data_lane_regex_3                           = ^(.+)\.bam.*$
-data_compress_regex_3                       = ^.+\.bam(.*)$
-data_end1_converter_3                       = samtools view - | filter_sam_mate.pl 1 | sam_to_fastq.pl
-data_end2_converter_3                       = samtools view - | filter_sam_mate.pl 2 | sam_to_fastq.pl
-
-data_lane_regex_4                           = ^(.+).[12].fastq.*$
-data_end_regex_4                            = ^.+.([12]).fastq.*$
-data_compress_regex_4                       = ^.+.[12].fastq(.*)$
-data_converter_4                            = cat
 #end raw
 
-#end if
 
   </configfile>
   <configfile name="shscript">
@@ -588,29 +614,42 @@
 cp $defuse_config $config_txt
 ## make a data_dir  and ln -s the input fastq
 mkdir -p data_dir
-ln -s $left_pairendreads data_dir/reads_1.fastq
-ln -s $right_pairendreads data_dir/reads_2.fastq
+## ln -s "$left_pairendreads" data_dir/reads_1.fastq
+## ln -s "$right_pairendreads" data_dir/reads_2.fastq
+cp "$left_pairendreads" data_dir/reads_1.fastq
+cp "$right_pairendreads" data_dir/reads_2.fastq
 ## ln to output_dir in from_work_dir
 #if $defuse_out.__str__ != 'None':
-mkdir -p $defuse_out.extra_files_path
-ln -s $defuse_out.extra_files_path  output_dir
+mkdir -p $defuse_out.dataset.extra_files_path
+ln -s $defuse_out.dataset.extra_files_path  output_dir
 #else
 mkdir -p output_dir
 #end if
 ## run defuse.pl
-perl \${DEFUSE_PATH}/scripts/defuse.pl -c $defuse_config -d data_dir -o output_dir  -p 8
+perl \${DEFUSE_PATH}/scripts/defuse.pl -name "$library_name" -c $defuse_config -1 data_dir/reads_1.fastq -2 data_dir/reads_2.fastq -o output_dir  -p \$GALAXY_SLOTS
 ## copy primary results to output datasets
 if [ -e output_dir/log/defuse.log ]; then cp output_dir/log/defuse.log $defuse_log; fi
-if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi
+## if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi
 if [ -e output_dir/results.filtered.tsv ]; then cp output_dir/results.filtered.tsv $results_filtered_tsv; fi
 if [ -e output_dir/results.classify.tsv ]; then cp output_dir/results.classify.tsv $results_classify_tsv; fi
+#if $breakpoints_bam:
+if [ -e output_dir/results.filtered.tsv ] ${amp}${amp}  [ -e output_dir/breakpoints.genome.psl ]
+then
+  awk "\\$10 ~ /^(`awk '\\$1 ~ /[0-9]+/{print \\$1}' output_dir/results.filtered.tsv | tr '\n' '|'`)\\$/{print \\$0}" output_dir/breakpoints.genome.psl > breakpoints.genome.filtered.psl ${amp}${amp}
+  psl2sam.pl breakpoints.genome.filtered.psl > breakpoints.genome.filtered.sam ${amp}${amp}
+  samtools view -b -T /panfs/roc/rissdb/galaxy/genomes/NCBIM37/defuse/defuse.reference.fa -o breakpoints.genome.filtered.bam breakpoints.genome.filtered.sam ${amp}${amp}
+  samtools sort breakpoints.genome.filtered.bam breakpoints ${amp}${amp}
+  ## samtools index breakpoints.bam
+  cp breakpoints.bam $fusions_bam
+fi
+#end if
 ## create html with links for output_dir
 #if $defuse_out.__str__ != 'None':
 if [ -e $defuse_out ]
 then
   echo '${lt}html${gt}${lt}head${gt}${lt}title${gt}Defuse Output${lt}/title${gt}${lt}/head${gt}${lt}body${gt}' ${gt} $defuse_out
   echo '${lt}h2${gt}Defuse Output Files${lt}/h2${gt}${lt}ul${gt}' ${gt}${gt}  $defuse_out
-  pushd $defuse_out.extra_files_path
+  pushd $defuse_out.dataset.extra_files_path
   for f in `find -L . -maxdepth 1 -type f`; 
    do fn=`basename ${ds}f`; echo '${lt}li${gt}${lt}a href="'${ds}fn'"${gt}'${ds}fn'${lt}/a${gt}${lt}/li${gt}' ${gt}${gt}  $defuse_out; 
   done
@@ -623,8 +662,8 @@
 #if $fusion_reads.__str__ != 'None':
 if [ -e output_dir/results.filtered.tsv -a -e $fusion_reads ] 
 then
-  mkdir -p $fusion_reads.extra_files_path
-  results2html output_dir/results.filtered.tsv $fusion_reads $fusion_reads.extra_files_path
+  mkdir -p $fusion_reads.dataset.extra_files_path
+  results2html output_dir/results.filtered.tsv $fusion_reads $fusion_reads.dataset.extra_files_path
 fi
 #end if
   </configfile>
@@ -753,4 +792,5 @@
   3596	TGGGGGTTGAGGCTTCTGTTCCCAGGTTCCATGACCTCAGAGGTGGCTGGTGAGGTTATGACCTTTGCCCTCCAGCCCTGGCTTAAAACCTCAGCCCTAGGACCTGGTTAAAGGAAGGGGAGATGGAGCTTTGCCCCGACCCCCCCCCGTTCCCCTCACCTGTCAGCCCGAGCTGGGCCAGGGCCCCTAGGTGGGGAACTGGGCCGGGGGGCGGGCACAAGCGGAGGTGGTGCCCCCAAAAGGGCTCCCGGTGGGGTCTTGCTGAGAAGGTGAGGGGTTCCCGGGGCCGCAGCAGGTGGTGGTGGAGGAGCCAAGCGGCTGTAGAGCAAGGGGTGAGCAGGTTCCAGACCGTAGAGGCGGGCAGCGGCCACGGCCCCGGGTCCAGTTAGCTCCTCACCCGCCTCATAGAAGCGGGGTGGCCTTGCCAGGCGTGGGGGTGCTGCC|TTCCTTGGATGTGGTAGCCGTTTCTCAGGCTCCCTCTCCGGAATCGAACCCTGATTCCCCGTCACCCGTGGTCACCATGGTAGGCACGGCGACTACCATCGAAAGTTGATAGGGCAGACGTTCGAATGGGTCGTCGCCGCCACGGGGGGCGTGCGATCAGCCCGAGGTTATCTAGAGTCACCAAAGCCGCCGGCGCCCGCCCCCCGGCCGGGGCCGGAGAGGGGCTGACCGGGTTGGTTTTGATCTGATAAATGCACGCATCCCCCCCGCGAAGGGGGTCAGCGCCCGTCGGCATGTATTAGCTCTAGAATTACCACAGTTATCCAAGTAGGAGAGGAGCGAGCGACCAAAGGAACCATAACTGATTTAATGAGCCATTCGCAGTTTCACTGTACCGGCCGTGCGTACTTAGACATGCATGGCTTAATCTTTGAGACAAGCATATGCTACTGGCAGG	250	7.00711162298275e-72	0.00912124762512338	0.00684237452309549	N	N	3.31745197152461	3.47233119514066	3.31745197152461	splitr	7	0.0157657657657656	0	0	N	0.0135135135135136	N	N	0	0	ENSG00000156860	ENSG00000212932	-	+	16	21	30682131	48111157	coding	upstream	FBRS	RPL23AP4	30670289	48110676	+	+	0.0157657657657656	30680678	9827473	-	+	Y	-	-	N	output_dir	2	1	1.11111111111111	1	1	1	N	N	0	1	9	0.325530693397641	0.296465452915709	0.325530693397641	0.296465452915709	2	-	-	
 
  </help>
+    <expand macro="citations"/>
 </tool>
author	jjohnson
date	Sun, 17 Jan 2016 14:11:06 -0500
parents	f65857c1b92e
children	4fe2e80d4ae1