Mercurial > repos > jjohnson > defuse
diff defuse.xml @ 11:b22f8634ff84 draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/defuse commit 23b94b5747c6956360cd2eca0a07a669929ea141-dirty
author | jjohnson |
---|---|
date | Sun, 17 Jan 2016 14:11:06 -0500 |
parents | f65857c1b92e |
children | 4fe2e80d4ae1 |
line wrap: on
line diff
--- a/defuse.xml Mon Jan 14 12:24:28 2013 -0600 +++ b/defuse.xml Sun Jan 17 14:11:06 2016 -0500 @@ -1,103 +1,150 @@ -<tool id="defuse" name="DeFuse" version="1.6"> - <description>identify fusion transcripts</description> - <requirements> - <requirement type="package" version="0.6.0">defuse</requirement> - <requirement type="package" version="0.1.18">samtools</requirement> - <requirement type="package" version="0.12.7">bowtie</requirement> - <requirement type="package" version="2012-07-20">gmap</requirement> - <requirement type="package" version="34x10">blat</requirement> - <requirement type="package" version="34x10">fatotwobit</requirement> - </requirements> +<tool id="defuse" name="DeFuse" version="@DEFUSE_VERSION@.1"> + <description>identify fusion transcripts</description> + <macros> + <import>macros.xml</import> + </macros> + <requirements> + <expand macro="defuse_requirement" /> + <expand macro="mapping_requirements" /> + <expand macro="r_requirements" /> + </requirements> <command interpreter="command"> /bin/bash $shscript </command> <inputs> <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads. (FASTQ interlacer will pair reads and remove the unpaired. 
FASTQ de-interlacer will separate the result into left and right reads.)"/> <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/> + <param name="library_name" type="text" value="unknown" label="library name" help="Value to put in the results library_name column"> + <validator type="length" min="1"/> + </param> <conditional name="refGenomeSource"> - <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help=""> - <option value="indexed">Use a built-in DeFuse Reference Dataset</option> - <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option> + <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help=""> + <option value="indexed">Use a built-in DeFuse Reference Dataset</option> + <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option> + </param> + <when value="indexed"> + <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team"> + <options from_file="defuse_reference.loc"> + <column name="name" index="1"/> + <column name="value" index="3"/> + <filter type="sort_by" column="0" /> + <validator type="no_options" message="No indexes are available" /> + </options> + </param> + </when> + <when value="history"> + <param name="config" type="data" format="defuse.conf" label="Defuse Config file" help=""/> + </when> <!-- history --> + </conditional> <!-- refGenomeSource --> + <conditional name="defuse_param"> + <param name="settings" type="select" label="Defuse parameter settings" help=""> + <option value="preSet">Default settings</option> + <option value="full">Full parameter list</option> + </param> + <when 
value="preSet" /> + <when value="full"> + <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" /> + <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" /> + <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" /> + <param name="calculate_extra_annotations" type="select" label="Calculate extra annotations, fusion splice index and interrupted index" help=""> + <option value="">Use Default</option> + <option value="no">no</option> + <option value="yes">yes</option> </param> - <when value="indexed"> - <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team"> - <options from_file="defuse.loc"> - <column name="name" index="1"/> - <column name="value" index="2"/> - <filter type="sort_by" column="0" /> - <validator type="no_options" message="No indexes are available" /> - </options> - </param> - <conditional name="defuse_param"> - <param name="settings" type="select" label="Defuse parameter settings" help=""> - <option value="preSet">Default settings</option> - <option value="full">Full parameter list</option> - </param> - <when value="preSet" /> - <when value="full"> - <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" /> - <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" /> - <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" /> - <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision"> - <validator type="in_range" message="Choose a 
value between .1 and 1.0" min=".1" max="1"/> - </param> - <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" /> - <param name="split_count_threshold" type="integer" value="3" optional="true" label="Filter split_count_threshold" /> - <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold"> - <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> - </param> - <param name="max_dist_pos" type="integer" value="600" optional="true" label="Filter max_dist_pos" /> - <param name="num_dist_genes" type="integer" value="500" optional="true" label="Filter num_dist_genes" /> - <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" /> - <param name="max_concordant_ratio" type="float" value="0.1" optional="true" label="Filter max_concordant_ratio"> - <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> - </param> - <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" /> - <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold"> - <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> - </param> - <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density"> - <help>Position density when calculating covariance</help> - <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> - </param> - <param name="denovo_assembly" type="select" label="denovo_assembly" help=""> - <option value="">Use Default</option> - <option value="no">no</option> - <option value="yes">yes</option> - </param> - <!-- - <param name="positive_controls" type="data" format="txt" optional=true label="Defuse positive_controls" help=""/> - --> - 
</when> <!-- full --> - </conditional> <!-- defuse_param --> - </when> - <when value="history"> - <param name="config" type="data" format="txt" label="Defuse Config file" help=""/> - </when> <!-- history --> - </conditional> <!-- refGenomeSource --> + <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision"> + <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> + </param> + <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" /> + <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold"> + <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> + </param> + <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" /> + <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" /> + <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold"> + <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> + </param> + <param name="multi_exon_transcripts_stats" type="select" label="Use multiple exon transcripts for stats calculations" help="should be enabled for very small libraries"> + <option value="no" select="true">no</option> + <option value="yes">yes</option> + </param> + <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density"> + <help>Position density when calculating covariance</help> + <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> + </param> + <param name="max_paired_alignments" type="integer" value="10" optional="true" label="max_paired_alignments"> + <help>Maximum number of alignments for a read pair, Pairs with more alignments are 
filtered, default is 10</help> + <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="1" max="100"/> + </param> + <param name="denovo_assembly" type="select" label="denovo_assembly" help=""> + <option value="">Use Default</option> + <option value="no">no</option> + <option value="yes">yes</option> + </param> + <!-- + <param name="positive_controls" type="data" format="txt" optional=true label="Defuse positive_controls" help=""/> + --> + <param name="reads_per_job" type="integer" value="1000000" optional="true" label="Number of reads for each job in split" /> + </when> <!-- full --> + </conditional> <!-- defuse_param --> + <param name="breakpoints_bam" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Generate a Bam file for the fusions"/> <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files" help="The defuse output working directory can be helpful for determining errors that may have occurred during the run, but they require considerable diskspace, and should be deleted and purged when no longer needed."/> - <param name="do_get_reads" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/> + <param name="do_get_reads" type="boolean" checked="false" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/> </inputs> + <stdio> + <exit_code range="1:" level="fatal" description="Error Running Defuse" /> + </stdio> <outputs> <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/> <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" /> <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output (purge when no longer needed)"> <filter>keep_output == True</filter> </data> - <data format="tabular" name="results_tsv" label="${tool.name} on ${on_string}: results.tsv" /> - <data format="tabular" 
name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" /> - <data format="tabular" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" /> + <data format="defuse.results.tsv" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" /> + <data format="defuse.results.tsv" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" /> <data format="html" name="fusion_reads" label="${tool.name} on ${on_string}: fusion_reads"> <filter>do_get_reads == True</filter> </data> + <data format="bam" name="fusions_bam" label="${tool.name} on ${on_string}: fusions.bam"> + <filter>breakpoints_bam == True</filter> + </data> + <!-- + expression_plot + circos plot + --> </outputs> <configfiles> <configfile name="defuse_config"> -#import ast +#import re +#set $ds = chr(36) #if $refGenomeSource.genomeSource == "history": -#include raw $refGenomeSource.config.__str__ +#set config_file = $refGenomeSource.config.__str__ #else -#set $ref_dict = dict($ast.literal_eval($refGenomeSource.index.value)) +#set config_file = $refGenomeSource.index.value +#end if +#set pat = '^\s*([^#=][^=]*?)\s*=\s*(.*?)\s*$' +#set fh = open($config_file) +#set keys = ['dataset_directory','ensembl_organism','ensembl_prefix','ensembl_version','ensembl_genome_version','ucsc_genome_version','ncbi_organism','ncbi_prefix','chromosomes','mt_chromosome','gene_sources','ig_gene_sources','rrna_gene_sources'] +#set kv = [] +#for $line in $fh: + #set m = $re.match($pat,$line) + #if $m and len($m.groups()) == 2: + ## #echo $line + #if $m.groups()[0] in keys: + #set k = $m.groups()[0] + #if k == 'dataset_directory' and $refGenomeSource.genomeSource == "indexed": + ## The DataManager is configured to place the config file in the same directory as the defuse_data: dataset_directory + #set v = $os.path.dirname($config_file) + #else: + #set v = $m.groups()[1] + #end if + #set kv = $kv + [[$k, $v]] + #end if + 
#end if +#end for +## #echo $kv +#set ref_dict = dict($kv) +## #echo $ref_dict +## include raw $refGenomeSource.config.__str__ # # Configuration file for defuse # @@ -107,12 +154,7 @@ # Directory where the defuse code was unpacked ## Default location in the tool/defuse directory # source_directory = ${__root_dir__}/tools/defuse -source_directory = #slurp -#try -$ref_dict['source_directory'] -#except -__DEFUSE_PATH__ -#end try +source_directory = __DEFUSE_PATH__ # Directory where you want your dataset dataset_directory = #slurp @@ -122,18 +164,68 @@ /project/db/genomes/Hsapiens/hg19/defuse #end try +# Organism IDs +ensembl_organism = #slurp +#try +$ref_dict['ensembl_organism'] +#except +homo_sapiens +#end try + +ensembl_prefix = #slurp +#try +$ref_dict['ensembl_prefix'] +#except +Homo_sapiens +#end try + +ensembl_version = #slurp +#try +$ref_dict['ensembl_version'] +#except +71 +#end try + +ensembl_genome_version = #slurp +#try +$ref_dict['ensembl_genome_version'] +#except +GRCh37 +#end try + +ucsc_genome_version = #slurp +#try +$ref_dict['ucsc_genome_version'] +#except +hg19 +#end try + +ncbi_organism = #slurp +#try +$ref_dict['ncbi_organism'] +#except +Homo_sapiens +#end try + +ncbi_prefix = #slurp +#try +$ref_dict['ncbi_prefix'] +#except +Hs +#end try + # Input genome and gene models gene_models = #slurp #try $ref_dict['gene_models'] #except -\$(dataset_directory)/Homo_sapiens.GRCh37.62.gtf +\$(dataset_directory)/\$(ensembl_prefix).\$(ensembl_genome_version).\$(ensembl_version).gtf #end try genome_fasta = #slurp #try $ref_dict['genome_fasta'] #except -\$(dataset_directory)/Homo_sapiens.GRCh37.62.dna.chromosome.fa +\$(dataset_directory)/\$(ensembl_prefix).\$(ensembl_genome_version).\$(ensembl_version).dna.chromosomes.fa #end try # Repeat table from ucsc genome browser @@ -163,71 +255,28 @@ #try $ref_dict['unigene_fasta'] #except -\$(dataset_directory)/Hs.seq.uniq +\$(dataset_directory)/\$(ncbi_prefix).seq.uniq #end try # Paths to external tools -bowtie_bin = 
#slurp -#try -$ref_dict['bowtie_bin'] -#except -__BOWTIE_BIN__ -#end try -bowtie_build_bin = #slurp -#try -$ref_dict['bowtie_build_bin'] -#except -__BOWTIE_BUILD_BIN__ -#end try -blat_bin = #slurp -#try -$ref_dict['blat_bin'] -#except -__BLAT_BIN__ -#end try -fatotwobit_bin = #slurp -#try -$ref_dict['fatotwobit_bin'] -#except -__FATOTWOBIT_BIN__ -#end try -gmap_bin = #slurp -#try -$ref_dict['gmap_bin'] -#except -__GMAP_BIN__ -#end try -gmap_bin = #slurp -#try -$ref_dict['gmap_bin'] -#except -__GMAP_BIN__ -#end try -gmap_setup_bin = #slurp -#try -$ref_dict['gmap_setup_bin'] -#except -__GMAP_SETUP_BIN__ -#end try -r_bin = #slurp -#try -$ref_dict['r_bin'] -#except -__R_BIN__ -#end try -rscript_bin = #slurp -#try -$ref_dict['rscript_bin'] -#except -__RSCRIPT_BIN__ -#end try +bowtie_bin = __BOWTIE_BIN__ +bowtie_build_bin = __BOWTIE_BUILD_BIN__ +blat_bin = __BLAT_BIN__ +fatotwobit_bin = __FATOTWOBIT_BIN__ +gmap_bin = __GMAP_BIN__ +gmap_bin = __GMAP_BIN__ +gmap_setup_bin = __GMAP_SETUP_BIN__ +r_bin = __R_BIN__ +rscript_bin = __RSCRIPT_BIN__ # Directory where you want your dataset gmap_index_directory = #slurp #try $ref_dict['gmap_index_directory'] #except -\$(dataset_directory)/gmap +#raw +$(dataset_directory)/gmap +#end raw #end try #raw @@ -282,9 +331,15 @@ #except --phred33-quals #end try +bowtie_params = #slurp +#try +$ref_dict['bowtie_params'] +#except +--chunkmbs 200 +#end try max_insert_size = #slurp -#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_insert_size.__str__ != "": -$refGenomeSource.defuse_param.max_insert_size +#if $defuse_param.settings == "full" and $defuse_param.max_insert_size.__str__ != "": +$defuse_param.max_insert_size #else #try $ref_dict['max_insert_size'] @@ -335,8 +390,8 @@ # Minimum gene fusion range dna_concordant_length = #slurp -#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.dna_concordant_length.__str__ != "": 
-$refGenomeSource.defuse_param.dna_concordant_length +#if $defuse_param.settings == "full" and $defuse_param.dna_concordant_length.__str__ != "": +$defuse_param.dna_concordant_length #else #try $ref_dict['dna_concordant_length'] @@ -347,8 +402,8 @@ # Trim length for discordant reads (split reads are not trimmed) discord_read_trim = #slurp -#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.discord_read_trim.__str__ != "": -$refGenomeSource.defuse_param.discord_read_trim +#if $defuse_param.settings == "full" and $defuse_param.discord_read_trim.__str__ != "": +$defuse_param.discord_read_trim #else #try $ref_dict['discord_read_trim'] @@ -356,11 +411,21 @@ 50 #end try #end if - +# Calculate extra annotations, fusion splice index and interrupted index +calculate_extra_annotations = #slurp +#if $defuse_param.settings == "full" and $defuse_param.calculate_extra_annotations.__str__ != "": +$defuse_param.calculate_extra_annotations +#else +#try +$ref_dict['calculate_extra_annotations'] +#except +no +#end try +#end if # Filtering parameters clustering_precision = #slurp -#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.clustering_precision.__str__ != "" -$refGenomeSource.defuse_param.clustering_precision +#if $defuse_param.settings == "full" and $defuse_param.clustering_precision.__str__ != "" +$defuse_param.clustering_precision #else #try $ref_dict['clustering_precision'] @@ -369,8 +434,8 @@ #end try #end if span_count_threshold = #slurp -#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.span_count_threshold.__str__ != "" -$refGenomeSource.defuse_param.span_count_threshold +#if $defuse_param.settings == "full" and $defuse_param.span_count_threshold.__str__ != "" +$defuse_param.span_count_threshold #else #try $ref_dict['span_count_threshold'] @@ -378,19 +443,9 @@ 5 #end try #end if -split_count_threshold = #slurp -#if $refGenomeSource.defuse_param.settings == "full" 
and $refGenomeSource.defuse_param.split_count_threshold.__str__ != "" -$refGenomeSource.defuse_param.split_count_threshold -#else -#try -$ref_dict['split_count_threshold'] -#except -3 -#end try -#end if percent_identity_threshold = #slurp -#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.percent_identity_threshold.__str__ != "" -$refGenomeSource.defuse_param.percent_identity_threshold +#if $defuse_param.settings == "full" and $defuse_param.percent_identity_threshold.__str__ != "" +$defuse_param.percent_identity_threshold #else #try $ref_dict['percent_identity_threshold'] @@ -398,29 +453,9 @@ 0.90 #end try #end if -max_dist_pos = #slurp -#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_dist_pos.__str__ != "" -$refGenomeSource.defuse_param.max_dist_pos -#else -#try -$ref_dict['max_dist_pos'] -#except -600 -#end try -#end if -num_dist_genes = #slurp -#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.num_dist_genes.__str__ != "" -$refGenomeSource.defuse_param.num_dist_genes -#else -#try -$ref_dict['num_dist_genes'] -#except -500 -#end try -#end if split_min_anchor = #slurp -#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.split_min_anchor.__str__ != "" -$refGenomeSource.defuse_param.split_min_anchor +#if $defuse_param.settings == "full" and $defuse_param.split_min_anchor.__str__ != "" +$defuse_param.split_min_anchor #else #try $ref_dict['split_min_anchor'] @@ -428,19 +463,9 @@ 4 #end try #end if -max_concordant_ratio = #slurp -#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_concordant_ratio.__str__ != "" -$refGenomeSource.defuse_param.max_concordant_ratio -#else -#try -$ref_dict['max_concordant_ratio'] -#except -0.1 -#end try -#end if splice_bias = #slurp -#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.splice_bias.__str__ != "" 
-$refGenomeSource.defuse_param.splice_bias +#if $defuse_param.settings == "full" and $defuse_param.splice_bias.__str__ != "" +$defuse_param.splice_bias #else #try $ref_dict['splice_bias'] @@ -449,8 +474,8 @@ #end try #end if denovo_assembly = #slurp -#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.denovo_assembly.__str__ != "" -$refGenomeSource.defuse_param.denovo_assembly +#if $defuse_param.settings == "full" and $defuse_param.denovo_assembly.__str__ != "" +$defuse_param.denovo_assembly #else #try $ref_dict['denovo_assembly'] @@ -459,8 +484,8 @@ #end try #end if probability_threshold = #slurp -#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.probability_threshold.__str__ != "" -$refGenomeSource.defuse_param.probability_threshold +#if $defuse_param.settings == "full" and $defuse_param.probability_threshold.__str__ != "" +$defuse_param.probability_threshold #else #try $ref_dict['probability_threshold'] @@ -470,10 +495,23 @@ #end if positive_controls = \$(data_directory)/controls.txt +# Use multiple exon transcripts for stats calculations (yes/no) +# should be enabled for very small libraries +multi_exon_transcripts_stats = #slurp +#if $defuse_param.settings == "full" and $defuse_param.multi_exon_transcripts_stats.__str__ != "" +$defuse_param.multi_exon_transcripts_stats +#else +#try +$ref_dict['multi_exon_transcripts_stats'] +#except +no +#end try +#end if + # Position density when calculating covariance covariance_sampling_density = #slurp -#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.covariance_sampling_density.__str__ != "" -$refGenomeSource.defuse_param.covariance_sampling_density +#if $defuse_param.settings == "full" and $defuse_param.covariance_sampling_density.__str__ != "" +$defuse_param.covariance_sampling_density #else #try $ref_dict['covariance_sampling_density'] @@ -482,12 +520,30 @@ #end try #end if +# Maximum number of alignments for a 
read pair +# Pairs with more alignments are filtered +max_paired_alignments = #slurp +#if $defuse_param.settings == "full" and $defuse_param.max_paired_alignments.__str__ != "" +$defuse_param.max_paired_alignments +#else +#try +$ref_dict['max_paired_alignments'] +#except +10 +#end try +#end if # Number of reads for each job in split -reads_per_job = 1000000 - -# Number of regions for each breakpoint sequence job in split -regions_per_job = 20 +reads_per_job = #slurp +#if $defuse_param.settings == "full" and $defuse_param.reads_per_job.__str__ != "" +$defuse_param.reads_per_job +#else +#try +$ref_dict['reads_per_job'] +#except +1000000 +#end try +#end if #raw # If you have command line 'mail' and wish to be notified @@ -497,40 +553,10 @@ remove_job_files = yes remove_job_temp_files = yes -# Converting to fastq -# Fastq converter config format 1 for reads stored in separate files for each end -# data_lane_rexex_N is a perl regex which stores the lane id in $1 -# data_end_regex_N is a perl regex which stores the end, 1 or 2, in $1 -# data_compress_regex_N is a perl regex which stores the compression extension in $1 -# data_convert_N is the associated conversion utility that takes data at stdin and outputs fastq at stdout -# Fastq converter config format 2 for reads stored in separate files for each end -# data_lane_regex_N is a perl regex which stores the lane id in $1 -# data_compress_regex_N is a perl regex which stores the compression extension in $1 -# data_end1_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 1 at stdout -# data_end2_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 2 at stdout +qsub_params = "" -data_lane_regex_1 = ^(.+)_[12]_export\.txt.*$ -data_end_regex_1 = ^.+_([12])_export\.txt.*$ -data_compress_regex_1 = ^.+_[12]_export\.txt(.*)$ -data_converter_1 = $(scripts_directory)/fq_all2std.pl export2std - -data_lane_regex_2 = 
^(.+)_[12]_concat_qseq\.txt.*$ -data_end_regex_2 = ^.+_([12])_concat_qseq\.txt.*$ -data_compress_regex_2 = ^.+_[12]_concat_qseq\.txt(.*)$ -data_converter_2 = $(scripts_directory)/qseq2fastq.pl - -data_lane_regex_3 = ^(.+)\.bam.*$ -data_compress_regex_3 = ^.+\.bam(.*)$ -data_end1_converter_3 = samtools view - | filter_sam_mate.pl 1 | sam_to_fastq.pl -data_end2_converter_3 = samtools view - | filter_sam_mate.pl 2 | sam_to_fastq.pl - -data_lane_regex_4 = ^(.+).[12].fastq.*$ -data_end_regex_4 = ^.+.([12]).fastq.*$ -data_compress_regex_4 = ^.+.[12].fastq(.*)$ -data_converter_4 = cat #end raw -#end if </configfile> <configfile name="shscript"> @@ -588,29 +614,42 @@ cp $defuse_config $config_txt ## make a data_dir and ln -s the input fastq mkdir -p data_dir -ln -s $left_pairendreads data_dir/reads_1.fastq -ln -s $right_pairendreads data_dir/reads_2.fastq +## ln -s "$left_pairendreads" data_dir/reads_1.fastq +## ln -s "$right_pairendreads" data_dir/reads_2.fastq +cp "$left_pairendreads" data_dir/reads_1.fastq +cp "$right_pairendreads" data_dir/reads_2.fastq ## ln to output_dir in from_work_dir #if $defuse_out.__str__ != 'None': -mkdir -p $defuse_out.extra_files_path -ln -s $defuse_out.extra_files_path output_dir +mkdir -p $defuse_out.dataset.extra_files_path +ln -s $defuse_out.dataset.extra_files_path output_dir #else mkdir -p output_dir #end if ## run defuse.pl -perl \${DEFUSE_PATH}/scripts/defuse.pl -c $defuse_config -d data_dir -o output_dir -p 8 +perl \${DEFUSE_PATH}/scripts/defuse.pl -name "$library_name" -c $defuse_config -1 data_dir/reads_1.fastq -2 data_dir/reads_2.fastq -o output_dir -p \$GALAXY_SLOTS ## copy primary results to output datasets if [ -e output_dir/log/defuse.log ]; then cp output_dir/log/defuse.log $defuse_log; fi -if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi +## if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi if [ -e output_dir/results.filtered.tsv ]; then cp 
output_dir/results.filtered.tsv $results_filtered_tsv; fi if [ -e output_dir/results.classify.tsv ]; then cp output_dir/results.classify.tsv $results_classify_tsv; fi +#if $breakpoints_bam: +if [ -e output_dir/results.filtered.tsv ] ${amp}${amp} [ -e output_dir/breakpoints.genome.psl ] +then + awk "\\$10 ~ /^(`awk '\\$1 ~ /[0-9]+/{print \\$1}' output_dir/results.filtered.tsv | tr '\n' '|'`)\\$/{print \\$0}" output_dir/breakpoints.genome.psl > breakpoints.genome.filtered.psl ${amp}${amp} + psl2sam.pl breakpoints.genome.filtered.psl > breakpoints.genome.filtered.sam ${amp}${amp} + samtools view -b -T /panfs/roc/rissdb/galaxy/genomes/NCBIM37/defuse/defuse.reference.fa -o breakpoints.genome.filtered.bam breakpoints.genome.filtered.sam ${amp}${amp} + samtools sort breakpoints.genome.filtered.bam breakpoints ${amp}${amp} + ## samtools index breakpoints.bam + cp breakpoints.bam $fusions_bam +fi +#end if ## create html with links for output_dir #if $defuse_out.__str__ != 'None': if [ -e $defuse_out ] then echo '${lt}html${gt}${lt}head${gt}${lt}title${gt}Defuse Output${lt}/title${gt}${lt}/head${gt}${lt}body${gt}' ${gt} $defuse_out echo '${lt}h2${gt}Defuse Output Files${lt}/h2${gt}${lt}ul${gt}' ${gt}${gt} $defuse_out - pushd $defuse_out.extra_files_path + pushd $defuse_out.dataset.extra_files_path for f in `find -L . 
-maxdepth 1 -type f`; do fn=`basename ${ds}f`; echo '${lt}li${gt}${lt}a href="'${ds}fn'"${gt}'${ds}fn'${lt}/a${gt}${lt}/li${gt}' ${gt}${gt} $defuse_out; done @@ -623,8 +662,8 @@ #if $fusion_reads.__str__ != 'None': if [ -e output_dir/results.filtered.tsv -a -e $fusion_reads ] then - mkdir -p $fusion_reads.extra_files_path - results2html output_dir/results.filtered.tsv $fusion_reads $fusion_reads.extra_files_path + mkdir -p $fusion_reads.dataset.extra_files_path + results2html output_dir/results.filtered.tsv $fusion_reads $fusion_reads.dataset.extra_files_path fi #end if </configfile> @@ -753,4 +792,5 @@ 3596 TGGGGGTTGAGGCTTCTGTTCCCAGGTTCCATGACCTCAGAGGTGGCTGGTGAGGTTATGACCTTTGCCCTCCAGCCCTGGCTTAAAACCTCAGCCCTAGGACCTGGTTAAAGGAAGGGGAGATGGAGCTTTGCCCCGACCCCCCCCCGTTCCCCTCACCTGTCAGCCCGAGCTGGGCCAGGGCCCCTAGGTGGGGAACTGGGCCGGGGGGCGGGCACAAGCGGAGGTGGTGCCCCCAAAAGGGCTCCCGGTGGGGTCTTGCTGAGAAGGTGAGGGGTTCCCGGGGCCGCAGCAGGTGGTGGTGGAGGAGCCAAGCGGCTGTAGAGCAAGGGGTGAGCAGGTTCCAGACCGTAGAGGCGGGCAGCGGCCACGGCCCCGGGTCCAGTTAGCTCCTCACCCGCCTCATAGAAGCGGGGTGGCCTTGCCAGGCGTGGGGGTGCTGCC|TTCCTTGGATGTGGTAGCCGTTTCTCAGGCTCCCTCTCCGGAATCGAACCCTGATTCCCCGTCACCCGTGGTCACCATGGTAGGCACGGCGACTACCATCGAAAGTTGATAGGGCAGACGTTCGAATGGGTCGTCGCCGCCACGGGGGGCGTGCGATCAGCCCGAGGTTATCTAGAGTCACCAAAGCCGCCGGCGCCCGCCCCCCGGCCGGGGCCGGAGAGGGGCTGACCGGGTTGGTTTTGATCTGATAAATGCACGCATCCCCCCCGCGAAGGGGGTCAGCGCCCGTCGGCATGTATTAGCTCTAGAATTACCACAGTTATCCAAGTAGGAGAGGAGCGAGCGACCAAAGGAACCATAACTGATTTAATGAGCCATTCGCAGTTTCACTGTACCGGCCGTGCGTACTTAGACATGCATGGCTTAATCTTTGAGACAAGCATATGCTACTGGCAGG 250 7.00711162298275e-72 0.00912124762512338 0.00684237452309549 N N 3.31745197152461 3.47233119514066 3.31745197152461 splitr 7 0.0157657657657656 0 0 N 0.0135135135135136 N N 0 0 ENSG00000156860 ENSG00000212932 - + 16 21 30682131 48111157 coding upstream FBRS RPL23AP4 30670289 48110676 + + 0.0157657657657656 30680678 9827473 - + Y - - N output_dir 2 1 1.11111111111111 1 1 1 N N 0 1 9 0.325530693397641 0.296465452915709 0.325530693397641 0.296465452915709 2 - - </help> + 
<expand macro="citations"/> </tool>