comparison defuse.xml @ 2:999746fc92ba default tip

Uploaded
author nate
date Thu, 10 Nov 2011 09:56:05 -0500
parents
children
comparison
equal deleted inserted replaced
1:ad220f659249 2:999746fc92ba
1 <tool id="defuse" name="DeFuse" version="1.1">
2 <description>identify fusion transcripts</description>
3 <requirements>
4 <requirement type="binary"></requirement>
5 </requirements>
6 <command interpreter="perl">
7 ## Find the defuse.pl in the galaxy tool path: search below $__root_dir__/tools/defuse and take the first match; the last findFiles argument is a list of directory names to skip (ignoreDirs in Cheetah.FileUtils - confirm against the installed Cheetah version)
8 #import Cheetah.FileUtils
9 #set $toolpath = '/'.join([$__root_dir__,'tools','defuse'])
10 #set $defuse = $Cheetah.FileUtils.findFiles($toolpath,['defuse.pl'],[],['tools','external','include','em','data'])[0]
11 $defuse
12 -c `cp $defuse_config $config_txt; echo $defuse_config`
13 -d `mkdir -p data_dir; ln -s $left_pairendreads data_dir/reads_1.fastq; ln -s $right_pairendreads data_dir/reads_2.fastq; echo data_dir`
14 -o output_dir -p 8
15 </command>
16 <inputs>
17 <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads. (FASTQ interlacer will pair reads and remove the unpaired. FASTQ de-interlacer will separate the result into left and right reads.)"/>
18 <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/>
19 <conditional name="refGenomeSource">
20 <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help="">
21 <option value="indexed">Use a built-in DeFuse Reference Dataset</option>
22 <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option>
23 </param>
24 <when value="indexed">
25 <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team">
26 <options from_file="defuse.loc">
27 <column name="name" index="1"/>
28 <column name="value" index="2"/>
29 <filter type="sort_by" column="0" />
30 <validator type="no_options" message="No indexes are available" />
31 </options>
32 </param>
33 <conditional name="defuse_param">
34 <param name="settings" type="select" label="Defuse parameter settings" help="">
35 <option value="preSet">Default settings</option>
36 <option value="full">Full parameter list</option>
37 </param>
38 <when value="preSet" />
39 <when value="full">
40 <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" />
41 <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" />
42 <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" />
43 <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision">
44 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
45 </param>
46 <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" />
47 <param name="split_count_threshold" type="integer" value="3" optional="true" label="Filter split_count_threshold" />
48 <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold">
49 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
50 </param>
51 <param name="max_dist_pos" type="integer" value="600" optional="true" label="Filter max_dist_pos" />
52 <param name="num_dist_genes" type="integer" value="500" optional="true" label="Filter num_dist_genes" />
53 <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" />
54 <param name="max_concordant_ratio" type="float" value="0.1" optional="true" label="Filter max_concordant_ratio">
55 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
56 </param>
57 <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" />
58 <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold">
59 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
60 </param>
61 <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density">
62 <help>Position density when calculating covariance</help>
63 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
64 </param>
65 <param name="denovo_assembly" type="select" label="denovo_assembly" help="">
66 <option value="">Use Default</option>
67 <option value="no">no</option>
68 <option value="yes">yes</option>
69 </param>
70 <!-- Disabled: the generated config always writes positive_controls = $(data_directory)/controls.txt
71 <param name="positive_controls" type="data" format="txt" optional="true" label="Defuse positive_controls" help=""/>
72 -->
73 </when> <!-- full -->
74 </conditional> <!-- defuse_param -->
75 </when> <!-- indexed -->
76 <when value="history">
77 <param name="config" type="data" format="txt" label="Defuse Config file" help=""/>
78 </when> <!-- history -->
79 </conditional> <!-- refGenomeSource -->
80 </inputs>
81 <configfiles>
82 <configfile name="defuse_config">
83 #import ast
84 #if $refGenomeSource.genomeSource == "history":
85 #include raw $refGenomeSource.config.__str__
86 #else
87 #set $ref_dict = dict($ast.literal_eval($refGenomeSource.index.value))
88 #
89 # Configuration file for defuse
90 #
91 # At a minimum, change all values enclused by []
92 #
93
94 # Directory where the defuse code was unpacked
95 ## Default location in the tool/defuse directory
96 # source_directory = ${__root_dir__}/tools/defuse
97 source_directory = #slurp
98 #try
99 $ref_dict['source_directory']
100 #except
101 #try
102 ## Try to find the defuse source dir in the galaxy tool path
103 #import Cheetah.FileUtils
104 #set $toolpath = '/'.join([$__root_dir__,'tools','defuse'])
105 #set $defuse = $Cheetah.FileUtils.findFiles($toolpath,['defuse.pl'],[],['tools','external','include','em','data'])[0]
106 $defuse.replace('/scripts/defuse.pl','')
107 #except
108 ${__root_dir__}/tools/defuse/defuse
109 #end try
110 #end try
111
112 # Directory where you want your dataset
113 dataset_directory = #slurp
114 #try
115 $ref_dict['dataset_directory']
116 #except
117 /project/db/genomes/Hsapiens/hg19/defuse
118 #end try
119
120 # Input genome and gene models
121 gene_models = #slurp
122 #try
123 $ref_dict['gene_models']
124 #except
125 \$(dataset_directory)/Homo_sapiens.GRCh37.62.gtf
126 #end try
127 genome_fasta = #slurp
128 #try
129 $ref_dict['genome_fasta']
130 #except
131 \$(dataset_directory)/Homo_sapiens.GRCh37.62.dna.chromosome.fa
132 #end try
133
134 # Repeat table from ucsc genome browser
135 repeats_filename = #slurp
136 #try
137 $ref_dict['repeats_filename']
138 #except
139 \$(dataset_directory)/rmsk.txt
140 #end try
141
142 # EST info downloaded from ucsc genome browser
143 est_fasta = #slurp
144 #try
145 $ref_dict['est_fasta']
146 #except
147 \$(dataset_directory)/est.fa
148 #end try
149 est_alignments = #slurp
150 #try
151 $ref_dict['est_alignments']
152 #except
153 \$(dataset_directory)/intronEst.txt
154 #end try
155
156 # Unigene clusters downloaded from ncbi
157 unigene_fasta = #slurp
158 #try
159 $ref_dict['unigene_fasta']
160 #except
161 \$(dataset_directory)/Hs.seq.uniq
162 #end try
163
164 # Paths to external tools
165 bowtie_bin = #slurp
166 #try
167 $ref_dict['bowtie_bin']
168 #except
169 /soft/bowtie/0.12.7/bowtie
170 #end try
171 bowtie_build_bin = #slurp
172 #try
173 $ref_dict['bowtie_build_bin']
174 #except
175 /soft/bowtie/0.12.7/bowtie-build
176 #end try
177 blat_bin = #slurp
178 #try
179 $ref_dict['blat_bin']
180 #except
181 /soft/blat/34/bin/blat
182 #end try
183 fatotwobit_bin = #slurp
184 #try
185 $ref_dict['fatotwobit_bin']
186 #except
187 /soft/blat/34/bin/faToTwoBit
188 #end try
189 r_bin = #slurp
190 #try
191 $ref_dict['r_bin']
192 #except
193 /project/sdml-sles11-weblocal/R-2.12.1/bin/R
194 #end try
195 rscript_bin = #slurp
196 #try
197 $ref_dict['rscript_bin']
198 #except
199 /project/sdml-sles11-weblocal/R-2.12.1/bin/Rscript
200 #end try
201
202 #raw
203 # Dataset files
204 dataset_prefix = $(dataset_directory)/defuse
205 chromosome_prefix = $(dataset_prefix).dna.chromosomes
206 exons_fasta = $(dataset_prefix).exons.fa
207 cds_fasta = $(dataset_prefix).cds.fa
208 cdna_regions = $(dataset_prefix).cdna.regions
209 cdna_fasta = $(dataset_prefix).cdna.fa
210 reference_fasta = $(dataset_prefix).reference.fa
211 rrna_fasta = $(dataset_prefix).rrna.fa
212 ig_gene_list = $(dataset_prefix).ig.gene.list
213 repeats_regions = $(dataset_directory)/repeats.regions
214 est_split_fasta1 = $(dataset_directory)/est.1.fa
215 est_split_fasta2 = $(dataset_directory)/est.2.fa
216 est_split_fasta3 = $(dataset_directory)/est.3.fa
217 est_split_fasta4 = $(dataset_directory)/est.4.fa
218 est_split_fasta5 = $(dataset_directory)/est.5.fa
219 est_split_fasta6 = $(dataset_directory)/est.6.fa
220 est_split_fasta7 = $(dataset_directory)/est.7.fa
221 est_split_fasta8 = $(dataset_directory)/est.8.fa
222 est_split_fasta9 = $(dataset_directory)/est.9.fa
223
224 # Fasta files with bowtie indices for prefiltering reads for concordantly mapping pairs
225 prefilter1 = $(unigene_fasta)
226
227 # deFuse scripts and tools
228 scripts_directory = $(source_directory)/scripts
229 tools_directory = $(source_directory)/tools
230 data_directory = $(source_directory)/data
231 #end raw
232
233 # Path to samtools, 0.1.8 is compiled for you, use other versions at your own risk
234 samtools_bin = #slurp
235 #try
236 $ref_dict['samtools_bin']
237 #except
238 \$(source_directory)/external/samtools-0.1.8/samtools
239 #end try
240
241 # Bowtie parameters
242 bowtie_threads = #slurp
243 #try
244 $ref_dict['bowtie_threads']
245 #except
246 1
247 #end try
248 bowtie_quals = #slurp
249 #try
250 $ref_dict['bowtie_quals']
251 #except
252 --phred33-quals
253 #end try
254 max_insert_size = #slurp
255 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_insert_size.__str__ != "":
256 $refGenomeSource.defuse_param.max_insert_size
257 #else
258 #try
259 $ref_dict['max_insert_size']
260 #except
261 500
262 #end try
263 #end if
264
265 # Parameters for building the dataset
266 chromosomes = #slurp
267 #try
268 $ref_dict['chromosomes']
269 #except
270 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT
271 #end try
272 mt_chromosome = #slurp
273 #try
274 $ref_dict['mt_chromosome']
275 #except
276 MT
277 #end try
278 gene_sources = #slurp
279 #try
280 $ref_dict['gene_sources']
281 #except
282 IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding
283 #end try
284 ig_gene_sources = #slurp
285 #try
286 $ref_dict['ig_gene_sources']
287 #except
288 IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene
289 #end try
290 rrna_gene_sources = #slurp
291 #try
292 $ref_dict['rrna_gene_sources']
293 #except
294 Mt_rRNA,rRNA,rRNA_pseudogene
295 #end try
296
297 # Blat sequences per job
298 num_blat_sequences = #slurp
299 #try
300 $ref_dict['num_blat_sequences']
301 #except
302 10000
303 #end try
304
305 # Minimum gene fusion range
306 dna_concordant_length = #slurp
307 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.dna_concordant_length.__str__ != "":
308 $refGenomeSource.defuse_param.dna_concordant_length
309 #else
310 #try
311 $ref_dict['dna_concordant_length']
312 #except
313 2000
314 #end try
315 #end if
316
317 # Trim length for discordant reads (split reads are not trimmed)
318 discord_read_trim = #slurp
319 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.discord_read_trim.__str__ != "":
320 $refGenomeSource.defuse_param.discord_read_trim
321 #else
322 #try
323 $ref_dict['discord_read_trim']
324 #except
325 50
326 #end try
327 #end if
328
329 # Filtering parameters
330 clustering_precision = #slurp
331 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.clustering_precision.__str__ != ""
332 $refGenomeSource.defuse_param.clustering_precision
333 #else
334 #try
335 $ref_dict['clustering_precision']
336 #except
337 0.95
338 #end try
339 #end if
340 span_count_threshold = #slurp
341 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.span_count_threshold.__str__ != ""
342 $refGenomeSource.defuse_param.span_count_threshold
343 #else
344 #try
345 $ref_dict['span_count_threshold']
346 #except
347 5
348 #end try
349 #end if
350 split_count_threshold = #slurp
351 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.split_count_threshold.__str__ != ""
352 $refGenomeSource.defuse_param.split_count_threshold
353 #else
354 #try
355 $ref_dict['split_count_threshold']
356 #except
357 3
358 #end try
359 #end if
360 percent_identity_threshold = #slurp
361 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.percent_identity_threshold.__str__ != ""
362 $refGenomeSource.defuse_param.percent_identity_threshold
363 #else
364 #try
365 $ref_dict['percent_identity_threshold']
366 #except
367 0.90
368 #end try
369 #end if
370 max_dist_pos = #slurp
371 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_dist_pos.__str__ != ""
372 $refGenomeSource.defuse_param.max_dist_pos
373 #else
374 #try
375 $ref_dict['max_dist_pos']
376 #except
377 600
378 #end try
379 #end if
380 num_dist_genes = #slurp
381 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.num_dist_genes.__str__ != ""
382 $refGenomeSource.defuse_param.num_dist_genes
383 #else
384 #try
385 $ref_dict['num_dist_genes']
386 #except
387 500
388 #end try
389 #end if
390 split_min_anchor = #slurp
391 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.split_min_anchor.__str__ != ""
392 $refGenomeSource.defuse_param.split_min_anchor
393 #else
394 #try
395 $ref_dict['split_min_anchor']
396 #except
397 4
398 #end try
399 #end if
400 max_concordant_ratio = #slurp
401 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_concordant_ratio.__str__ != ""
402 $refGenomeSource.defuse_param.max_concordant_ratio
403 #else
404 #try
405 $ref_dict['max_concordant_ratio']
406 #except
407 0.1
408 #end try
409 #end if
410 splice_bias = #slurp
411 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.splice_bias.__str__ != ""
412 $refGenomeSource.defuse_param.splice_bias
413 #else
414 #try
415 $ref_dict['splice_bias']
416 #except
417 10
418 #end try
419 #end if
420 denovo_assembly = #slurp
421 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.denovo_assembly.__str__ != ""
422 $refGenomeSource.defuse_param.denovo_assembly
423 #else
424 #try
425 $ref_dict['denovo_assembly']
426 #except
427 no
428 #end try
429 #end if
430 probability_threshold = #slurp
431 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.probability_threshold.__str__ != ""
432 $refGenomeSource.defuse_param.probability_threshold
433 #else
434 #try
435 $ref_dict['probability_threshold']
436 #except
437 0.50
438 #end try
439 #end if
440 positive_controls = \$(data_directory)/controls.txt
441
442 # Position density when calculating covariance
443 covariance_sampling_density = #slurp
444 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.covariance_sampling_density.__str__ != ""
445 $refGenomeSource.defuse_param.covariance_sampling_density
446 #else
447 #try
448 $ref_dict['covariance_sampling_density']
449 #except
450 0.01
451 #end try
452 #end if
453
454
455 # Number of reads for each job in split
456 reads_per_job = 1000000
457
458 # Number of regions for each breakpoint sequence job in split
459 regions_per_job = 20
460
461 #raw
462 # If you have command line 'mail' and wish to be notified
463 # mailto = andrew.mcpherson@gmail.com
464
465 # Remove temp files
466 remove_job_files = yes
467 remove_job_temp_files = yes
468
469 # Converting to fastq
470 # Fastq converter config format 1 for reads stored in separate files for each end
471 # data_lane_rexex_N is a perl regex which stores the lane id in $1
472 # data_end_regex_N is a perl regex which stores the end, 1 or 2, in $1
473 # data_compress_regex_N is a perl regex which stores the compression extension in $1
474 # data_convert_N is the associated conversion utility that takes data at stdin and outputs fastq at stdout
475 # Fastq converter config format 2 for reads stored in separate files for each end
476 # data_lane_regex_N is a perl regex which stores the lane id in $1
477 # data_compress_regex_N is a perl regex which stores the compression extension in $1
478 # data_end1_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 1 at stdout
479 # data_end2_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 2 at stdout
480
481 data_lane_regex_1 = ^(.+)_[12]_export\.txt.*$
482 data_end_regex_1 = ^.+_([12])_export\.txt.*$
483 data_compress_regex_1 = ^.+_[12]_export\.txt(.*)$
484 data_converter_1 = $(scripts_directory)/fq_all2std.pl export2std
485
486 data_lane_regex_2 = ^(.+)_[12]_concat_qseq\.txt.*$
487 data_end_regex_2 = ^.+_([12])_concat_qseq\.txt.*$
488 data_compress_regex_2 = ^.+_[12]_concat_qseq\.txt(.*)$
489 data_converter_2 = $(scripts_directory)/qseq2fastq.pl
490
491 data_lane_regex_3 = ^(.+)\.bam.*$
492 data_compress_regex_3 = ^.+\.bam(.*)$
493 data_end1_converter_3 = samtools view - | filter_sam_mate.pl 1 | sam_to_fastq.pl
494 data_end2_converter_3 = samtools view - | filter_sam_mate.pl 2 | sam_to_fastq.pl
495
496 data_lane_regex_4 = ^(.+).[12].fastq.*$
497 data_end_regex_4 = ^.+.([12]).fastq.*$
498 data_compress_regex_4 = ^.+.[12].fastq(.*)$
499 data_converter_4 = cat
500 #end raw
501
502 #end if
503
504 </configfile>
505 </configfiles>
506 <outputs> <!-- copy of the config handed to defuse.pl, plus the log and result tables collected from output_dir -->
507 <data name="config_txt" format="txt" label="${tool.name} on ${on_string}: config.txt"/>
508 <data name="defuse_log" format="txt" from_work_dir="output_dir/log/defuse.log" label="${tool.name} on ${on_string}: defuse.log"/>
509 <data name="results_tsv" format="tabular" from_work_dir="output_dir/results.tsv" label="${tool.name} on ${on_string}: results.tsv"/>
510 <data name="results_filtered_tsv" format="tabular" from_work_dir="output_dir/results.filtered.tsv" label="${tool.name} on ${on_string}: results.filtered.tsv"/>
511 <data name="results_classify_tsv" format="tabular" from_work_dir="output_dir/results.classify.tsv" label="${tool.name} on ${on_string}: results.classify.tsv"/>
512 </outputs>
513 <tests>
514 </tests>
515 <help>
516 **DeFuse**
517
518 DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion.
519
520 Journal reference: http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1001138
521
522 .. _DeFuse: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page
523
524 ------
525
526 **Inputs**
527
528 DeFuse requires 2 fastq files for paired reads, one with the left mate of the paired reads, and a second fastq with the right mate of the paired reads (**with reads in the same order as in the first fastq dataset**).
529
530 If your fastq files have reads in different orders or include unpaired reads, you can preprocess them with **FASTQ interlacer** to create a single interlaced fastq dataset with only the paired reads and input that to **FASTQ de-interlacer** to separate the reads into a left fastq and right fastq.
531
532 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.4_:
533 - genome_fasta from Ensembl
534 - gene_models from Ensembl
535 - repeats_filename from UCSC RepeatMasker rmsk.txt
536 - est_fasta from UCSC
537 - est_alignments from UCSC intronEst.txt
538 - unigene_fasta from NCBI
539
540 .. _DeFuse_Version_0.4: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2
541
542 ------
543
544 **Outputs**
545
546 The galaxy history will contain 5 outputs: the config.txt file that provides DeFuse with its parameters, the defuse.log which details what DeFuse has done and can be useful in determining any errors, and the 3 results files that defuse generates.
547
548 DeFuse generates 3 results files: results.tsv, results.filtered.tsv, and results.classify.tsv. All three files have the same format, though results.classify.tsv has a probability column from the application of the classifier to results.tsv, and results.filtered.tsv has been filtered according to the threshold probability as set in config.txt.
549
550 The file format is tab delimited with one prediction per line, and the following fields per prediction (not necessarily in this order):
551
552 - **Identification**
553 - cluster_id : random identifier assigned to each prediction
554 - library_name : library name given on the command line of defuse
555 - gene1 : ensembl id of gene 1
556 - gene2 : ensembl id of gene 2
557 - gene_name1 : name of gene 1
558 - gene_name2 : name of gene 2
559 - **Evidence**
560 - break_predict : breakpoint prediction method, denovo or splitr, that is considered most reliable
561 - concordant_ratio : proportion of spanning reads considered concordant by blat
562 - denovo_min_count : minimum kmer count across denovo assembled sequence
563 - denovo_sequence : fusion sequence predicted by debruijn based denovo sequence assembly
564 - denovo_span_pvalue : p-value, lower values are evidence the prediction is a false positive
565 - gene_align_strand1 : alignment strand for spanning read alignments to gene 1
566 - gene_align_strand2 : alignment strand for spanning read alignments to gene 2
567 - min_map_count : minimum of the number of genomic mappings for each spanning read
568 - max_map_count : maximum of the number of genomic mappings for each spanning read
569 - mean_map_count : average of the number of genomic mappings for each spanning read
570 - num_multi_map : number of spanning reads that map to more than one genomic location
571 - span_count : number of spanning reads supporting the fusion
572 - span_coverage1 : coverage of spanning reads aligned to gene 1 as a proportion of expected coverage
573 - span_coverage2 : coverage of spanning reads aligned to gene 2 as a proportion of expected coverage
574 - span_coverage_min : minimum of span_coverage1 and span_coverage2
575 - span_coverage_max : maximum of span_coverage1 and span_coverage2
576 - splitr_count : number of split reads supporting the prediction
577 - splitr_min_pvalue : p-value, lower values are evidence the prediction is a false positive
578 - splitr_pos_pvalue : p-value, lower values are evidence the prediction is a false positive
579 - splitr_sequence : fusion sequence predicted by split reads
580 - splitr_span_pvalue : p-value, lower values are evidence the prediction is a false positive
581 - **Annotation**
582 - adjacent : fusion between adjacent genes
583 - altsplice : fusion likely the product of alternative splicing between adjacent genes
584 - break_adj_entropy1 : di-nucleotide entropy of the 40 nucleotides adjacent to the fusion splice in gene 1
585 - break_adj_entropy2 : di-nucleotide entropy of the 40 nucleotides adjacent to the fusion splice in gene 2
586 - break_adj_entropy_min : minimum of break_adj_entropy1 and break_adj_entropy2
587 - breakpoint_homology : number of nucleotides at the fusion splice that align equally well to gene 1 or gene 2
588 - breakseqs_estislands_percident : maximum percent identity of fusion sequence alignments to est islands
589 - cdna_breakseqs_percident : maximum percent identity of fusion sequence alignments to cdna
590 - deletion : fusion produced by a genomic deletion
591 - est_breakseqs_percident : maximum percent identity of fusion sequence alignments to est
592 - eversion : fusion produced by a genomic eversion
593 - exonboundaries : fusion splice at exon boundaries
594 - expression1 : expression of gene 1 as number of concordant pairs aligned to exons
595 - expression2 : expression of gene 2 as number of concordant pairs aligned to exons
596 - gene_chromosome1 : chromosome of gene 1
597 - gene_chromosome2 : chromosome of gene 2
598 - gene_end1 : end position for gene 1
599 - gene_end2 : end position for gene 2
600 - gene_location1 : location of breakpoint in gene 1
601 - gene_location2 : location of breakpoint in gene 2
602 - gene_start1 : start of gene 1
603 - gene_start2 : start of gene 2
604 - gene_strand1 : strand of gene 1
605 - gene_strand2 : strand of gene 2
606 - genome_breakseqs_percident : maximum percent identity of fusion sequence alignments to genome
607 - genomic_break_pos1 : genomic position in gene 1 of fusion splice / breakpoint
608 - genomic_break_pos2 : genomic position in gene 2 of fusion splice / breakpoint
609 - genomic_strand1 : genomic strand in gene 1 of fusion splice / breakpoint, retained sequence upstream on this strand, breakpoint is downstream
610 - genomic_strand2 : genomic strand in gene 2 of fusion splice / breakpoint, retained sequence upstream on this strand, breakpoint is downstream
611 - interchromosomal : fusion produced by an interchromosomal translocation
612 - interrupted_index1 : ratio of coverage before and after the fusion splice / breakpoint in gene 1
613 - interrupted_index2 : ratio of coverage before and after the fusion splice / breakpoint in gene 2
614 - inversion : fusion produced by genomic inversion
615 - orf : fusion combines genes in a way that preserves a reading frame
616 - probability : probability produced by classification using adaboost and example positives/negatives (only given in results.classify.tsv)
617 - read_through : fusion involving adjacent genes potentially resulting from co-transcription rather than genome rearrangement
618 - repeat_proportion1 : proportion of the spanning reads in gene 1 that span a repeat region
619 - repeat_proportion2 : proportion of the spanning reads in gene 2 that span a repeat region
620 - max_repeat_proportion : max of repeat_proportion1 and repeat_proportion2
621 - splice_score : number of nucleotides similar to GTAG at fusion splice
622 - num_splice_variants : number of potential splice variants for this gene pair
623 - splicing_index1 : number of concordant pairs in gene 1 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 2
624 - splicing_index2 : number of concordant pairs in gene 2 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 1
625
626
627 **Example**
628
629 results.tsv::
630
631 cluster_id splitr_sequence splitr_count splitr_span_pvalue splitr_pos_pvalue splitr_min_pvalue adjacent altsplice break_adj_entropy1 break_adj_entropy2 break_adj_entropy_min break_predict breakpoint_homology breakseqs_estislands_percident cdna_breakseqs_percident concordant_ratio deletion est_breakseqs_percident eversion exonboundaries expression1 expression2 gene1 gene2 gene_align_strand1 gene_align_strand2 gene_chromosome1 gene_chromosome2 gene_end1 gene_end2 gene_location1 gene_location2 gene_name1 gene_name2 gene_start1 gene_start2 gene_strand1 gene_strand2 genome_breakseqs_percident genomic_break_pos1 genomic_break_pos2 genomic_strand1 genomic_strand2 interchromosomal interrupted_index1 interrupted_index2 inversion library_name max_map_count max_repeat_proportion mean_map_count min_map_count num_multi_map num_splice_variants orf read_through repeat_proportion1 repeat_proportion2 span_count span_coverage1 span_coverage2 span_coverage_max span_coverage_min splice_score splicing_index1 splicing_index2
632 1169 GCTTACTGTATGCCAGGCCCCAGAGGGGCAACCACCCTCTAAAGAGAGCGGCTCCTGCCTCCCAGAAAGCTCACAGACTGTGGGAGGGAAACAGGCAGCAGGTGAAGATGCCAAATGCCAGGATATCTGCCCTGTCCTTGCTTGATGCAGCTGCTGGCTCCCACGTTCTCCCCAGAATCCCCTCACACTCCTGCTGTTTTCTCTGCAGGTTGGCAGAGCCCCATGAGGGCAGGGCAGCCACTTTGTTCTTGGGCGGCAAACCTCCCTGGGCGGCACGGAAACCACGGTGAGAAGGGGGCAGGTCGGGCACGTGCAGGGACCACGCTGCAGG|TGTACCCAACAGCTCCGAAGAGACAGCGACCATCGAGAACGGGCCATGATGACGATGGCGGTTTTGTCGAAAAGAAAAGGGGGAAATGTGGGGAAAAGCAAGAGAGATCAGATTGTTACTGTGTCTGTGTAGAAAGAAGTAGACATGGGAGACTCCATTTTGTTCTGTACTAAGAAAAATTCTTCTGCCTTGAGATTCGGTGACCCCACCCCCAACCCCGTGCTCTCTGAAACATGTGCTGTGTCCACTCAGGGTTGAATGGATTAAGGGCGGTGCGAGACGTGCTTT 2 0.000436307890680442 0.110748295953850 0.0880671602973091 N Y 3.19872427442695 3.48337348351473 3.19872427442695 splitr 0 0 0 0 Y 0 N N 0 0 ENSG00000105549 ENSG00000213753 + - 19 19 376013 59111168 intron upstream THEG AC016629.2 361750 59084870 - + 0 375099 386594 + - N 8.34107429512245 - N output_dir 82 0.677852348993289 40.6666666666667 1 11 1 N N 0.361271676300578 0.677852348993289 12 0.758602776578432 0.569678713445872 0.758602776578432 0.569678713445872 2 0.416666666666667 -
633 3596 TGGGGGTTGAGGCTTCTGTTCCCAGGTTCCATGACCTCAGAGGTGGCTGGTGAGGTTATGACCTTTGCCCTCCAGCCCTGGCTTAAAACCTCAGCCCTAGGACCTGGTTAAAGGAAGGGGAGATGGAGCTTTGCCCCGACCCCCCCCCGTTCCCCTCACCTGTCAGCCCGAGCTGGGCCAGGGCCCCTAGGTGGGGAACTGGGCCGGGGGGCGGGCACAAGCGGAGGTGGTGCCCCCAAAAGGGCTCCCGGTGGGGTCTTGCTGAGAAGGTGAGGGGTTCCCGGGGCCGCAGCAGGTGGTGGTGGAGGAGCCAAGCGGCTGTAGAGCAAGGGGTGAGCAGGTTCCAGACCGTAGAGGCGGGCAGCGGCCACGGCCCCGGGTCCAGTTAGCTCCTCACCCGCCTCATAGAAGCGGGGTGGCCTTGCCAGGCGTGGGGGTGCTGCC|TTCCTTGGATGTGGTAGCCGTTTCTCAGGCTCCCTCTCCGGAATCGAACCCTGATTCCCCGTCACCCGTGGTCACCATGGTAGGCACGGCGACTACCATCGAAAGTTGATAGGGCAGACGTTCGAATGGGTCGTCGCCGCCACGGGGGGCGTGCGATCAGCCCGAGGTTATCTAGAGTCACCAAAGCCGCCGGCGCCCGCCCCCCGGCCGGGGCCGGAGAGGGGCTGACCGGGTTGGTTTTGATCTGATAAATGCACGCATCCCCCCCGCGAAGGGGGTCAGCGCCCGTCGGCATGTATTAGCTCTAGAATTACCACAGTTATCCAAGTAGGAGAGGAGCGAGCGACCAAAGGAACCATAACTGATTTAATGAGCCATTCGCAGTTTCACTGTACCGGCCGTGCGTACTTAGACATGCATGGCTTAATCTTTGAGACAAGCATATGCTACTGGCAGG 250 7.00711162298275e-72 0.00912124762512338 0.00684237452309549 N N 3.31745197152461 3.47233119514066 3.31745197152461 splitr 7 0.0157657657657656 0 0 N 0.0135135135135136 N N 0 0 ENSG00000156860 ENSG00000212932 - + 16 21 30682131 48111157 coding upstream FBRS RPL23AP4 30670289 48110676 + + 0.0157657657657656 30680678 9827473 - + Y - - N output_dir 2 1 1.11111111111111 1 1 1 N N 0 1 9 0.325530693397641 0.296465452915709 0.325530693397641 0.296465452915709 2 - -
634
635 </help>
636 </tool>