Mercurial > repos > jjohnson > defuse
annotate defuse.xml @ 3:c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Allow the creation of an HTML-formatted results.filtered.tsv with links to the cluster details provided by the get_reads.pl command.
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Fri, 06 Jan 2012 16:06:17 -0600 |
parents | 4245c2b047de |
children | 679a5c7b1294 |
rev | line source |
---|---|
3
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
1 <tool id="defuse" name="DeFuse" version="1.2"> |
1 | 2 <description>identify fusion transcripts</description> |
3 <requirements> | |
4 <requirement type="binary"></requirement> | |
5 </requirements> | |
3
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
6 <command interpreter="command"> /bin/bash $shscript </command> |
1 | 7 <inputs> |
8 <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads. (FASTQ interlacer will pair reads and remove the unpaired. FASTQ de-interlacer will separate the result into left and right reads.)"/> | |
9 <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/> | |
10 <conditional name="refGenomeSource"> | |
11 <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help=""> | |
12 <option value="indexed">Use a built-in DeFuse Reference Dataset</option> | |
13 <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option> | |
14 </param> | |
15 <when value="indexed"> | |
16 <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team"> | |
17 <options from_file="defuse.loc"> | |
18 <column name="name" index="1"/> | |
19 <column name="value" index="2"/> | |
20 <filter type="sort_by" column="0" /> | |
21 <validator type="no_options" message="No indexes are available" /> | |
22 </options> | |
23 </param> | |
24 <conditional name="defuse_param"> | |
25 <param name="settings" type="select" label="Defuse parameter settings" help=""> | |
26 <option value="preSet">Default settings</option> | |
27 <option value="full">Full parameter list</option> | |
28 </param> | |
29 <when value="preSet" /> | |
30 <when value="full"> | |
31 <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" /> | |
32 <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" /> | |
33 <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" /> | |
34 <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision"> | |
35 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> | |
36 </param> | |
37 <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" /> | |
38 <param name="split_count_threshold" type="integer" value="3" optional="true" label="Filter split_count_threshold" /> | |
39 <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold"> | |
40 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> | |
41 </param> | |
42 <param name="max_dist_pos" type="integer" value="600" optional="true" label="Filter max_dist_pos" /> | |
43 <param name="num_dist_genes" type="integer" value="500" optional="true" label="Filter num_dist_genes" /> | |
44 <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" /> | |
45 <param name="max_concordant_ratio" type="float" value="0.1" optional="true" label="Filter max_concordant_ratio"> | |
46 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> | |
47 </param> | |
48 <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" /> | |
49 <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold"> | |
50 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> | |
51 </param> | |
52 <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density"> | |
53 <help>Position density when calculating covariance</help> | |
54 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> | |
55 </param> | |
56 <param name="denovo_assembly" type="select" label="denovo_assembly" help=""> | |
57 <option value="">Use Default</option> | |
58 <option value="no">no</option> | |
59 <option value="yes">yes</option> | |
60 </param> | |
61 <!-- | |
62 <param name="positive_controls" type="data" format="txt" optional="true" label="Defuse positive_controls" help=""/> | |
63 --> | |
64 </when> <!-- full --> | |
65 </conditional> <!-- defuse_param --> | |
66 </when> | |
67 <when value="history"> | |
68 <param name="config" type="data" format="txt" label="Defuse Config file" help=""/> | |
69 </when> <!-- history --> | |
70 </conditional> <!-- refGenomeSource --> | |
3
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
71 <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files"/> |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
72 <param name="do_get_reads" type="boolean" checked="false" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/> |
1 | 73 </inputs> |
74 <configfiles> | |
75 <configfile name="defuse_config"> | |
76 #import ast | |
77 #if $refGenomeSource.genomeSource == "history": | |
78 #include raw $refGenomeSource.config.__str__ | |
79 #else | |
80 #set $ref_dict = dict($ast.literal_eval($refGenomeSource.index.value)) | |
81 # | |
82 # Configuration file for defuse | |
83 # | |
84 # At a minimum, change all values enclosed by [] | |
85 # | |
2
4245c2b047de
Changes for defuse-0.4.3, modifications for non-human genomes no longer required, defuse.xml searches for location of scripts/defuse.pl
Jim Johnson <jj@umn.edu>
parents:
1
diff
changeset
|
86 |
1 | 87 # Directory where the defuse code was unpacked |
88 ## Default location in the tool/defuse directory | |
89 # source_directory = ${__root_dir__}/tools/defuse | |
90 source_directory = #slurp | |
91 #try | |
92 $ref_dict['source_directory'] | |
93 #except | |
2
4245c2b047de
Changes for defuse-0.4.3, modifications for non-human genomes no longer required, defuse.xml searches for location of scripts/defuse.pl
Jim Johnson <jj@umn.edu>
parents:
1
diff
changeset
|
94 #try |
4245c2b047de
Changes for defuse-0.4.3, modifications for non-human genomes no longer required, defuse.xml searches for location of scripts/defuse.pl
Jim Johnson <jj@umn.edu>
parents:
1
diff
changeset
|
95 ## Try to find the defuse source dir in the galaxy tool path |
4245c2b047de
Changes for defuse-0.4.3, modifications for non-human genomes no longer required, defuse.xml searches for location of scripts/defuse.pl
Jim Johnson <jj@umn.edu>
parents:
1
diff
changeset
|
96 #import Cheetah.FileUtils |
4245c2b047de
Changes for defuse-0.4.3, modifications for non-human genomes no longer required, defuse.xml searches for location of scripts/defuse.pl
Jim Johnson <jj@umn.edu>
parents:
1
diff
changeset
|
97 #set $toolpath = '/'.join([$__root_dir__,'tools','defuse']) |
4245c2b047de
Changes for defuse-0.4.3, modifications for non-human genomes no longer required, defuse.xml searches for location of scripts/defuse.pl
Jim Johnson <jj@umn.edu>
parents:
1
diff
changeset
|
98 #set $defuse = $Cheetah.FileUtils.findFiles($toolpath,['defuse.pl'],[],['tools','external','include','em','data'])[0] |
4245c2b047de
Changes for defuse-0.4.3, modifications for non-human genomes no longer required, defuse.xml searches for location of scripts/defuse.pl
Jim Johnson <jj@umn.edu>
parents:
1
diff
changeset
|
99 $defuse.replace('/scripts/defuse.pl','') |
4245c2b047de
Changes for defuse-0.4.3, modifications for non-human genomes no longer required, defuse.xml searches for location of scripts/defuse.pl
Jim Johnson <jj@umn.edu>
parents:
1
diff
changeset
|
100 #except |
4245c2b047de
Changes for defuse-0.4.3, modifications for non-human genomes no longer required, defuse.xml searches for location of scripts/defuse.pl
Jim Johnson <jj@umn.edu>
parents:
1
diff
changeset
|
101 ${__root_dir__}/tools/defuse/defuse |
4245c2b047de
Changes for defuse-0.4.3, modifications for non-human genomes no longer required, defuse.xml searches for location of scripts/defuse.pl
Jim Johnson <jj@umn.edu>
parents:
1
diff
changeset
|
102 #end try |
1 | 103 #end try |
104 | |
105 # Directory where you want your dataset | |
106 dataset_directory = #slurp | |
107 #try | |
108 $ref_dict['dataset_directory'] | |
109 #except | |
110 /project/db/genomes/Hsapiens/hg19/defuse | |
111 #end try | |
112 | |
113 # Input genome and gene models | |
114 gene_models = #slurp | |
115 #try | |
116 $ref_dict['gene_models'] | |
117 #except | |
118 \$(dataset_directory)/Homo_sapiens.GRCh37.62.gtf | |
119 #end try | |
120 genome_fasta = #slurp | |
121 #try | |
122 $ref_dict['genome_fasta'] | |
123 #except | |
124 \$(dataset_directory)/Homo_sapiens.GRCh37.62.dna.chromosome.fa | |
125 #end try | |
126 | |
127 # Repeat table from ucsc genome browser | |
128 repeats_filename = #slurp | |
129 #try | |
130 $ref_dict['repeats_filename'] | |
131 #except | |
132 \$(dataset_directory)/rmsk.txt | |
133 #end try | |
134 | |
135 # EST info downloaded from ucsc genome browser | |
136 est_fasta = #slurp | |
137 #try | |
138 $ref_dict['est_fasta'] | |
139 #except | |
140 \$(dataset_directory)/est.fa | |
141 #end try | |
142 est_alignments = #slurp | |
143 #try | |
144 $ref_dict['est_alignments'] | |
145 #except | |
146 \$(dataset_directory)/intronEst.txt | |
147 #end try | |
148 | |
149 # Unigene clusters downloaded from ncbi | |
150 unigene_fasta = #slurp | |
151 #try | |
152 $ref_dict['unigene_fasta'] | |
153 #except | |
154 \$(dataset_directory)/Hs.seq.uniq | |
155 #end try | |
156 | |
157 # Paths to external tools | |
158 bowtie_bin = #slurp | |
159 #try | |
160 $ref_dict['bowtie_bin'] | |
161 #except | |
162 /soft/bowtie/0.12.7/bowtie | |
163 #end try | |
164 bowtie_build_bin = #slurp | |
165 #try | |
166 $ref_dict['bowtie_build_bin'] | |
167 #except | |
168 /soft/bowtie/0.12.7/bowtie-build | |
169 #end try | |
170 blat_bin = #slurp | |
171 #try | |
172 $ref_dict['blat_bin'] | |
173 #except | |
174 /soft/blat/34/bin/blat | |
175 #end try | |
176 fatotwobit_bin = #slurp | |
177 #try | |
178 $ref_dict['fatotwobit_bin'] | |
179 #except | |
180 /soft/blat/34/bin/faToTwoBit | |
181 #end try | |
182 r_bin = #slurp | |
183 #try | |
184 $ref_dict['r_bin'] | |
185 #except | |
186 /project/sdml-sles11-weblocal/R-2.12.1/bin/R | |
187 #end try | |
188 rscript_bin = #slurp | |
189 #try | |
190 $ref_dict['rscript_bin'] | |
191 #except | |
192 /project/sdml-sles11-weblocal/R-2.12.1/bin/Rscript | |
193 #end try | |
194 | |
195 #raw | |
196 # Dataset files | |
197 dataset_prefix = $(dataset_directory)/defuse | |
198 chromosome_prefix = $(dataset_prefix).dna.chromosomes | |
199 exons_fasta = $(dataset_prefix).exons.fa | |
200 cds_fasta = $(dataset_prefix).cds.fa | |
201 cdna_regions = $(dataset_prefix).cdna.regions | |
202 cdna_fasta = $(dataset_prefix).cdna.fa | |
203 reference_fasta = $(dataset_prefix).reference.fa | |
204 rrna_fasta = $(dataset_prefix).rrna.fa | |
205 ig_gene_list = $(dataset_prefix).ig.gene.list | |
206 repeats_regions = $(dataset_directory)/repeats.regions | |
207 est_split_fasta1 = $(dataset_directory)/est.1.fa | |
208 est_split_fasta2 = $(dataset_directory)/est.2.fa | |
209 est_split_fasta3 = $(dataset_directory)/est.3.fa | |
210 est_split_fasta4 = $(dataset_directory)/est.4.fa | |
211 est_split_fasta5 = $(dataset_directory)/est.5.fa | |
212 est_split_fasta6 = $(dataset_directory)/est.6.fa | |
213 est_split_fasta7 = $(dataset_directory)/est.7.fa | |
214 est_split_fasta8 = $(dataset_directory)/est.8.fa | |
215 est_split_fasta9 = $(dataset_directory)/est.9.fa | |
216 | |
217 # Fasta files with bowtie indices for prefiltering reads for concordantly mapping pairs | |
218 prefilter1 = $(unigene_fasta) | |
219 | |
220 # deFuse scripts and tools | |
221 scripts_directory = $(source_directory)/scripts | |
222 tools_directory = $(source_directory)/tools | |
223 data_directory = $(source_directory)/data | |
224 #end raw | |
225 | |
226 # Path to samtools, 0.1.8 is compiled for you, use other versions at your own risk | |
227 samtools_bin = #slurp | |
228 #try | |
229 $ref_dict['samtools_bin'] | |
230 #except | |
231 \$(source_directory)/external/samtools-0.1.8/samtools | |
232 #end try | |
233 | |
234 # Bowtie parameters | |
235 bowtie_threads = #slurp | |
236 #try | |
237 $ref_dict['bowtie_threads'] | |
238 #except | |
3
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
239 4 |
1 | 240 #end try |
241 bowtie_quals = #slurp | |
242 #try | |
243 $ref_dict['bowtie_quals'] | |
244 #except | |
245 --phred33-quals | |
246 #end try | |
247 max_insert_size = #slurp | |
248 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_insert_size.__str__ != "": | |
249 $refGenomeSource.defuse_param.max_insert_size | |
250 #else | |
251 #try | |
252 $ref_dict['max_insert_size'] | |
253 #except | |
254 500 | |
255 #end try | |
256 #end if | |
257 | |
258 # Parameters for building the dataset | |
259 chromosomes = #slurp | |
260 #try | |
261 $ref_dict['chromosomes'] | |
262 #except | |
263 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT | |
264 #end try | |
265 mt_chromosome = #slurp | |
266 #try | |
267 $ref_dict['mt_chromosome'] | |
268 #except | |
269 MT | |
270 #end try | |
271 gene_sources = #slurp | |
272 #try | |
273 $ref_dict['gene_sources'] | |
274 #except | |
275 IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding | |
276 #end try | |
277 ig_gene_sources = #slurp | |
278 #try | |
279 $ref_dict['ig_gene_sources'] | |
280 #except | |
281 IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene | |
282 #end try | |
283 rrna_gene_sources = #slurp | |
284 #try | |
285 $ref_dict['rrna_gene_sources'] | |
286 #except | |
287 Mt_rRNA,rRNA,rRNA_pseudogene | |
288 #end try | |
289 | |
290 # Blat sequences per job | |
291 num_blat_sequences = #slurp | |
292 #try | |
293 $ref_dict['num_blat_sequences'] | |
294 #except | |
295 10000 | |
296 #end try | |
297 | |
298 # Minimum gene fusion range | |
299 dna_concordant_length = #slurp | |
300 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.dna_concordant_length.__str__ != "": | |
301 $refGenomeSource.defuse_param.dna_concordant_length | |
302 #else | |
303 #try | |
304 $ref_dict['dna_concordant_length'] | |
305 #except | |
306 2000 | |
307 #end try | |
308 #end if | |
309 | |
310 # Trim length for discordant reads (split reads are not trimmed) | |
311 discord_read_trim = #slurp | |
312 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.discord_read_trim.__str__ != "": | |
313 $refGenomeSource.defuse_param.discord_read_trim | |
314 #else | |
315 #try | |
316 $ref_dict['discord_read_trim'] | |
317 #except | |
318 50 | |
319 #end try | |
320 #end if | |
321 | |
322 # Filtering parameters | |
323 clustering_precision = #slurp | |
324 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.clustering_precision.__str__ != "" | |
325 $refGenomeSource.defuse_param.clustering_precision | |
326 #else | |
327 #try | |
328 $ref_dict['clustering_precision'] | |
329 #except | |
330 0.95 | |
331 #end try | |
332 #end if | |
333 span_count_threshold = #slurp | |
334 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.span_count_threshold.__str__ != "" | |
335 $refGenomeSource.defuse_param.span_count_threshold | |
336 #else | |
337 #try | |
338 $ref_dict['span_count_threshold'] | |
339 #except | |
340 5 | |
341 #end try | |
342 #end if | |
343 split_count_threshold = #slurp | |
344 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.split_count_threshold.__str__ != "" | |
345 $refGenomeSource.defuse_param.split_count_threshold | |
346 #else | |
347 #try | |
348 $ref_dict['split_count_threshold'] | |
349 #except | |
350 3 | |
351 #end try | |
352 #end if | |
353 percent_identity_threshold = #slurp | |
354 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.percent_identity_threshold.__str__ != "" | |
355 $refGenomeSource.defuse_param.percent_identity_threshold | |
356 #else | |
357 #try | |
358 $ref_dict['percent_identity_threshold'] | |
359 #except | |
360 0.90 | |
361 #end try | |
362 #end if | |
363 max_dist_pos = #slurp | |
364 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_dist_pos.__str__ != "" | |
365 $refGenomeSource.defuse_param.max_dist_pos | |
366 #else | |
367 #try | |
368 $ref_dict['max_dist_pos'] | |
369 #except | |
370 600 | |
371 #end try | |
372 #end if | |
373 num_dist_genes = #slurp | |
374 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.num_dist_genes.__str__ != "" | |
375 $refGenomeSource.defuse_param.num_dist_genes | |
376 #else | |
377 #try | |
378 $ref_dict['num_dist_genes'] | |
379 #except | |
380 500 | |
381 #end try | |
382 #end if | |
383 split_min_anchor = #slurp | |
384 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.split_min_anchor.__str__ != "" | |
385 $refGenomeSource.defuse_param.split_min_anchor | |
386 #else | |
387 #try | |
388 $ref_dict['split_min_anchor'] | |
389 #except | |
390 4 | |
391 #end try | |
392 #end if | |
393 max_concordant_ratio = #slurp | |
394 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_concordant_ratio.__str__ != "" | |
395 $refGenomeSource.defuse_param.max_concordant_ratio | |
396 #else | |
397 #try | |
398 $ref_dict['max_concordant_ratio'] | |
399 #except | |
400 0.1 | |
401 #end try | |
402 #end if | |
403 splice_bias = #slurp | |
404 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.splice_bias.__str__ != "" | |
405 $refGenomeSource.defuse_param.splice_bias | |
406 #else | |
407 #try | |
408 $ref_dict['splice_bias'] | |
409 #except | |
410 10 | |
411 #end try | |
412 #end if | |
413 denovo_assembly = #slurp | |
414 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.denovo_assembly.__str__ != "" | |
415 $refGenomeSource.defuse_param.denovo_assembly | |
416 #else | |
417 #try | |
418 $ref_dict['denovo_assembly'] | |
419 #except | |
420 no | |
421 #end try | |
422 #end if | |
423 probability_threshold = #slurp | |
424 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.probability_threshold.__str__ != "" | |
425 $refGenomeSource.defuse_param.probability_threshold | |
426 #else | |
427 #try | |
428 $ref_dict['probability_threshold'] | |
429 #except | |
430 0.50 | |
431 #end try | |
432 #end if | |
433 positive_controls = \$(data_directory)/controls.txt | |
434 | |
435 # Position density when calculating covariance | |
436 covariance_sampling_density = #slurp | |
437 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.covariance_sampling_density.__str__ != "" | |
438 $refGenomeSource.defuse_param.covariance_sampling_density | |
439 #else | |
440 #try | |
441 $ref_dict['covariance_sampling_density'] | |
442 #except | |
443 0.01 | |
444 #end try | |
445 #end if | |
446 | |
447 | |
448 # Number of reads for each job in split | |
449 reads_per_job = 1000000 | |
450 | |
451 # Number of regions for each breakpoint sequence job in split | |
452 regions_per_job = 20 | |
453 | |
454 #raw | |
455 # If you have command line 'mail' and wish to be notified | |
456 # mailto = andrew.mcpherson@gmail.com | |
457 | |
458 # Remove temp files | |
459 remove_job_files = yes | |
460 remove_job_temp_files = yes | |
461 | |
462 # Converting to fastq | |
463 # Fastq converter config format 1 for reads stored in separate files for each end | |
464 # data_lane_regex_N is a perl regex which stores the lane id in $1 | |
465 # data_end_regex_N is a perl regex which stores the end, 1 or 2, in $1 | |
466 # data_compress_regex_N is a perl regex which stores the compression extension in $1 | |
467 # data_converter_N is the associated conversion utility that takes data at stdin and outputs fastq at stdout | |
468 # Fastq converter config format 2 for reads stored in a single file containing both ends | |
469 # data_lane_regex_N is a perl regex which stores the lane id in $1 | |
470 # data_compress_regex_N is a perl regex which stores the compression extension in $1 | |
471 # data_end1_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 1 at stdout | |
472 # data_end2_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 2 at stdout | |
473 | |
474 data_lane_regex_1 = ^(.+)_[12]_export\.txt.*$ | |
475 data_end_regex_1 = ^.+_([12])_export\.txt.*$ | |
476 data_compress_regex_1 = ^.+_[12]_export\.txt(.*)$ | |
477 data_converter_1 = $(scripts_directory)/fq_all2std.pl export2std | |
478 | |
479 data_lane_regex_2 = ^(.+)_[12]_concat_qseq\.txt.*$ | |
480 data_end_regex_2 = ^.+_([12])_concat_qseq\.txt.*$ | |
481 data_compress_regex_2 = ^.+_[12]_concat_qseq\.txt(.*)$ | |
482 data_converter_2 = $(scripts_directory)/qseq2fastq.pl | |
483 | |
484 data_lane_regex_3 = ^(.+)\.bam.*$ | |
485 data_compress_regex_3 = ^.+\.bam(.*)$ | |
486 data_end1_converter_3 = samtools view - | filter_sam_mate.pl 1 | sam_to_fastq.pl | |
487 data_end2_converter_3 = samtools view - | filter_sam_mate.pl 2 | sam_to_fastq.pl | |
488 | |
489 data_lane_regex_4 = ^(.+).[12].fastq.*$ | |
490 data_end_regex_4 = ^.+.([12]).fastq.*$ | |
491 data_compress_regex_4 = ^.+.[12].fastq(.*)$ | |
492 data_converter_4 = cat | |
493 #end raw | |
494 | |
495 #end if | |
496 | |
497 </configfile> | |
3
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
498 <configfile name="shscript"> |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
499 #!/bin/bash |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
500 ## define some things for cheetah processing |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
501 #set $ds = chr(36) |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
502 #set $gt = chr(62) |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
503 #set $lt = chr(60) |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
504 #set $echo_cmd = 'echo' |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
505 ## Find the defuse.pl in the galaxy tool path |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
506 #import Cheetah.FileUtils |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
507 #set $toolpath = '/'.join([$__root_dir__,'tools','defuse']) |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
508 #set $defuse = $Cheetah.FileUtils.findFiles($toolpath,['defuse.pl'],[],['tools','external','include','em','data'])[0] |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
509 #set $get_reads = $Cheetah.FileUtils.findFiles($toolpath,['get_reads.pl'],[],['tools','external','include','em','data'])[0] |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
510 ## declare a bash function for converting a results tsv into html with links to the get_reads output files |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
511 results2html() { |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
512 rlts=${ds}1 |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
513 rslt_name=`basename ${ds}rlts` |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
514 html=${ds}2 |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
515 echo '${lt}html${gt}${lt}head${gt}${lt}title${gt}Defuse '${ds}rslt_name'${lt}/title${gt}${lt}/head${gt}${lt}body${gt}' ${gt} ${ds}html |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
516 echo '${lt}h2${gt}Defuse '${ds}rslt_name'${lt}/h2${gt}${lt}table${gt}' ${gt}${gt} ${ds}html |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
517 if [ -z "${ds}3" ] |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
518 then |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
519 awk '${ds}1 ~ /cluster_id/{printf("${lt}tr${gt}");for (i = 1; i ${lt}= NF; i++) {printf("${lt}th${gt}%s${lt}/th${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}\ |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
520 ${ds}1 ~ /[1-9][0-9]*/{printf("${lt}tr${gt}");for (i = 1; i ${lt}= NF; i++) {printf("${lt}td${gt}%s${lt}/td${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}' ${ds}rlts ${gt}${gt} ${ds}html |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
521 echo '${lt}/table${gt}' ${gt}${gt} ${ds}html |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
522 echo '${lt}/body${gt}${lt}/html${gt}' ${gt}${gt} ${ds}html |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
523 else |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
524 export _EFP=${ds}3 |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
525 mkdir -p ${ds}_EFP |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
526 awk '${ds}1 ~ /cluster_id/{printf("${lt}tr${gt}");for (i = 1; i ${lt}= NF; i++) {printf("${lt}th${gt}%s${lt}/th${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}\ |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
527 ${ds}1 ~ /[1-9][0-9]*/{fn="cluster_"${ds}1"_reads.txt"; \ |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
528 printf("${lt}tr${gt}${lt}td${gt}${lt}a href=\"%s\"${gt}%s${lt}/a${gt}${lt}/td${gt}",fn, ${ds}1);for (i = 2; i ${lt}= NF; i++) {printf("${lt}td${gt}%s${lt}/td${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}' ${ds}rlts ${gt}${gt} ${ds}html |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
529 echo '${lt}/table${gt}' ${gt}${gt} ${ds}html |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
530 echo '${lt}/body${gt}${lt}/html${gt}' ${gt}${gt} ${ds}html |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
531 for i in `awk '${ds}1 ~ /[1-9][0-9]*/{print ${ds}1}' ${ds}rlts`; |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
532 do fn=cluster_${ds}{i}_reads.txt; |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
533 pn=${ds}_EFP/${ds}fn; |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
534 perl $get_reads -c $defuse_config -o output_dir -i ${ds}i ${gt} ${ds}pn; |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
535 done |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
536 fi |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
537 } |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
538 ## copy config to output |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
539 cp $defuse_config $config_txt |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
540 ## make a data_dir and ln -s the input fastq |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
541 mkdir -p data_dir |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
542 ln -s $left_pairendreads data_dir/reads_1.fastq |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
543 ln -s $right_pairendreads data_dir/reads_2.fastq |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
544 ## ln to output_dir in from_work_dir |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
545 #if $defuse_out.__str__ != 'None': |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
546 mkdir -p $defuse_out.extra_files_path |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
547 ln -s $defuse_out.extra_files_path output_dir |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
548 #else |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
549 mkdir -p output_dir |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
550 #end if |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
551 ## run defuse.pl |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
552 perl $defuse -c $defuse_config -d data_dir -o output_dir -p 8 |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
553 ## copy primary results to output datasets |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
554 if [ -e output_dir/log/defuse.log ]; then cp output_dir/log/defuse.log $defuse_log; fi |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
555 if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
556 if [ -e output_dir/results.filtered.tsv ]; then cp output_dir/results.filtered.tsv $results_filtered_tsv; fi |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
557 if [ -e output_dir/results.classify.tsv ]; then cp output_dir/results.classify.tsv $results_classify_tsv; fi |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
558 ## create html with links for output_dir |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
559 #if $defuse_out.__str__ != 'None': |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
560 if [ -e $defuse_out ] |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
561 then |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
562 echo '${lt}html${gt}${lt}head${gt}${lt}title${gt}Defuse Output${lt}/title${gt}${lt}/head${gt}${lt}body${gt}' ${gt} $defuse_out |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
563 echo '${lt}h2${gt}Defuse Output Files${lt}/h2${gt}${lt}ul${gt}' ${gt}${gt} $defuse_out |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
564 pushd $defuse_out.extra_files_path |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
565 for f in `find -L . -maxdepth 1 -type f`; |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
566 do fn=`basename ${ds}f`; echo '${lt}li${gt}${lt}a href="'${ds}fn'"${gt}'${ds}fn'${lt}/a${gt}${lt}/li${gt}' ${gt}${gt} $defuse_out; |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
567 done |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
568 popd |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
569 echo '${lt}/ul${gt}' ${gt}${gt} $defuse_out |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
570 echo '${lt}/body${gt}${lt}/html${gt}' ${gt}${gt} $defuse_out |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
571 fi |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
572 #end if |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
573 ## run get_reads.pl on each cluster |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
574 #if $fusion_reads.__str__ != 'None': |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
575 if [ -e output_dir/results.filtered.tsv -a -e $fusion_reads ] |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
576 then |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
577 mkdir -p $fusion_reads.extra_files_path |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
578 results2html output_dir/results.filtered.tsv $fusion_reads $fusion_reads.extra_files_path |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
579 fi |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
580 #end if |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
581 </configfile> |
1 | 582 </configfiles> |
583 <outputs> | |
584 <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/> | |
3
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
585 <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" /> |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
586 <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output"> |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
587 <filter>keep_output == True</filter> |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
588 </data> |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
589 <data format="html" name="fusion_reads" label="${tool.name} on ${on_string}: fusion_reads"> |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
590 <filter>do_get_reads == True</filter> |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
591 </data> |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
592 <data format="tabular" name="results_tsv" label="${tool.name} on ${on_string}: results.tsv" /> |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
593 <data format="tabular" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" /> |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
594 <data format="tabular" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" /> |
1 | 595 </outputs> |
596 <tests> | |
597 </tests> | |
598 <help> | |
599 **DeFuse** | |
600 | |
601 DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion. | |
602 | |
603 Journal reference: http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1001138 | |
604 | |
605 .. _DeFuse: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page | |
606 | |
607 ------ | |
608 | |
609 **Inputs** | |
610 | |
611 DeFuse requires 2 fastq files for paired reads, one with the left mate of the paired reads, and a second fastq with the right mate of the paired reads (**with reads in the same order as in the first fastq dataset**). | |
612 | |
613 If your fastq files have reads in different orders or include unpaired reads, you can preprocess them with **FASTQ interlacer** to create a single interlaced fastq dataset with only the paired reads and input that to **FASTQ de-interlacer** to separate the reads into a left fastq and right fastq. | |
614 | |
615 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.4_: | |
616 - genome_fasta from Ensembl | |
617 - gene_models from Ensembl | |
618 - repeats_filename from UCSC RepeatMasker rmsk.txt | |
619 - est_fasta from UCSC | |
620 - est_alignments from UCSC intronEst.txt | |
621 - unigene_fasta from NCBI | |
622 | |
623 .. _DeFuse_Version_0.4: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2 | |
624 | |
625 ------ | |
626 | |
627 **Outputs** | |
628 | |
629 The Galaxy history will contain 5 outputs: the config.txt file that provides DeFuse with its parameters, the defuse.log which details what DeFuse has done and can be useful in determining any errors, and the 3 results files that DeFuse generates. Two additional HTML outputs (defuse_output and fusion_reads) are added when the corresponding options are selected. | |
630 | |
631 DeFuse generates 3 results files: results.tsv, results.filtered.tsv, and results.classify.tsv. All three files have the same format, though results.classify.tsv has a probability column from the application of the classifier to results.tsv, and results.filtered.tsv has been filtered according to the threshold probability as set in config.txt. | |
632 | |
633 The file format is tab delimited with one prediction per line, and the following fields per prediction (not necessarily in this order): | |
634 | |
635 - **Identification** | |
636 - cluster_id : random identifier assigned to each prediction | |
637 - library_name : library name given on the command line of defuse | |
638 - gene1 : ensembl id of gene 1 | |
639 - gene2 : ensembl id of gene 2 | |
640 - gene_name1 : name of gene 1 | |
641 - gene_name2 : name of gene 2 | |
642 - **Evidence** | |
643 - break_predict : breakpoint prediction method, denovo or splitr, that is considered most reliable | |
644 - concordant_ratio : proportion of spanning reads considered concordant by blat | |
645 - denovo_min_count : minimum kmer count across denovo assembled sequence | |
646 - denovo_sequence : fusion sequence predicted by debruijn based denovo sequence assembly | |
647 - denovo_span_pvalue : p-value, lower values are evidence the prediction is a false positive | |
648 - gene_align_strand1 : alignment strand for spanning read alignments to gene 1 | |
649 - gene_align_strand2 : alignment strand for spanning read alignments to gene 2 | |
650 - min_map_count : minimum of the number of genomic mappings for each spanning read | |
651 - max_map_count : maximum of the number of genomic mappings for each spanning read | |
652 - mean_map_count : average of the number of genomic mappings for each spanning read | |
653 - num_multi_map : number of spanning reads that map to more than one genomic location | |
654 - span_count : number of spanning reads supporting the fusion | |
655 - span_coverage1 : coverage of spanning reads aligned to gene 1 as a proportion of expected coverage | |
656 - span_coverage2 : coverage of spanning reads aligned to gene 2 as a proportion of expected coverage | |
657 - span_coverage_min : minimum of span_coverage1 and span_coverage2 | |
658 - span_coverage_max : maximum of span_coverage1 and span_coverage2 | |
659 - splitr_count : number of split reads supporting the prediction | |
660 - splitr_min_pvalue : p-value, lower values are evidence the prediction is a false positive | |
661 - splitr_pos_pvalue : p-value, lower values are evidence the prediction is a false positive | |
662 - splitr_sequence : fusion sequence predicted by split reads | |
663 - splitr_span_pvalue : p-value, lower values are evidence the prediction is a false positive | |
664 - **Annotation** | |
665 - adjacent : fusion between adjacent genes | |
666 - altsplice : fusion likely the product of alternative splicing between adjacent genes | |
667 - break_adj_entropy1 : di-nucleotide entropy of the 40 nucleotides adjacent to the fusion splice in gene 1 | |
668 - break_adj_entropy2 : di-nucleotide entropy of the 40 nucleotides adjacent to the fusion splice in gene 2 | |
669 - break_adj_entropy_min : minimum of break_adj_entropy1 and break_adj_entropy2 | |
670 - breakpoint_homology : number of nucleotides at the fusion splice that align equally well to gene 1 or gene 2 | |
671 - breakseqs_estislands_percident : maximum percent identity of fusion sequence alignments to est islands | |
672 - cdna_breakseqs_percident : maximum percent identity of fusion sequence alignments to cdna | |
673 - deletion : fusion produced by a genomic deletion | |
674 - est_breakseqs_percident : maximum percent identity of fusion sequence alignments to est | |
675 - eversion : fusion produced by a genomic eversion | |
676 - exonboundaries : fusion splice at exon boundaries | |
677 - expression1 : expression of gene 1 as number of concordant pairs aligned to exons | |
678 - expression2 : expression of gene 2 as number of concordant pairs aligned to exons | |
679 - gene_chromosome1 : chromosome of gene 1 | |
680 - gene_chromosome2 : chromosome of gene 2 | |
681 - gene_end1 : end position for gene 1 | |
682 - gene_end2 : end position for gene 2 | |
683 - gene_location1 : location of breakpoint in gene 1 | |
684 - gene_location2 : location of breakpoint in gene 2 | |
685 - gene_start1 : start of gene 1 | |
686 - gene_start2 : start of gene 2 | |
687 - gene_strand1 : strand of gene 1 | |
688 - gene_strand2 : strand of gene 2 | |
689 - genome_breakseqs_percident : maximum percent identity of fusion sequence alignments to genome | |
690 - genomic_break_pos1 : genomic position in gene 1 of fusion splice / breakpoint | |
691 - genomic_break_pos2 : genomic position in gene 2 of fusion splice / breakpoint | |
692 - genomic_strand1 : genomic strand in gene 1 of fusion splice / breakpoint, retained sequence upstream on this strand, breakpoint is downstream | |
693 - genomic_strand2 : genomic strand in gene 2 of fusion splice / breakpoint, retained sequence upstream on this strand, breakpoint is downstream | |
694 - interchromosomal : fusion produced by an interchromosomal translocation | |
695 - interrupted_index1 : ratio of coverage before and after the fusion splice / breakpoint in gene 1 | |
696 - interrupted_index2 : ratio of coverage before and after the fusion splice / breakpoint in gene 2 | |
697 - inversion : fusion produced by genomic inversion | |
698 - orf : fusion combines genes in a way that preserves a reading frame | |
699 - probability : probability produced by classification using adaboost and example positives/negatives (only given in results.classify.tsv) | |
700 - read_through : fusion involving adjacent genes, potentially resulting from co-transcription rather than genome rearrangement | |
701 - repeat_proportion1 : proportion of the spanning reads in gene 1 that span a repeat region | |
702 - repeat_proportion2 : proportion of the spanning reads in gene 2 that span a repeat region | |
703 - max_repeat_proportion : max of repeat_proportion1 and repeat_proportion2 | |
704 - splice_score : number of nucleotides similar to GTAG at fusion splice | |
705 - num_splice_variants : number of potential splice variants for this gene pair | |
706 - splicing_index1 : number of concordant pairs in gene 1 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 2 | |
707 - splicing_index2 : number of concordant pairs in gene 2 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 1 | |
708 | |
709 | |
710 **Example** | |
711 | |
712 results.tsv:: | |
713 | |
714 cluster_id splitr_sequence splitr_count splitr_span_pvalue splitr_pos_pvalue splitr_min_pvalue adjacent altsplice break_adj_entropy1 break_adj_entropy2 break_adj_entropy_min break_predict breakpoint_homology breakseqs_estislands_percident cdna_breakseqs_percident concordant_ratio deletion est_breakseqs_percident eversion exonboundaries expression1 expression2 gene1 gene2 gene_align_strand1 gene_align_strand2 gene_chromosome1 gene_chromosome2 gene_end1 gene_end2 gene_location1 gene_location2 gene_name1 gene_name2 gene_start1 gene_start2 gene_strand1 gene_strand2 genome_breakseqs_percident genomic_break_pos1 genomic_break_pos2 genomic_strand1 genomic_strand2 interchromosomal interrupted_index1 interrupted_index2 inversion library_name max_map_count max_repeat_proportion mean_map_count min_map_count num_multi_map num_splice_variants orf read_through repeat_proportion1 repeat_proportion2 span_count span_coverage1 span_coverage2 span_coverage_max span_coverage_min splice_score splicing_index1 splicing_index2 | |
715 1169 GCTTACTGTATGCCAGGCCCCAGAGGGGCAACCACCCTCTAAAGAGAGCGGCTCCTGCCTCCCAGAAAGCTCACAGACTGTGGGAGGGAAACAGGCAGCAGGTGAAGATGCCAAATGCCAGGATATCTGCCCTGTCCTTGCTTGATGCAGCTGCTGGCTCCCACGTTCTCCCCAGAATCCCCTCACACTCCTGCTGTTTTCTCTGCAGGTTGGCAGAGCCCCATGAGGGCAGGGCAGCCACTTTGTTCTTGGGCGGCAAACCTCCCTGGGCGGCACGGAAACCACGGTGAGAAGGGGGCAGGTCGGGCACGTGCAGGGACCACGCTGCAGG|TGTACCCAACAGCTCCGAAGAGACAGCGACCATCGAGAACGGGCCATGATGACGATGGCGGTTTTGTCGAAAAGAAAAGGGGGAAATGTGGGGAAAAGCAAGAGAGATCAGATTGTTACTGTGTCTGTGTAGAAAGAAGTAGACATGGGAGACTCCATTTTGTTCTGTACTAAGAAAAATTCTTCTGCCTTGAGATTCGGTGACCCCACCCCCAACCCCGTGCTCTCTGAAACATGTGCTGTGTCCACTCAGGGTTGAATGGATTAAGGGCGGTGCGAGACGTGCTTT 2 0.000436307890680442 0.110748295953850 0.0880671602973091 N Y 3.19872427442695 3.48337348351473 3.19872427442695 splitr 0 0 0 0 Y 0 N N 0 0 ENSG00000105549 ENSG00000213753 + - 19 19 376013 59111168 intron upstream THEG AC016629.2 361750 59084870 - + 0 375099 386594 + - N 8.34107429512245 - N output_dir 82 0.677852348993289 40.6666666666667 1 11 1 N N 0.361271676300578 0.677852348993289 12 0.758602776578432 0.569678713445872 0.758602776578432 0.569678713445872 2 0.416666666666667 - | |
716 3596 TGGGGGTTGAGGCTTCTGTTCCCAGGTTCCATGACCTCAGAGGTGGCTGGTGAGGTTATGACCTTTGCCCTCCAGCCCTGGCTTAAAACCTCAGCCCTAGGACCTGGTTAAAGGAAGGGGAGATGGAGCTTTGCCCCGACCCCCCCCCGTTCCCCTCACCTGTCAGCCCGAGCTGGGCCAGGGCCCCTAGGTGGGGAACTGGGCCGGGGGGCGGGCACAAGCGGAGGTGGTGCCCCCAAAAGGGCTCCCGGTGGGGTCTTGCTGAGAAGGTGAGGGGTTCCCGGGGCCGCAGCAGGTGGTGGTGGAGGAGCCAAGCGGCTGTAGAGCAAGGGGTGAGCAGGTTCCAGACCGTAGAGGCGGGCAGCGGCCACGGCCCCGGGTCCAGTTAGCTCCTCACCCGCCTCATAGAAGCGGGGTGGCCTTGCCAGGCGTGGGGGTGCTGCC|TTCCTTGGATGTGGTAGCCGTTTCTCAGGCTCCCTCTCCGGAATCGAACCCTGATTCCCCGTCACCCGTGGTCACCATGGTAGGCACGGCGACTACCATCGAAAGTTGATAGGGCAGACGTTCGAATGGGTCGTCGCCGCCACGGGGGGCGTGCGATCAGCCCGAGGTTATCTAGAGTCACCAAAGCCGCCGGCGCCCGCCCCCCGGCCGGGGCCGGAGAGGGGCTGACCGGGTTGGTTTTGATCTGATAAATGCACGCATCCCCCCCGCGAAGGGGGTCAGCGCCCGTCGGCATGTATTAGCTCTAGAATTACCACAGTTATCCAAGTAGGAGAGGAGCGAGCGACCAAAGGAACCATAACTGATTTAATGAGCCATTCGCAGTTTCACTGTACCGGCCGTGCGTACTTAGACATGCATGGCTTAATCTTTGAGACAAGCATATGCTACTGGCAGG 250 7.00711162298275e-72 0.00912124762512338 0.00684237452309549 N N 3.31745197152461 3.47233119514066 3.31745197152461 splitr 7 0.0157657657657656 0 0 N 0.0135135135135136 N N 0 0 ENSG00000156860 ENSG00000212932 - + 16 21 30682131 48111157 coding upstream FBRS RPL23AP4 30670289 48110676 + + 0.0157657657657656 30680678 9827473 - + Y - - N output_dir 2 1 1.11111111111111 1 1 1 N N 0 1 9 0.325530693397641 0.296465452915709 0.325530693397641 0.296465452915709 2 - - | |
717 | |
718 </help> | |
719 </tool> |