Mercurial > repos > jjohnson > defuse
annotate defuse.xml @ 9:9f30de0ff090 draft
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Mon, 14 Jan 2013 11:26:17 -0600 |
parents | 57841f58676f |
children | f65857c1b92e |
rev | line source |
---|---|
7 | 1 <tool id="defuse" name="DeFuse" version="1.6"> |
1 | 2 <description>identify fusion transcripts</description> |
3 <requirements> | |
7 | 4 <requirement type="package" version="0.6.0">defuse</requirement> |
5 <requirement type="package" version="0.1.18">samtools</requirement> | |
5
3bd1087db05e
Add dependecies for bowtie, blat, and faToTwoBit
Jim Johnson <jj@umn.edu>
parents:
4
diff
changeset
|
6 <requirement type="package" version="0.12.7">bowtie</requirement> |
7 | 7 <requirement type="package" version="2012-07-20">gmap</requirement> |
5
3bd1087db05e
Add dependecies for bowtie, blat, and faToTwoBit
Jim Johnson <jj@umn.edu>
parents:
4
diff
changeset
|
8 <requirement type="package" version="34x10">blat</requirement> |
3bd1087db05e
Add dependecies for bowtie, blat, and faToTwoBit
Jim Johnson <jj@umn.edu>
parents:
4
diff
changeset
|
9 <requirement type="package" version="34x10">fatotwobit</requirement> |
1 | 10 </requirements> |
3
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
11 <command interpreter="command"> /bin/bash $shscript </command> |
1 | 12 <inputs> |
13 <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads. (FASTQ interlacer will pair reads and remove the unpaired. FASTQ de-interlacer will separate the result into left and right reads.)"/> | |
14 <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/> | |
15 <conditional name="refGenomeSource"> | |
16 <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help=""> | |
17 <option value="indexed">Use a built-in DeFuse Reference Dataset</option> | |
18 <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option> | |
19 </param> | |
20 <when value="indexed"> | |
21 <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team"> | |
22 <options from_file="defuse.loc"> | |
23 <column name="name" index="1"/> | |
24 <column name="value" index="2"/> | |
25 <filter type="sort_by" column="0" /> | |
26 <validator type="no_options" message="No indexes are available" /> | |
27 </options> | |
28 </param> | |
29 <conditional name="defuse_param"> | |
30 <param name="settings" type="select" label="Defuse parameter settings" help=""> | |
31 <option value="preSet">Default settings</option> | |
32 <option value="full">Full parameter list</option> | |
33 </param> | |
34 <when value="preSet" /> | |
35 <when value="full"> | |
36 <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" /> | |
37 <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" /> | |
38 <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" /> | |
39 <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision"> | |
40 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> | |
41 </param> | |
42 <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" /> | |
43 <param name="split_count_threshold" type="integer" value="3" optional="true" label="Filter split_count_threshold" /> | |
44 <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold"> | |
45 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> | |
46 </param> | |
47 <param name="max_dist_pos" type="integer" value="600" optional="true" label="Filter max_dist_pos" /> | |
48 <param name="num_dist_genes" type="integer" value="500" optional="true" label="Filter num_dist_genes" /> | |
49 <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" /> | |
50 <param name="max_concordant_ratio" type="float" value="0.1" optional="true" label="Filter max_concordant_ratio"> | |
51 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> | |
52 </param> | |
53 <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" /> | |
54 <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold"> | |
55 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> | |
56 </param> | |
57 <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density"> | |
58 <help>Position density when calculating covariance</help> | |
59 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> | |
60 </param> | |
61 <param name="denovo_assembly" type="select" label="denovo_assembly" help=""> | |
62 <option value="">Use Default</option> | |
63 <option value="no">no</option> | |
64 <option value="yes">yes</option> | |
65 </param> | |
66 <!-- | |
67 <param name="positive_controls" type="data" format="txt" optional="true" label="Defuse positive_controls" help=""/> | |
68 --> | |
69 </when> <!-- full --> | |
70 </conditional> <!-- defuse_param --> | |
71 </when> | |
72 <when value="history"> | |
73 <param name="config" type="data" format="txt" label="Defuse Config file" help=""/> | |
74 </when> <!-- history --> | |
75 </conditional> <!-- refGenomeSource --> | |
6 | 76 <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files" |
77 help="The defuse output working directory can be helpful for determining errors that may have occurred during the run, | |
78 but they require considerable diskspace, and should be deleted and purged when no longer needed."/> | |
4
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
79 <param name="do_get_reads" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/> |
1 | 80 </inputs> |
6 | 81 <outputs> |
82 <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/> | |
83 <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" /> | |
84 <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output (purge when no longer needed)"> | |
85 <filter>keep_output == True</filter> | |
86 </data> | |
87 <data format="tabular" name="results_tsv" label="${tool.name} on ${on_string}: results.tsv" /> | |
88 <data format="tabular" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" /> | |
89 <data format="tabular" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" /> | |
90 <data format="html" name="fusion_reads" label="${tool.name} on ${on_string}: fusion_reads"> | |
91 <filter>do_get_reads == True</filter> | |
92 </data> | |
93 </outputs> | |
1 | 94 <configfiles> |
95 <configfile name="defuse_config"> | |
96 #import ast | |
97 #if $refGenomeSource.genomeSource == "history": | |
98 #include raw $refGenomeSource.config.__str__ | |
99 #else | |
100 #set $ref_dict = dict($ast.literal_eval($refGenomeSource.index.value)) | |
101 # | |
102 # Configuration file for defuse | |
103 # | |
104 # At a minimum, change all values enclosed by [] | |
105 # | |
2
4245c2b047de
Changes for defuse-0.4.3, modifications for non-human genomes no longer required, defuse.xml searches for location of scripts/defuse.pl
Jim Johnson <jj@umn.edu>
parents:
1
diff
changeset
|
106 |
1 | 107 # Directory where the defuse code was unpacked |
108 ## Default location in the tool/defuse directory | |
109 # source_directory = ${__root_dir__}/tools/defuse | |
110 source_directory = #slurp | |
111 #try | |
112 $ref_dict['source_directory'] | |
113 #except | |
4
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
114 __DEFUSE_PATH__ |
1 | 115 #end try |
116 | |
117 # Directory where you want your dataset | |
118 dataset_directory = #slurp | |
119 #try | |
120 $ref_dict['dataset_directory'] | |
121 #except | |
122 /project/db/genomes/Hsapiens/hg19/defuse | |
123 #end try | |
124 | |
125 # Input genome and gene models | |
126 gene_models = #slurp | |
127 #try | |
128 $ref_dict['gene_models'] | |
129 #except | |
130 \$(dataset_directory)/Homo_sapiens.GRCh37.62.gtf | |
131 #end try | |
132 genome_fasta = #slurp | |
133 #try | |
134 $ref_dict['genome_fasta'] | |
135 #except | |
136 \$(dataset_directory)/Homo_sapiens.GRCh37.62.dna.chromosome.fa | |
137 #end try | |
138 | |
139 # Repeat table from ucsc genome browser | |
140 repeats_filename = #slurp | |
141 #try | |
142 $ref_dict['repeats_filename'] | |
143 #except | |
144 \$(dataset_directory)/rmsk.txt | |
145 #end try | |
146 | |
147 # EST info downloaded from ucsc genome browser | |
148 est_fasta = #slurp | |
149 #try | |
150 $ref_dict['est_fasta'] | |
151 #except | |
152 \$(dataset_directory)/est.fa | |
153 #end try | |
154 est_alignments = #slurp | |
155 #try | |
156 $ref_dict['est_alignments'] | |
157 #except | |
158 \$(dataset_directory)/intronEst.txt | |
159 #end try | |
160 | |
161 # Unigene clusters downloaded from ncbi | |
162 unigene_fasta = #slurp | |
163 #try | |
164 $ref_dict['unigene_fasta'] | |
165 #except | |
166 \$(dataset_directory)/Hs.seq.uniq | |
167 #end try | |
168 | |
169 # Paths to external tools | |
170 bowtie_bin = #slurp | |
171 #try | |
172 $ref_dict['bowtie_bin'] | |
173 #except | |
4
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
174 __BOWTIE_BIN__ |
1 | 175 #end try |
176 bowtie_build_bin = #slurp | |
177 #try | |
178 $ref_dict['bowtie_build_bin'] | |
179 #except | |
4
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
180 __BOWTIE_BUILD_BIN__ |
1 | 181 #end try |
182 blat_bin = #slurp | |
183 #try | |
184 $ref_dict['blat_bin'] | |
185 #except | |
4
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
186 __BLAT_BIN__ |
1 | 187 #end try |
188 fatotwobit_bin = #slurp | |
189 #try | |
190 $ref_dict['fatotwobit_bin'] | |
191 #except | |
4
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
192 __FATOTWOBIT_BIN__ |
1 | 193 #end try |
8
57841f58676f
Add gmap_bin to defuse.xml configfile generation
Jim Johnson <jj@umn.edu>
parents:
7
diff
changeset
|
194 gmap_bin = #slurp |
57841f58676f
Add gmap_bin to defuse.xml configfile generation
Jim Johnson <jj@umn.edu>
parents:
7
diff
changeset
|
195 #try |
57841f58676f
Add gmap_bin to defuse.xml configfile generation
Jim Johnson <jj@umn.edu>
parents:
7
diff
changeset
|
196 $ref_dict['gmap_bin'] |
57841f58676f
Add gmap_bin to defuse.xml configfile generation
Jim Johnson <jj@umn.edu>
parents:
7
diff
changeset
|
197 #except |
57841f58676f
Add gmap_bin to defuse.xml configfile generation
Jim Johnson <jj@umn.edu>
parents:
7
diff
changeset
|
198 __GMAP_BIN__ |
57841f58676f
Add gmap_bin to defuse.xml configfile generation
Jim Johnson <jj@umn.edu>
parents:
7
diff
changeset
|
199 #end try |
9
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
200 gmap_bin = #slurp |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
201 #try |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
202 $ref_dict['gmap_bin'] |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
203 #except |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
204 __GMAP_BIN__ |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
205 #end try |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
206 gmap_setup_bin = #slurp |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
207 #try |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
208 $ref_dict['gmap_setup_bin'] |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
209 #except |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
210 __GMAP_SETUP_BIN__ |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
211 #end try |
1 | 212 r_bin = #slurp |
213 #try | |
214 $ref_dict['r_bin'] | |
215 #except | |
4
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
216 __R_BIN__ |
1 | 217 #end try |
218 rscript_bin = #slurp | |
219 #try | |
220 $ref_dict['rscript_bin'] | |
221 #except | |
4
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
222 __RSCRIPT_BIN__ |
1 | 223 #end try |
224 | |
9
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
225 # Directory for the GMAP index |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
226 gmap_index_directory = #slurp |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
227 #try |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
228 $ref_dict['gmap_index_directory'] |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
229 #except |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
230 \$(dataset_directory)/gmap |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
231 #end try |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
232 |
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
233 |
1 | 234 #raw |
235 # Dataset files | |
236 dataset_prefix = $(dataset_directory)/defuse | |
237 chromosome_prefix = $(dataset_prefix).dna.chromosomes | |
238 exons_fasta = $(dataset_prefix).exons.fa | |
239 cds_fasta = $(dataset_prefix).cds.fa | |
240 cdna_regions = $(dataset_prefix).cdna.regions | |
241 cdna_fasta = $(dataset_prefix).cdna.fa | |
242 reference_fasta = $(dataset_prefix).reference.fa | |
243 rrna_fasta = $(dataset_prefix).rrna.fa | |
244 ig_gene_list = $(dataset_prefix).ig.gene.list | |
245 repeats_regions = $(dataset_directory)/repeats.regions | |
246 est_split_fasta1 = $(dataset_directory)/est.1.fa | |
247 est_split_fasta2 = $(dataset_directory)/est.2.fa | |
248 est_split_fasta3 = $(dataset_directory)/est.3.fa | |
249 est_split_fasta4 = $(dataset_directory)/est.4.fa | |
250 est_split_fasta5 = $(dataset_directory)/est.5.fa | |
251 est_split_fasta6 = $(dataset_directory)/est.6.fa | |
252 est_split_fasta7 = $(dataset_directory)/est.7.fa | |
253 est_split_fasta8 = $(dataset_directory)/est.8.fa | |
254 est_split_fasta9 = $(dataset_directory)/est.9.fa | |
255 | |
256 # Fasta files with bowtie indices for prefiltering reads for concordantly mapping pairs | |
257 prefilter1 = $(unigene_fasta) | |
258 | |
259 # deFuse scripts and tools | |
260 scripts_directory = $(source_directory)/scripts | |
261 tools_directory = $(source_directory)/tools | |
262 data_directory = $(source_directory)/data | |
263 #end raw | |
264 | |
265 # Path to samtools, 0.1.8 is compiled for you, use other versions at your own risk | |
266 samtools_bin = #slurp | |
267 #try | |
268 $ref_dict['samtools_bin'] | |
269 #except | |
270 \$(source_directory)/external/samtools-0.1.8/samtools | |
271 #end try | |
272 | |
273 # Bowtie parameters | |
274 bowtie_threads = #slurp | |
275 #try | |
276 $ref_dict['bowtie_threads'] | |
277 #except | |
3
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
278 4 |
1 | 279 #end try |
280 bowtie_quals = #slurp | |
281 #try | |
282 $ref_dict['bowtie_quals'] | |
283 #except | |
284 --phred33-quals | |
285 #end try | |
286 max_insert_size = #slurp | |
287 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_insert_size.__str__ != "": | |
288 $refGenomeSource.defuse_param.max_insert_size | |
289 #else | |
290 #try | |
291 $ref_dict['max_insert_size'] | |
292 #except | |
293 500 | |
294 #end try | |
295 #end if | |
296 | |
297 # Parameters for building the dataset | |
298 chromosomes = #slurp | |
299 #try | |
300 $ref_dict.chromosomes | |
301 #except | |
302 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT | |
303 #end try | |
304 mt_chromosome = #slurp | |
305 #try | |
306 $ref_dict['mt_chromosome'] | |
307 #except | |
308 MT | |
309 #end try | |
310 gene_sources = #slurp | |
311 #try | |
312 $ref_dict['gene_sources'] | |
313 #except | |
314 IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding | |
315 #end try | |
316 ig_gene_sources = #slurp | |
317 #try | |
318 $ref_dict['ig_gene_sources'] | |
319 #except | |
320 IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene | |
321 #end try | |
322 rrna_gene_sources = #slurp | |
323 #try | |
324 $ref_dict['rrna_gene_sources'] | |
325 #except | |
326 Mt_rRNA,rRNA,rRNA_pseudogene | |
327 #end try | |
328 | |
329 # Blat sequences per job | |
330 num_blat_sequences = #slurp | |
331 #try | |
332 $ref_dict['num_blat_sequences'] | |
333 #except | |
334 10000 | |
335 #end try | |
336 | |
337 # Minimum gene fusion range | |
338 dna_concordant_length = #slurp | |
339 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.dna_concordant_length.__str__ != "": | |
340 $refGenomeSource.defuse_param.dna_concordant_length | |
341 #else | |
342 #try | |
343 $ref_dict['dna_concordant_length'] | |
344 #except | |
345 2000 | |
346 #end try | |
347 #end if | |
348 | |
349 # Trim length for discordant reads (split reads are not trimmed) | |
350 discord_read_trim = #slurp | |
351 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.discord_read_trim.__str__ != "": | |
352 $refGenomeSource.defuse_param.discord_read_trim | |
353 #else | |
354 #try | |
355 $ref_dict['discord_read_trim'] | |
356 #except | |
357 50 | |
358 #end try | |
359 #end if | |
360 | |
361 # Filtering parameters | |
362 clustering_precision = #slurp | |
363 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.clustering_precision.__str__ != "" | |
364 $refGenomeSource.defuse_param.clustering_precision | |
365 #else | |
366 #try | |
367 $ref_dict['clustering_precision'] | |
368 #except | |
369 0.95 | |
370 #end try | |
371 #end if | |
372 span_count_threshold = #slurp | |
373 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.span_count_threshold.__str__ != "" | |
374 $refGenomeSource.defuse_param.span_count_threshold | |
375 #else | |
376 #try | |
377 $ref_dict['span_count_threshold'] | |
378 #except | |
379 5 | |
380 #end try | |
381 #end if | |
382 split_count_threshold = #slurp | |
383 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.split_count_threshold.__str__ != "" | |
384 $refGenomeSource.defuse_param.split_count_threshold | |
385 #else | |
386 #try | |
387 $ref_dict['split_count_threshold'] | |
388 #except | |
389 3 | |
390 #end try | |
391 #end if | |
392 percent_identity_threshold = #slurp | |
393 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.percent_identity_threshold.__str__ != "" | |
394 $refGenomeSource.defuse_param.percent_identity_threshold | |
395 #else | |
396 #try | |
397 $ref_dict['percent_identity_threshold'] | |
398 #except | |
399 0.90 | |
400 #end try | |
401 #end if | |
402 max_dist_pos = #slurp | |
403 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_dist_pos.__str__ != "" | |
404 $refGenomeSource.defuse_param.max_dist_pos | |
405 #else | |
406 #try | |
407 $ref_dict['max_dist_pos'] | |
408 #except | |
409 600 | |
410 #end try | |
411 #end if | |
412 num_dist_genes = #slurp | |
413 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.num_dist_genes.__str__ != "" | |
414 $refGenomeSource.defuse_param.num_dist_genes | |
415 #else | |
416 #try | |
417 $ref_dict['num_dist_genes'] | |
418 #except | |
419 500 | |
420 #end try | |
421 #end if | |
422 split_min_anchor = #slurp | |
423 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.split_min_anchor.__str__ != "" | |
424 $refGenomeSource.defuse_param.split_min_anchor | |
425 #else | |
426 #try | |
427 $ref_dict['split_min_anchor'] | |
428 #except | |
429 4 | |
430 #end try | |
431 #end if | |
432 max_concordant_ratio = #slurp | |
433 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_concordant_ratio.__str__ != "" | |
434 $refGenomeSource.defuse_param.max_concordant_ratio | |
435 #else | |
436 #try | |
437 $ref_dict['max_concordant_ratio'] | |
438 #except | |
439 0.1 | |
440 #end try | |
441 #end if | |
442 splice_bias = #slurp | |
443 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.splice_bias.__str__ != "" | |
444 $refGenomeSource.defuse_param.splice_bias | |
445 #else | |
446 #try | |
447 $ref_dict['splice_bias'] | |
448 #except | |
449 10 | |
450 #end try | |
451 #end if | |
452 denovo_assembly = #slurp | |
453 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.denovo_assembly.__str__ != "" | |
454 $refGenomeSource.defuse_param.denovo_assembly | |
455 #else | |
456 #try | |
457 $ref_dict['denovo_assembly'] | |
458 #except | |
459 no | |
460 #end try | |
461 #end if | |
462 probability_threshold = #slurp | |
463 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.probability_threshold.__str__ != "" | |
464 $refGenomeSource.defuse_param.probability_threshold | |
465 #else | |
466 #try | |
467 $ref_dict['probability_threshold'] | |
468 #except | |
469 0.50 | |
470 #end try | |
471 #end if | |
472 positive_controls = \$(data_directory)/controls.txt | |
473 | |
474 # Position density when calculating covariance | |
475 covariance_sampling_density = #slurp | |
476 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.covariance_sampling_density.__str__ != "" | |
477 $refGenomeSource.defuse_param.covariance_sampling_density | |
478 #else | |
479 #try | |
480 $ref_dict['covariance_sampling_density'] | |
481 #except | |
482 0.01 | |
483 #end try | |
484 #end if | |
485 | |
486 | |
487 # Number of reads for each job in split | |
488 reads_per_job = 1000000 | |
489 | |
490 # Number of regions for each breakpoint sequence job in split | |
491 regions_per_job = 20 | |
492 | |
493 #raw | |
494 # If you have command line 'mail' and wish to be notified | |
495 # mailto = andrew.mcpherson@gmail.com | |
496 | |
497 # Remove temp files | |
498 remove_job_files = yes | |
499 remove_job_temp_files = yes | |
500 | |
501 # Converting to fastq | |
502 # Fastq converter config format 1 for reads stored in separate files for each end | |
503 # data_lane_regex_N is a perl regex which stores the lane id in $1 | |
504 # data_end_regex_N is a perl regex which stores the end, 1 or 2, in $1 | |
505 # data_compress_regex_N is a perl regex which stores the compression extension in $1 | |
506 # data_convert_N is the associated conversion utility that takes data at stdin and outputs fastq at stdout | |
507 # Fastq converter config format 2 for reads stored in separate files for each end | |
508 # data_lane_regex_N is a perl regex which stores the lane id in $1 | |
509 # data_compress_regex_N is a perl regex which stores the compression extension in $1 | |
510 # data_end1_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 1 at stdout | |
511 # data_end2_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 2 at stdout | |
512 | |
513 data_lane_regex_1 = ^(.+)_[12]_export\.txt.*$ | |
514 data_end_regex_1 = ^.+_([12])_export\.txt.*$ | |
515 data_compress_regex_1 = ^.+_[12]_export\.txt(.*)$ | |
516 data_converter_1 = $(scripts_directory)/fq_all2std.pl export2std | |
517 | |
518 data_lane_regex_2 = ^(.+)_[12]_concat_qseq\.txt.*$ | |
519 data_end_regex_2 = ^.+_([12])_concat_qseq\.txt.*$ | |
520 data_compress_regex_2 = ^.+_[12]_concat_qseq\.txt(.*)$ | |
521 data_converter_2 = $(scripts_directory)/qseq2fastq.pl | |
522 | |
523 data_lane_regex_3 = ^(.+)\.bam.*$ | |
524 data_compress_regex_3 = ^.+\.bam(.*)$ | |
525 data_end1_converter_3 = samtools view - | filter_sam_mate.pl 1 | sam_to_fastq.pl | |
526 data_end2_converter_3 = samtools view - | filter_sam_mate.pl 2 | sam_to_fastq.pl | |
527 | |
528 data_lane_regex_4 = ^(.+).[12].fastq.*$ | |
529 data_end_regex_4 = ^.+.([12]).fastq.*$ | |
530 data_compress_regex_4 = ^.+.[12].fastq(.*)$ | |
531 data_converter_4 = cat | |
532 #end raw | |
533 | |
534 #end if | |
535 | |
536 </configfile> | |
3
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
537 <configfile name="shscript"> |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
538 #!/bin/bash |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
539 ## define some things for cheetah processing |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
540 #set $ds = chr(36) |
4
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
541 #set $amp = chr(38) |
3
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
542 #set $gt = chr(62) |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
543 #set $lt = chr(60) |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
544 #set $echo_cmd = 'echo' |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
545 ## Find the defuse.pl in the galaxy tool path |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
546 #import Cheetah.FileUtils |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
547 ## declare a bash function for converting a results tsv into html with links to the get_reads output files |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
548 results2html() { |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
549 rlts=${ds}1 |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
550 rslt_name=`basename ${ds}rlts` |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
551 html=${ds}2 |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
552 echo '${lt}html${gt}${lt}head${gt}${lt}title${gt}Defuse '${ds}rslt_name'${lt}/title${gt}${lt}/head${gt}${lt}body${gt}' ${gt} ${ds}html |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
553 echo '${lt}h2${gt}Defuse '${ds}rslt_name'${lt}/h2${gt}${lt}table${gt}' ${gt}${gt} ${ds}html |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
554 if [ -z "${ds}3" ] |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
555 then |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
556 awk '${ds}1 ~ /cluster_id/{printf("${lt}tr${gt}");for (i = 1; i ${lt}= NF; i++) {printf("${lt}th${gt}%s${lt}/th${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}\ |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
557 ${ds}1 ~ /[1-9][0-9]*/{printf("${lt}tr${gt}");for (i = 1; i ${lt}= NF; i++) {printf("${lt}td${gt}%s${lt}/td${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}' ${ds}rlts ${gt}${gt} ${ds}html |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
558 echo '${lt}/table${gt}' ${gt}${gt} ${ds}html |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
559 echo '${lt}/body${gt}${lt}/html${gt}' ${gt}${gt} ${ds}html |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
560 else |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
561 export _EFP=${ds}3 |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
562 mkdir -p ${ds}_EFP |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
563 awk '${ds}1 ~ /cluster_id/{printf("${lt}tr${gt}");for (i = 1; i ${lt}= NF; i++) {printf("${lt}th${gt}%s${lt}/th${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}\ |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
564 ${ds}1 ~ /[1-9][0-9]*/{fn="cluster_"${ds}1"_reads.txt"; \ |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
565 printf("${lt}tr${gt}${lt}td${gt}${lt}a href=\"%s\"${gt}%s${lt}/a${gt}${lt}/td${gt}",fn, ${ds}1);for (i = 2; i ${lt}= NF; i++) {printf("${lt}td${gt}%s${lt}/td${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}' ${ds}rlts ${gt}${gt} ${ds}html |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
566 echo '${lt}/table${gt}' ${gt}${gt} ${ds}html |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
567 echo '${lt}/body${gt}${lt}/html${gt}' ${gt}${gt} ${ds}html |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
568 for i in `awk '${ds}1 ~ /[1-9][0-9]*/{print ${ds}1}' ${ds}rlts`; |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
569 do fn=cluster_${ds}{i}_reads.txt; |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
570 pn=${ds}_EFP/${ds}fn; |
4
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
571 perl \${DEFUSE_PATH}/scripts/get_reads.pl -c $defuse_config -o output_dir -i ${ds}i ${gt} ${ds}pn; |
3
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
572 done |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
573 fi |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
574 } |
4
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
575 ## substitute pathnames into config file |
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
576 if `grep __DEFUSE_PATH__ $defuse_config ${gt} /dev/null`;then sed -i'.tmp' "s#__DEFUSE_PATH__#\${DEFUSE_PATH}#" $defuse_config; fi |
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
577 if `grep __SAMTOOLS_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} SAMTOOLS_BIN=`which samtools`;then sed -i'.tmp' "s#__SAMTOOLS_BIN__#\${SAMTOOLS_BIN}#" $defuse_config; fi |
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
578 if `grep __BOWTIE_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BIN=`which bowtie`;then sed -i'.tmp' "s#__BOWTIE_BIN__#\${BOWTIE_BIN}#" $defuse_config; fi |
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
579 if `grep __BOWTIE_BUILD_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BUILD_BIN=`which bowtie-build`;then sed -i'.tmp' "s#__BOWTIE_BUILD_BIN__#\${BOWTIE_BUILD_BIN}#" $defuse_config; fi |
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
580 if `grep __BLAT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BLAT_BIN=`which blat`;then sed -i'.tmp' "s#__BLAT_BIN__#\${BLAT_BIN}#" $defuse_config; fi |
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
581 if `grep __FATOTWOBIT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} FATOTWOBIT_BIN=`which faToTwoBit`;then sed -i'.tmp' "s#__FATOTWOBIT_BIN__#\${FATOTWOBIT_BIN}#" $defuse_config; fi |
8
57841f58676f
Add gmap_bin to defuse.xml configfile generation
Jim Johnson <jj@umn.edu>
parents:
7
diff
changeset
|
582 if `grep __GMAP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_BIN=`which gmap`;then sed -i'.tmp' "s#__GMAP_BIN__#\${GMAP_BIN}#" $defuse_config; fi |
9
9f30de0ff090
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
583 if `grep __GMAP_SETUP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_SETUP_BIN=`which gmap_setup`;then sed -i'.tmp' "s#__GMAP_SETUP_BIN__#\${GMAP_SETUP_BIN}#" $defuse_config; fi |
4
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
584 if `grep __R_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} R_BIN=`which R`;then sed -i'.tmp' "s#__R_BIN__#\${R_BIN}#" $defuse_config; fi |
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
585 if `grep __RSCRIPT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} RSCRIPT_BIN=`which Rscript`;then sed -i'.tmp' "s#__RSCRIPT_BIN__#\${RSCRIPT_BIN}#" $defuse_config; fi |
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
586 |
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
587 |
3
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
588 ## copy config to output |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
589 cp $defuse_config $config_txt |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
590 ## make a data_dir and ln -s the input fastq |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
591 mkdir -p data_dir |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
592 ln -s $left_pairendreads data_dir/reads_1.fastq |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
593 ln -s $right_pairendreads data_dir/reads_2.fastq |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
594 ## ln to output_dir in from_work_dir |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
595 #if $defuse_out.__str__ != 'None': |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
596 mkdir -p $defuse_out.extra_files_path |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
597 ln -s $defuse_out.extra_files_path output_dir |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
598 #else |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
599 mkdir -p output_dir |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
600 #end if |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
601 ## run defuse.pl; use the core count Galaxy allocated to the job (GALAXY_SLOTS) when it is set, otherwise fall back to the previous fixed value of 8 |
4
679a5c7b1294
deFuse version 0.5.0 - Use tool_dependencies.xml
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
602 perl \${DEFUSE_PATH}/scripts/defuse.pl -c $defuse_config -d data_dir -o output_dir -p \${GALAXY_SLOTS:-8} |
3
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
603 ## copy primary results to output datasets |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
604 if [ -e output_dir/log/defuse.log ]; then cp output_dir/log/defuse.log $defuse_log; fi |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
605 if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
606 if [ -e output_dir/results.filtered.tsv ]; then cp output_dir/results.filtered.tsv $results_filtered_tsv; fi |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
607 if [ -e output_dir/results.classify.tsv ]; then cp output_dir/results.classify.tsv $results_classify_tsv; fi |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
608 ## create html with links for output_dir |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
609 #if $defuse_out.__str__ != 'None': |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
610 if [ -e $defuse_out ] |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
611 then |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
612 echo '${lt}html${gt}${lt}head${gt}${lt}title${gt}Defuse Output${lt}/title${gt}${lt}/head${gt}${lt}body${gt}' ${gt} $defuse_out |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
613 echo '${lt}h2${gt}Defuse Output Files${lt}/h2${gt}${lt}ul${gt}' ${gt}${gt} $defuse_out |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
614 pushd $defuse_out.extra_files_path |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
615 for f in `find -L . -maxdepth 1 -type f`; |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
616 do fn=`basename ${ds}f`; echo '${lt}li${gt}${lt}a href="'${ds}fn'"${gt}'${ds}fn'${lt}/a${gt}${lt}/li${gt}' ${gt}${gt} $defuse_out; |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
617 done |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
618 popd |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
619 echo '${lt}/ul${gt}' ${gt}${gt} $defuse_out |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
620 echo '${lt}/body${gt}${lt}/html${gt}' ${gt}${gt} $defuse_out |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
621 fi |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
622 #end if |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
623 ## run get_reads.pl on each cluster |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
624 #if $fusion_reads.__str__ != 'None': |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
625 if [ -e output_dir/results.filtered.tsv -a -e $fusion_reads ] |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
626 then |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
627 mkdir -p $fusion_reads.extra_files_path |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
628 results2html output_dir/results.filtered.tsv $fusion_reads $fusion_reads.extra_files_path |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
629 fi |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
630 #end if |
c90022a13c7c
DeFuse - Allow user to save the workspace, create an html file with links to the files in the workspace.
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
631 </configfile> |
1 | 632 </configfiles> |
6 | 633 |
1 | 634 <tests> |
635 </tests> | |
636 <help> | |
637 **DeFuse** | |
638 | |
639 DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion. | |
640 | |
641 Journal reference: http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1001138 | |
642 | |
643 .. _DeFuse: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page | |
644 | |
645 ------ | |
646 | |
647 **Inputs** | |
648 | |
649 DeFuse requires 2 fastq files for paired reads, one with the left mate of the paired reads, and a second fastq with the right mate of the paired reads (**with reads in the same order as in the first fastq dataset**). | |
650 | |
651 If your fastq files have reads in different orders or include unpaired reads, you can preprocess them with **FASTQ interlacer** to create a single interlaced fastq dataset with only the paired reads and input that to **FASTQ de-interlacer** to separate the reads into a left fastq and right fastq. | |
652 | |
653 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.4_: | |
654 - genome_fasta from Ensembl | |
655 - gene_models from Ensembl | |
656 - repeats_filename from UCSC RepeatMasker rmsk.txt | |
657 - est_fasta from UCSC | |
658 - est_alignments from UCSC intronEst.txt | |
659 - unigene_fasta from NCBI | |
660 | |
661 .. _DeFuse_Version_0.4: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2 | |
662 | |
663 ------ | |
664 | |
665 **Outputs** | |
666 | |
667 The Galaxy history will contain 5 outputs: the config.txt file that provides DeFuse with its parameters, the defuse.log which details what DeFuse has done and can be useful in determining any errors, and the 3 results files that DeFuse generates. | |
668 | |
669 DeFuse generates 3 results files: results.tsv, results.filtered.tsv, and results.classify.tsv. All three files have the same format, though results.classify.tsv has a probability column from the application of the classifier to results.tsv, and results.filtered.tsv has been filtered according to the threshold probability as set in config.txt. | |
670 | |
671 The file format is tab delimited with one prediction per line, and the following fields per prediction (not necessarily in this order): | |
672 | |
673 - **Identification** | |
674 - cluster_id : random identifier assigned to each prediction | |
675 - library_name : library name given on the command line of defuse | |
676 - gene1 : ensembl id of gene 1 | |
677 - gene2 : ensembl id of gene 2 | |
678 - gene_name1 : name of gene 1 | |
679 - gene_name2 : name of gene 2 | |
680 - **Evidence** | |
681 - break_predict : breakpoint prediction method, denovo or splitr, that is considered most reliable | |
682 - concordant_ratio : proportion of spanning reads considered concordant by blat | |
683 - denovo_min_count : minimum kmer count across denovo assembled sequence | |
684 - denovo_sequence : fusion sequence predicted by debruijn based denovo sequence assembly | |
685 - denovo_span_pvalue : p-value, lower values are evidence the prediction is a false positive | |
686 - gene_align_strand1 : alignment strand for spanning read alignments to gene 1 | |
687 - gene_align_strand2 : alignment strand for spanning read alignments to gene 2 | |
688 - min_map_count : minimum of the number of genomic mappings for each spanning read | |
689 - max_map_count : maximum of the number of genomic mappings for each spanning read | |
690 - mean_map_count : average of the number of genomic mappings for each spanning read | |
691 - num_multi_map : number of spanning reads that map to more than one genomic location | |
692 - span_count : number of spanning reads supporting the fusion | |
693 - span_coverage1 : coverage of spanning reads aligned to gene 1 as a proportion of expected coverage | |
694 - span_coverage2 : coverage of spanning reads aligned to gene 2 as a proportion of expected coverage | |
695 - span_coverage_min : minimum of span_coverage1 and span_coverage2 | |
696 - span_coverage_max : maximum of span_coverage1 and span_coverage2 | |
697 - splitr_count : number of split reads supporting the prediction | |
698 - splitr_min_pvalue : p-value, lower values are evidence the prediction is a false positive | |
699 - splitr_pos_pvalue : p-value, lower values are evidence the prediction is a false positive | |
700 - splitr_sequence : fusion sequence predicted by split reads | |
701 - splitr_span_pvalue : p-value, lower values are evidence the prediction is a false positive | |
702 - **Annotation** | |
703 - adjacent : fusion between adjacent genes | |
704 - altsplice : fusion likely the product of alternative splicing between adjacent genes | |
705 - break_adj_entropy1 : di-nucleotide entropy of the 40 nucleotides adjacent to the fusion splice in gene 1 | |
706 - break_adj_entropy2 : di-nucleotide entropy of the 40 nucleotides adjacent to the fusion splice in gene 2 | |
707 - break_adj_entropy_min : minimum of break_adj_entropy1 and break_adj_entropy2 | |
708 - breakpoint_homology : number of nucleotides at the fusion splice that align equally well to gene 1 or gene 2 | |
709 - breakseqs_estislands_percident : maximum percent identity of fusion sequence alignments to est islands | |
710 - cdna_breakseqs_percident : maximum percent identity of fusion sequence alignments to cdna | |
711 - deletion : fusion produced by a genomic deletion | |
712 - est_breakseqs_percident : maximum percent identity of fusion sequence alignments to est | |
713 - eversion : fusion produced by a genomic eversion | |
714 - exonboundaries : fusion splice at exon boundaries | |
715 - expression1 : expression of gene 1 as number of concordant pairs aligned to exons | |
716 - expression2 : expression of gene 2 as number of concordant pairs aligned to exons | |
717 - gene_chromosome1 : chromosome of gene 1 | |
718 - gene_chromosome2 : chromosome of gene 2 | |
719 - gene_end1 : end position for gene 1 | |
720 - gene_end2 : end position for gene 2 | |
721 - gene_location1 : location of breakpoint in gene 1 | |
722 - gene_location2 : location of breakpoint in gene 2 | |
723 - gene_start1 : start of gene 1 | |
724 - gene_start2 : start of gene 2 | |
725 - gene_strand1 : strand of gene 1 | |
726 - gene_strand2 : strand of gene 2 | |
727 - genome_breakseqs_percident : maximum percent identity of fusion sequence alignments to genome | |
728 - genomic_break_pos1 : genomic position in gene 1 of fusion splice / breakpoint | |
729 - genomic_break_pos2 : genomic position in gene 2 of fusion splice / breakpoint | |
730 - genomic_strand1 : genomic strand in gene 1 of fusion splice / breakpoint, retained sequence upstream on this strand, breakpoint is downstream | |
731 - genomic_strand2 : genomic strand in gene 2 of fusion splice / breakpoint, retained sequence upstream on this strand, breakpoint is downstream | |
732 - interchromosomal : fusion produced by an interchromosomal translocation | |
733 - interrupted_index1 : ratio of coverage before and after the fusion splice / breakpoint in gene 1 | |
734 - interrupted_index2 : ratio of coverage before and after the fusion splice / breakpoint in gene 2 | |
735 - inversion : fusion produced by genomic inversion | |
736 - orf : fusion combines genes in a way that preserves a reading frame | |
737 - probability : probability produced by classification using adaboost and example positives/negatives (only given in results.classify.tsv) | |
738 - read_through : fusion involving adjacent genes, potentially resulting from co-transcription rather than genome rearrangement | |
739 - repeat_proportion1 : proportion of the spanning reads in gene 1 that span a repeat region | |
740 - repeat_proportion2 : proportion of the spanning reads in gene 2 that span a repeat region | |
741 - max_repeat_proportion : max of repeat_proportion1 and repeat_proportion2 | |
742 - splice_score : number of nucleotides similar to GTAG at fusion splice | |
743 - num_splice_variants : number of potential splice variants for this gene pair | |
744 - splicing_index1 : number of concordant pairs in gene 1 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 2 | |
745 - splicing_index2 : number of concordant pairs in gene 2 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 1 | |
746 | |
747 | |
748 **Example** | |
749 | |
750 results.tsv:: | |
751 | |
752 cluster_id splitr_sequence splitr_count splitr_span_pvalue splitr_pos_pvalue splitr_min_pvalue adjacent altsplice break_adj_entropy1 break_adj_entropy2 break_adj_entropy_min break_predict breakpoint_homology breakseqs_estislands_percident cdna_breakseqs_percident concordant_ratio deletion est_breakseqs_percident eversion exonboundaries expression1 expression2 gene1 gene2 gene_align_strand1 gene_align_strand2 gene_chromosome1 gene_chromosome2 gene_end1 gene_end2 gene_location1 gene_location2 gene_name1 gene_name2 gene_start1 gene_start2 gene_strand1 gene_strand2 genome_breakseqs_percident genomic_break_pos1 genomic_break_pos2 genomic_strand1 genomic_strand2 interchromosomal interrupted_index1 interrupted_index2 inversion library_name max_map_count max_repeat_proportion mean_map_count min_map_count num_multi_map num_splice_variants orf read_through repeat_proportion1 repeat_proportion2 span_count span_coverage1 span_coverage2 span_coverage_max span_coverage_min splice_score splicing_index1 splicing_index2 | |
753 1169 GCTTACTGTATGCCAGGCCCCAGAGGGGCAACCACCCTCTAAAGAGAGCGGCTCCTGCCTCCCAGAAAGCTCACAGACTGTGGGAGGGAAACAGGCAGCAGGTGAAGATGCCAAATGCCAGGATATCTGCCCTGTCCTTGCTTGATGCAGCTGCTGGCTCCCACGTTCTCCCCAGAATCCCCTCACACTCCTGCTGTTTTCTCTGCAGGTTGGCAGAGCCCCATGAGGGCAGGGCAGCCACTTTGTTCTTGGGCGGCAAACCTCCCTGGGCGGCACGGAAACCACGGTGAGAAGGGGGCAGGTCGGGCACGTGCAGGGACCACGCTGCAGG|TGTACCCAACAGCTCCGAAGAGACAGCGACCATCGAGAACGGGCCATGATGACGATGGCGGTTTTGTCGAAAAGAAAAGGGGGAAATGTGGGGAAAAGCAAGAGAGATCAGATTGTTACTGTGTCTGTGTAGAAAGAAGTAGACATGGGAGACTCCATTTTGTTCTGTACTAAGAAAAATTCTTCTGCCTTGAGATTCGGTGACCCCACCCCCAACCCCGTGCTCTCTGAAACATGTGCTGTGTCCACTCAGGGTTGAATGGATTAAGGGCGGTGCGAGACGTGCTTT 2 0.000436307890680442 0.110748295953850 0.0880671602973091 N Y 3.19872427442695 3.48337348351473 3.19872427442695 splitr 0 0 0 0 Y 0 N N 0 0 ENSG00000105549 ENSG00000213753 + - 19 19 376013 59111168 intron upstream THEG AC016629.2 361750 59084870 - + 0 375099 386594 + - N 8.34107429512245 - N output_dir 82 0.677852348993289 40.6666666666667 1 11 1 N N 0.361271676300578 0.677852348993289 12 0.758602776578432 0.569678713445872 0.758602776578432 0.569678713445872 2 0.416666666666667 - | |
754 3596 TGGGGGTTGAGGCTTCTGTTCCCAGGTTCCATGACCTCAGAGGTGGCTGGTGAGGTTATGACCTTTGCCCTCCAGCCCTGGCTTAAAACCTCAGCCCTAGGACCTGGTTAAAGGAAGGGGAGATGGAGCTTTGCCCCGACCCCCCCCCGTTCCCCTCACCTGTCAGCCCGAGCTGGGCCAGGGCCCCTAGGTGGGGAACTGGGCCGGGGGGCGGGCACAAGCGGAGGTGGTGCCCCCAAAAGGGCTCCCGGTGGGGTCTTGCTGAGAAGGTGAGGGGTTCCCGGGGCCGCAGCAGGTGGTGGTGGAGGAGCCAAGCGGCTGTAGAGCAAGGGGTGAGCAGGTTCCAGACCGTAGAGGCGGGCAGCGGCCACGGCCCCGGGTCCAGTTAGCTCCTCACCCGCCTCATAGAAGCGGGGTGGCCTTGCCAGGCGTGGGGGTGCTGCC|TTCCTTGGATGTGGTAGCCGTTTCTCAGGCTCCCTCTCCGGAATCGAACCCTGATTCCCCGTCACCCGTGGTCACCATGGTAGGCACGGCGACTACCATCGAAAGTTGATAGGGCAGACGTTCGAATGGGTCGTCGCCGCCACGGGGGGCGTGCGATCAGCCCGAGGTTATCTAGAGTCACCAAAGCCGCCGGCGCCCGCCCCCCGGCCGGGGCCGGAGAGGGGCTGACCGGGTTGGTTTTGATCTGATAAATGCACGCATCCCCCCCGCGAAGGGGGTCAGCGCCCGTCGGCATGTATTAGCTCTAGAATTACCACAGTTATCCAAGTAGGAGAGGAGCGAGCGACCAAAGGAACCATAACTGATTTAATGAGCCATTCGCAGTTTCACTGTACCGGCCGTGCGTACTTAGACATGCATGGCTTAATCTTTGAGACAAGCATATGCTACTGGCAGG 250 7.00711162298275e-72 0.00912124762512338 0.00684237452309549 N N 3.31745197152461 3.47233119514066 3.31745197152461 splitr 7 0.0157657657657656 0 0 N 0.0135135135135136 N N 0 0 ENSG00000156860 ENSG00000212932 - + 16 21 30682131 48111157 coding upstream FBRS RPL23AP4 30670289 48110676 + + 0.0157657657657656 30680678 9827473 - + Y - - N output_dir 2 1 1.11111111111111 1 1 1 N N 0 1 9 0.325530693397641 0.296465452915709 0.325530693397641 0.296465452915709 2 - - | |
755 | |
756 </help> | |
757 </tool> |