Mercurial > repos > jjohnson > defuse
comparison datamanager_create_reference.xml @ 11:b22f8634ff84 draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/defuse commit 23b94b5747c6956360cd2eca0a07a669929ea141-dirty
author | jjohnson |
---|---|
date | Sun, 17 Jan 2016 14:11:06 -0500 |
parents | |
children | 3a4876d01c7e |
comparison
equal
deleted
inserted
replaced
10:f65857c1b92e | 11:b22f8634ff84 |
---|---|
1 <tool id="data_manager_defuse_reference" name="DeFuse Reference DataManager" version="1.6.1" tool_type="manage_data"> | |
2 <description>create a defuse reference from Ensembl and UCSC sources</description> | |
3 <requirements> | |
4 <requirement type="package" version="0.6.1">defuse</requirement> | |
5 <requirement type="package" version="0.1.18">samtools</requirement> | |
6 <requirement type="package" version="1.0.0">bowtie</requirement> | |
7 <requirement type="package" version="2013-05-09">gmap</requirement> | |
8 <requirement type="package" version="latest">kent</requirement> | |
9 </requirements> | |
10 <command interpreter="python"> datamanager_create_reference.py | |
11 --dbkey $genome.ensembl_genome_version | |
12 --description "$genome.ensembl_prefix $genome.ensembl_genome_version ($genome.ucsc_genome_version)" | |
13 --defuse_config $defuse_config | |
14 --defuse_script $defuse_script | |
15 $out_file | |
16 </command> | |
17 <inputs> | |
18 <conditional name="genome"> | |
19 <param name="choice" type="select" label="Select a Genome Build"> | |
20 <option value="GRCh38">Homo_sapiens GRCh38 hg38</option> | |
21 <option value="GRCh37">Homo_sapiens GRCh37 hg19</option> | |
22 <option value="NCBI36">Homo_sapiens NCBI36 hg18</option> | |
23 <option value="GRCm38">Mus_musculus GRCm38 mm10</option> | |
24 <option value="NCBIM37">Mus_musculus NCBIM37 mm9</option> | |
25 <option value="Rnor_5.0">Rattus_norvegicus Rnor_5.0 rn5</option> | |
26 <option value="user_specified">User specified</option> | |
27 </param> | |
28 <when value="GRCh38"> | |
29 <param name="ensembl_organism" type="hidden" value="homo_sapiens"/> | |
30 <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/> | |
31 <param name="ensembl_genome_version" type="hidden" value="GRCh38"/> | |
32 <param name="ensembl_version" type="hidden" value="80"/> | |
33 <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/> | |
34 <param name="ncbi_prefix" type="hidden" value="Hs"/> | |
35 <param name="ucsc_genome_version" type="hidden" value="hg38"/> | |
36 <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/> | |
37 <param name="mt_chromosome" type="hidden" value="MT"/> | |
38 <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/> | |
39 <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/> | |
40 <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/> | |
41 </when> | |
42 <when value="GRCh37"> | |
43 <param name="ensembl_organism" type="hidden" value="homo_sapiens"/> | |
44 <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/> | |
45 <param name="ensembl_genome_version" type="hidden" value="GRCh37"/> | |
46 <param name="ensembl_version" type="hidden" value="71"/> | |
47 <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/> | |
48 <param name="ncbi_prefix" type="hidden" value="Hs"/> | |
49 <param name="ucsc_genome_version" type="hidden" value="hg19"/> | |
50 <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/> | |
51 <param name="mt_chromosome" type="hidden" value="MT"/> | |
52 <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/> | |
53 <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/> | |
54 <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/> | |
55 </when> | |
56 <when value="NCBI36"> | |
57 <param name="ensembl_organism" type="hidden" value="homo_sapiens"/> | |
58 <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/> | |
59 <param name="ensembl_genome_version" type="hidden" value="NCBI36"/> | |
60 <param name="ensembl_version" type="hidden" value="54"/> | |
61 <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/> | |
62 <param name="ncbi_prefix" type="hidden" value="Hs"/> | |
63 <param name="ucsc_genome_version" type="hidden" value="hg18"/> | |
64 <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/> | |
65 <param name="mt_chromosome" type="hidden" value="MT"/> | |
66 <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/> | |
67 <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/> | |
68 <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/> | |
69 </when> | |
70 <when value="GRCm38"> | |
71 <param name="ensembl_organism" type="hidden" value="mus_musculus"/> | |
72 <param name="ensembl_prefix" type="hidden" value="Mus_musculus"/> | |
73 <param name="ensembl_genome_version" type="hidden" value="GRCm38"/> | |
74 <param name="ensembl_version" type="hidden" value="71"/> | |
75 <param name="ncbi_organism" type="hidden" value="Mus_musculus"/> | |
76 <param name="ncbi_prefix" type="hidden" value="Mm"/> | |
77 <param name="ucsc_genome_version" type="hidden" value="mm10"/> | |
78 <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT"/> | |
79 <param name="mt_chromosome" type="hidden" value="MT"/> | |
80 <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/> | |
81 <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/> | |
82 <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/> | |
83 </when> | |
84 <when value="NCBIM37"> | |
85 <param name="ensembl_organism" type="hidden" value="mus_musculus"/> | |
86 <param name="ensembl_prefix" type="hidden" value="Mus_musculus"/> | |
87 <param name="ensembl_genome_version" type="hidden" value="NCBIM37"/> | |
88 <param name="ensembl_version" type="hidden" value="67"/> | |
89 <param name="ncbi_organism" type="hidden" value="Mus_musculus"/> | |
90 <param name="ncbi_prefix" type="hidden" value="Mm"/> | |
91 <param name="ucsc_genome_version" type="hidden" value="mm9"/> | |
92 <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT"/> | |
93 <param name="mt_chromosome" type="hidden" value="MT"/> | |
94 <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/> | |
95 <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/> | |
96 <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/> | |
97 </when> | |
98 <when value="Rnor_5.0"> | |
99 <param name="ensembl_organism" type="hidden" value="rattus_norvegicus"/> | |
100 <param name="ensembl_prefix" type="hidden" value="Rattus_norvegicus"/> | |
101 <param name="ensembl_genome_version" type="hidden" value="Rnor_5.0"/> | |
102 <param name="ensembl_version" type="hidden" value="71"/> | |
103 <param name="ncbi_organism" type="hidden" value="Rattus_norvegicus"/> | |
104 <param name="ncbi_prefix" type="hidden" value="Rn"/> | |
105 <param name="ucsc_genome_version" type="hidden" value="rn5"/> | |
106 <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,MT"/> | |
107 <param name="mt_chromosome" type="hidden" value="MT"/> | |
108 <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/> | |
109 <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/> | |
110 <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/> | |
111 </when> | |
112 <when value="user_specified"> | |
113 <param name="ensembl_organism" type="text" value="" label="Ensembl Organism Name" help="Examples: homo_sapiens, mus_musculus, rattus_norvegicus"/> | |
114 <param name="ensembl_prefix" type="text" value="" label="Ensembl Organism prefix" help="Examples: Homo_sapiens, Mus_musculus, Rattus_norvegicus"/> | |
115 <param name="ensembl_genome_version" type="text" value="" label="Ensembl Genome Version" help="Examples: GRCh37, GRCm38, Rnor_5.0"/> | |
116 <param name="ensembl_version" type="integer" value="" label="Ensembl Release Version" help="Example: 71"/> | |
117 <param name="ncbi_organism" type="text" value="" label="NCBI Organism Name" help="Examples: Homo_sapiens, Mus_musculus, Rattus_norvegicus"/> | |
118 <param name="ncbi_prefix" type="text" value="" label="NCBI Organism Unigene prefix" help="Examples: Hs, Mm, Rn"/> | |
119 <param name="ucsc_genome_version" type="text" value="" label="UCSC Genome Version" help="Examples: hg19, mm10, rn5"/> | |
120 <param name="chromosomes" type="text" value="" label="Chromosomes for Ensembl genome build" > | |
121 <help> Examples: | |
122 Homo_sapiens: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT | |
123 Mus_musculus: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT | |
124 Rattus_norvegicus: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,MT | |
125 ( ftp://ftp.ensembl.org/pub/release-71/fasta/homo_sapiens/dna/ ) | |
126 </help> | |
127 </param> | |
128 <param name="mt_chromosome" type="text" value="MT" label="Ensembl Mitochondrial Chromosome name" help="Example: MT" /> | |
129 <param name="gene_sources" type="text" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding" label="Gene sources" /> | |
130 <param name="ig_gene_sources" type="text" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene" label="IG Gene sources" /> | |
131 <param name="rrna_gene_sources" type="text" value="Mt_rRNA,rRNA,rRNA_pseudogene" label="Ribosomal Gene sources" /> | |
132 </when> | |
133 </conditional> | |
134 </inputs> | |
135 <outputs> | |
136 <data name="out_file" format="data_manager_json" label="${tool.name} : ${genome.ensembl_genome_version}"/> | |
137 </outputs> | |
138 <stdio> | |
139 <exit_code range="1:" level="fatal" description="Error running Create DeFuse Reference" /> | |
140 <regex match="Error:" | |
141 source="both" | |
142 level="fatal" | |
143 description="Error running Create DeFuse Reference" /> | |
144 | |
145 </stdio> | |
146 <configfiles> | |
147 <configfile name="defuse_config"> | |
148 # | |
149 # Configuration file for defuse | |
150 # | |
151 # Variables that designate the PATH to an application, e.g. __SAMTOOLS_BIN__ | |
152 # will be set by the runtime script using the ENV PATH | |
153 # | |
154 | |
155 # Directory where the defuse code was unpacked | |
156 source_directory = __DEFUSE_PATH__ | |
157 | |
158 # Organism IDs | |
159 ensembl_organism = $genome.ensembl_organism | |
160 ensembl_prefix = $genome.ensembl_prefix | |
161 ensembl_version = $genome.ensembl_version | |
162 ensembl_genome_version = $genome.ensembl_genome_version | |
163 ucsc_genome_version = $genome.ucsc_genome_version | |
164 ncbi_organism = $genome.ncbi_organism | |
165 ncbi_prefix = $genome.ncbi_prefix | |
166 | |
167 # Directory where you want your dataset | |
168 dataset_directory = __DATASET_DIRECTORY__ | |
169 | |
170 #raw | |
171 # Input genome and gene models | |
172 gene_models = $(dataset_directory)/$(ensembl_prefix).$(ensembl_genome_version).$(ensembl_version).gtf | |
173 genome_fasta = $(dataset_directory)/$(ensembl_prefix).$(ensembl_genome_version).$(ensembl_version).dna.chromosomes.fa | |
174 | |
175 # Repeat table from ucsc genome browser | |
176 repeats_filename = $(dataset_directory)/repeats.txt | |
177 | |
178 # EST info downloaded from ucsc genome browser | |
179 est_fasta = $(dataset_directory)/est.fa | |
180 est_alignments = $(dataset_directory)/intronEst.txt | |
181 | |
182 # Unigene clusters downloaded from ncbi | |
183 unigene_fasta = $(dataset_directory)/$(ncbi_prefix).seq.uniq | |
184 #end raw | |
185 | |
186 # Paths to external tools | |
187 samtools_bin = __SAMTOOLS_BIN__ | |
188 bowtie_bin = __BOWTIE_BIN__ | |
189 bowtie_build_bin = __BOWTIE_BUILD_BIN__ | |
190 blat_bin = __BLAT_BIN__ | |
191 fatotwobit_bin = __FATOTWOBIT_BIN__ | |
192 gmap_bin = __GMAP_BIN__ | |
193 gmap_setup_bin = __GMAP_SETUP_BIN__ | |
194 r_bin = __R_BIN__ | |
195 rscript_bin = __RSCRIPT_BIN__ | |
196 | |
197 #raw | |
198 # Directory where you want your gmap index | |
199 gmap_index_directory = $(dataset_directory)/gmap | |
200 #end raw | |
201 | |
202 #raw | |
203 # Dataset files | |
204 dataset_prefix = $(dataset_directory)/defuse | |
205 chromosome_prefix = $(dataset_prefix).dna.chromosomes | |
206 exons_fasta = $(dataset_prefix).exons.fa | |
207 cds_fasta = $(dataset_prefix).cds.fa | |
208 cdna_regions = $(dataset_prefix).cdna.regions | |
209 cdna_fasta = $(dataset_prefix).cdna.fa | |
210 reference_fasta = $(dataset_prefix).reference.fa | |
211 rrna_fasta = $(dataset_prefix).rrna.fa | |
212 ig_gene_list = $(dataset_prefix).ig.gene.list | |
213 repeats_regions = $(dataset_directory)/repeats.regions | |
214 est_split_fasta1 = $(dataset_directory)/est.1.fa | |
215 est_split_fasta2 = $(dataset_directory)/est.2.fa | |
216 est_split_fasta3 = $(dataset_directory)/est.3.fa | |
217 est_split_fasta4 = $(dataset_directory)/est.4.fa | |
218 est_split_fasta5 = $(dataset_directory)/est.5.fa | |
219 est_split_fasta6 = $(dataset_directory)/est.6.fa | |
220 est_split_fasta7 = $(dataset_directory)/est.7.fa | |
221 est_split_fasta8 = $(dataset_directory)/est.8.fa | |
222 est_split_fasta9 = $(dataset_directory)/est.9.fa | |
223 | |
224 # Fasta files with bowtie indices for prefiltering reads for concordantly mapping pairs | |
225 prefilter1 = $(unigene_fasta) | |
226 | |
227 # deFuse scripts and tools | |
228 scripts_directory = $(source_directory)/scripts | |
229 tools_directory = $(source_directory)/tools | |
230 data_directory = $(source_directory)/data | |
231 #end raw | |
232 | |
233 # Parameters for building the dataset | |
234 chromosomes = $genome.chromosomes | |
235 mt_chromosome = $genome.mt_chromosome | |
236 gene_sources = $genome.gene_sources | |
237 ig_gene_sources = $genome.ig_gene_sources | |
238 rrna_gene_sources = $genome.rrna_gene_sources | |
239 gene_biotypes = $genome.gene_sources | |
240 ig_gene_biotypes = $genome.ig_gene_sources | |
241 rrna_gene_biotypes = $genome.rrna_gene_sources | |
242 | |
243 #raw | |
244 # Remove temp files | |
245 remove_job_files = yes | |
246 remove_job_temp_files = yes | |
247 #end raw | |
248 </configfile> | |
249 <configfile name="defuse_script">#slurp | |
250 #!/bin/bash | |
251 ## define some things for cheetah processing | |
252 #set $amp = chr(38) | |
253 #set $gt = chr(62) | |
254 ## substitute pathnames into config file | |
255 if `grep __DATASET_DIRECTORY__ $defuse_config ${gt} /dev/null`;then sed -i'.tmp' "s#__DATASET_DIRECTORY__#\$1#" $defuse_config; fi | |
256 if `grep __DEFUSE_PATH__ $defuse_config ${gt} /dev/null`;then sed -i'.tmp' "s#__DEFUSE_PATH__#\${DEFUSE_PATH}#" $defuse_config; fi | |
257 if `grep __SAMTOOLS_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} SAMTOOLS_BIN=`which samtools`;then sed -i'.tmp' "s#__SAMTOOLS_BIN__#\${SAMTOOLS_BIN}#" $defuse_config; fi | |
258 if `grep __BOWTIE_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BIN=`which bowtie`;then sed -i'.tmp' "s#__BOWTIE_BIN__#\${BOWTIE_BIN}#" $defuse_config; fi | |
259 if `grep __BOWTIE_BUILD_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BUILD_BIN=`which bowtie-build`;then sed -i'.tmp' "s#__BOWTIE_BUILD_BIN__#\${BOWTIE_BUILD_BIN}#" $defuse_config; fi | |
260 if `grep __BLAT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BLAT_BIN=`which blat`;then sed -i'.tmp' "s#__BLAT_BIN__#\${BLAT_BIN}#" $defuse_config; fi | |
261 if `grep __FATOTWOBIT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} FATOTWOBIT_BIN=`which faToTwoBit`;then sed -i'.tmp' "s#__FATOTWOBIT_BIN__#\${FATOTWOBIT_BIN}#" $defuse_config; fi | |
262 if `grep __GMAP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_BIN=`which gmap`;then sed -i'.tmp' "s#__GMAP_BIN__#\${GMAP_BIN}#" $defuse_config; fi | |
263 if `grep __GMAP_SETUP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_SETUP_BIN=`which gmap_setup`;then sed -i'.tmp' "s#__GMAP_SETUP_BIN__#\${GMAP_SETUP_BIN}#" $defuse_config; fi | |
264 if `grep __GMAP_INDEX_DIR__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_INDEX_DIR=`pwd`/gmap;then sed -i'.tmp' "s#__GMAP_INDEX_DIR__#\${GMAP_INDEX_DIR}#" $defuse_config; fi | |
265 if `grep __R_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} R_BIN=`which R`;then sed -i'.tmp' "s#__R_BIN__#\${R_BIN}#" $defuse_config; fi | |
266 if `grep __RSCRIPT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} RSCRIPT_BIN=`which Rscript`;then sed -i'.tmp' "s#__RSCRIPT_BIN__#\${RSCRIPT_BIN}#" $defuse_config; fi | |
267 ## copy config to output | |
268 cp $defuse_config \$1/defuse_config.txt | |
269 ## Run the create_reference_dataset.pl | |
270 perl \${DEFUSE_PATH}/scripts/create_reference_dataset.pl -c $defuse_config | |
271 </configfile> | |
272 </configfiles> | |
273 | |
274 <tests> | |
275 </tests> | |
276 <help> | |
277 **DeFuse** | |
278 | |
279 DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion. See the DeFuse_Version_0.6_ manual for details. | |
280 | |
281 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.6_: | |
282 - genome_fasta from Ensembl | |
283 - gene_models from Ensembl | |
284 - repeats_filename from UCSC RepeatMasker rmsk.txt | |
285 - est_fasta from UCSC | |
286 - est_alignments from UCSC intronEst.txt | |
287 - unigene_fasta from NCBI | |
288 | |
289 The create_defuse_reference Galaxy tool downloads the reference genome and other source files, and builds any derivative files including bowtie indices, gmap indices, and 2bit files. Expect this step to take at least 12 hours. | |
290 | |
291 | |
292 It will generate the reference data for the deFuse Galaxy tool. | |
293 | |
294 Journal reference: http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1001138 | |
295 | |
296 .. _DeFuse: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page | |
297 | |
298 .. _DeFuse_Version_0.6: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.6.1 | |
299 | |
300 ------ | |
301 | |
302 **Outputs** | |
303 | |
304 The Galaxy history will contain the config.txt file that provides DeFuse with the reference data paths. | |
305 | |
306 </help> | |
307 </tool> |