Mercurial > repos > iuc > homer_findmotifsgenome
comparison homer_findMotifsGenome.xml @ 0:ec974e69e0b5 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/homer commit e49d856e0181edc6120220a1b819cba2466a4289"
author | iuc |
---|---|
date | Sun, 08 Aug 2021 11:02:42 +0000 |
parents | |
children | 3126da33847c |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ec974e69e0b5 |
---|---|
1 <tool id="homer_findMotifsGenome" name="findMotifsGenome" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05" license="MIT"> | |
2 <description/> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="xrefs"/> | |
7 <expand macro="requirements"/> | |
8 <command detect_errors="exit_code"><![CDATA[ | |
9 ## Derive a file-system-safe name for the peak file from its element identifier | |
10 ## (approach taken from the fastqc wrapper) and link the dataset under that name: | |
11 #import re | |
12 #import os | |
13 #set input_name = re.sub('[^\w\-\s]', '_', str($input.element_identifier)) | |
14 ln -s '${input}' '${input_name}' && | |
15 ## Directory handed to findMotifsGenome.pl as its output folder: | |
16 #set output = $input_name + '_motif' | |
17 ## Process the genome: link the selected FASTA into the working directory | |
18 #if str( $genome.source ) == "installed": | |
19 #set genome_file = re.sub('[^\w\-\s]', '_', str($genome.all_fasta_source.fields.value)) + '.fa' | |
20 ln -s '$genome.all_fasta_source.fields.path' '$genome_file' && | |
21 #elif str( $genome.source ) == "preparsed": | |
22 #set genome_file = os.path.split(str($genome.homer_preparse_source.fields.path_fasta))[-1] | |
23 ln -s '$genome.homer_preparse_source.fields.path_fasta' '$genome_file' && | |
24 #elif str( $genome.source ) == "history": | |
25 #set genome_file = re.sub('[^\w\-\s]', '_', str($genome.fasta.name)) + '.fa' | |
26 ln -s '$genome.fasta' '$genome_file' && | |
27 #end if | |
28 ## Command: | |
29 findMotifsGenome.pl | |
30 ## Peak: | |
31 '${input_name}' | |
32 ## Genome: | |
33 '$genome_file' | |
34 ## Output folder: | |
35 '${output}' | |
36 ## Options | |
37 #if str( $genome.source ) == "preparsed": | |
38 ## Mask flag and region size come from the selected homer_preparse table entry | |
39 -preparsedDir '$genome.homer_preparse_source.fields.path' | |
40 #if str( $genome.homer_preparse_source.fields.mask ) == 'True': | |
41 -mask | |
42 #end if | |
43 #if str( $genome.choose_center.center ) == "centered": | |
44 -size '$genome.homer_preparse_source.fields.size' | |
45 #else: | |
46 ## Off-centre window: runs from the chosen offset to offset + size | |
47 #set sizee = int($genome.choose_center.sizes) + int($genome.homer_preparse_source.fields.size) | |
48 -size '$genome.choose_center.sizes','${sizee}' | |
49 #end if | |
50 #else: | |
51 #if $genome.mask: | |
52 -mask | |
53 #end if | |
54 #if str( $genome.fixed_size.size_fixed ) == "given": | |
55 -size given | |
56 #else: | |
57 #if str( $genome.fixed_size.choose_center.center ) == "centered": | |
58 -size '$genome.fixed_size.size' | |
59 #else: | |
60 ## Off-centre window: runs from the chosen offset to offset + size | |
61 #set sizee = int($genome.fixed_size.choose_center.sizes) + int($genome.fixed_size.size) | |
62 -size '$genome.fixed_size.choose_center.sizes','${sizee}' | |
63 #end if | |
64 #end if | |
65 #end if | |
66 -len '$len' | |
67 -S $S | |
68 -mis $mis | |
69 $norevopp | |
70 $nomotif | |
71 $rna | |
72 -mset $motif_options.mset | |
73 $motif_options.basic | |
74 $motif_options.bits | |
75 $motif_options.nocheck | |
76 #if $motif_options.mcheck: | |
77 -mcheck '$motif_options.mcheck' | |
78 #end if | |
79 $motif_options.noknown | |
80 #if $motif_options.mknown: | |
81 -mknown '$motif_options.mknown' | |
82 #end if | |
83 $motif_options.nofacts | |
84 $motif_options.seqlogo | |
85 $advanced.norm | |
86 $advanced.h | |
87 #if str($advanced.N): | |
88 -N $advanced.N | |
89 #end if | |
90 -local $advanced.local | |
91 -redundant $advanced.redundant | |
92 -maxN $advanced.maxN | |
93 ## -maskMotif and -opt take several motif files as separate arguments, | |
94 ## so emit one quoted path per selected dataset instead of a single comma-joined string | |
95 #if $advanced.maskMotif: | |
96 -maskMotif | |
97 #for $motif_file in $advanced.maskMotif: | |
98 '$motif_file' | |
99 #end for | |
100 #end if | |
101 #if $advanced.opt: | |
102 -opt | |
103 #for $motif_file in $advanced.opt: | |
104 '$motif_file' | |
105 #end for | |
106 #end if | |
107 $advanced.rand | |
108 #if $advanced.ref: | |
109 -ref '$advanced.ref' | |
110 #end if | |
111 $advanced.oligo | |
112 #if $advanced.fdr: | |
113 -fdr $advanced.fdr | |
114 #end if | |
115 #if str( $advanced.homer12.version ) == "homer2": | |
116 -nlen '$advanced.homer12.nlen' | |
117 -nmax '$advanced.homer12.nmax' | |
118 $advanced.homer12.neutral | |
119 ## -olen is optional: only pass it when a value was entered | |
120 #if str( $advanced.homer12.olen ): | |
121 -olen '$advanced.homer12.olen' | |
122 #end if | |
123 -e '$advanced.homer12.e' | |
124 $advanced.homer12.quickMask | |
125 -minlp '$advanced.homer12.minlp' | |
126 #elif str( $advanced.homer12.version ) == "homer1": | |
127 ## -depth is an option of the original homer, which must be requested explicitly with -homer1 | |
128 -homer1 | |
129 -depth '$advanced.homer12.depth' | |
130 #end if | |
131 ## Publish each report as the dataset's HTML and copy the whole result folder next to it | |
132 #if not $nomotif: | |
133 && cp '${output}'/homerResults.html outputHomer.html | |
134 && cp -r '${output}' '${html_homer_file.files_path}' | |
135 #end if | |
136 #if not $motif_options.noknown: | |
137 && cp '${output}'/knownResults.html outputKnown.html | |
138 && cp -r '${output}' '${html_file.files_path}' | |
139 #end if | |
140 ]]></command> | |
121 <inputs> | |
122 <param type="data" name="input" label="Peak file" format="bed,encodepeak,tabular"/> | |
123 <!-- Reference genome source: an entry of the homer_preparse data table, an entry of the all_fasta data table, or a FASTA dataset from the history --> | |
124 <conditional name="genome"> | |
125 <param name="source" type="select" label="Will you select a reference genome from your history or use an installed genome?"> | |
126 <option value="preparsed">Preparsed (fasta is available and has been preparsed to a specific size)</option> | |
127 <option value="installed">Installed (fasta is available but will be preparsed at run time)</option> | |
128 <option value="history">From History (fasta will be preparsed at run time)</option> | |
129 </param> | |
130 <when value="preparsed"> | |
131 <param name="homer_preparse_source" type="select" label="Preparsed FASTA"> | |
132 <options from_data_table="homer_preparse"> | |
133 <filter type="sort_by" column="2"/> | |
134 <filter type="static_value" column="version" value="@IDX_VERSION@"/> | |
135 <validator type="no_options" message="No preparsed genomes are available"/> | |
136 </options> | |
137 </param> | |
138 <expand macro="choose_center"/> | |
139 </when> | |
140 <when value="installed"> | |
141 <param name="all_fasta_source" type="select" label="Source FASTA Sequence"> | |
142 <options from_data_table="all_fasta"> | |
143 <filter type="sort_by" column="2"/> | |
144 <validator type="no_options" message="No references are available"/> | |
145 </options> | |
146 </param> | |
147 <expand macro="mask_size"/> | |
148 </when> | |
149 <when value="history"> | |
150 <param name="fasta" type="data" format="fasta" label="Select reference genome"/> | |
151 <expand macro="mask_size"/> | |
152 </when> | |
153 </conditional> | |
153 <!-- Basic findMotifsGenome.pl options --> | |
154 <param argument="-len" type="text" label="comma-separated motif lengths" value="8,10,12" help="values greater 12 may cause the program to run out of memory - in these cases decrease the number of sequences analyzed (-N), or try analyzing shorter sequence regions (i.e. -size 100)"> | |
155 <validator message="motif lengths must be comma-separated integers without space" type="regex">^(\d+,)*(\d+)$</validator> | |
156 </param> | |
157 <param argument="-S" type="integer" label="Number of motifs to find" value="25" min="1"/> | |
158 <param argument="-mis" type="integer" label="Number of mismatches during global optimisation" value="2" min="0"/> | |
159 <param argument="-norevopp" type="boolean" label="Don't search reverse strand for motifs" checked="false" truevalue="-norevopp" falsevalue=""/> | |
160 <param argument="-nomotif" type="boolean" label="Don't search for de novo motif enrichment" checked="false" truevalue="-nomotif" falsevalue=""/> | |
161 <param argument="-rna" type="boolean" label="output RNA motif logos and compare to RNA motif database" help="automatically sets -norevopp" checked="false" truevalue="-rna" falsevalue=""/> | |
161 <!-- Options controlling the comparison with known motifs and how logos are drawn --> | |
162 <section name="motif_options" title="Known Motif Options/Visualization" expanded="false"> | |
163 <param argument="-mset" type="select" label="Check against motif collects"> | |
164 <option value="auto" selected="true">automatic</option> | |
165 <option value="vertebrates">vertebrates</option> | |
166 <option value="insects">insects</option> | |
167 <option value="worms">worms</option> | |
168 <option value="plants">plants</option> | |
169 <option value="yeast">yeast</option> | |
170 <option value="all">all</option> | |
171 </param> | |
172 <param argument="-basic" type="boolean" truevalue="-basic" falsevalue="" checked="false" label="Just visualize de novo motifs, don't check similarity with known motifs"/> | |
173 <param argument="-bits" type="boolean" truevalue="-bits" falsevalue="" checked="false" label="Scale sequence logos by information content" help="By default the logos are not scaled"/> | |
174 <param argument="-nocheck" type="boolean" truevalue="-nocheck" falsevalue="" checked="false" label="Don't search for de novo vs. known motif similarity"/> | |
175 <param argument="-mcheck" type="data" optional="true" format="txt" label="known motifs to check against de novo motifs"/> | |
176 <param argument="-noknown" type="boolean" truevalue="-noknown" falsevalue="" checked="false" label="Don't search for known motif enrichment"/> | |
177 <param argument="-mknown" type="data" optional="true" format="txt" label="Known motifs to check for enrichment"/> | |
178 <param argument="-nofacts" type="boolean" truevalue="-nofacts" falsevalue="" checked="false" label="Omit humor"/> | |
179 <param argument="-seqlogo" type="boolean" truevalue="-seqlogo" falsevalue="" checked="false" label="Use weblogo/seqlogo/ghostscript to generate logos, default uses SVG now"/> | |
180 </section> | |
180 <!-- Normalisation, background and engine-specific (homer2 / original homer) options --> | |
181 <section name="advanced" title="Advanced options" expanded="false"> | |
182 <param name="norm" type="select" label="Sequence normalization options:"> | |
183 <option value="-gc" selected="true">use GC% for sequence content normalization</option> | |
184 <option value="-cpg">use CpG% instead of GC% for sequence content normalization</option> | |
185 <option value="-noweight">no CG correction</option> | |
186 </param> | |
187 <param argument="-h" type="boolean" truevalue="-h" falsevalue="" checked="false" label="Use hypergeometric for p-values, binomial is default"/> | |
188 <param argument="-N" type="integer" min="0" value="" optional="true" label="Number of sequences to use for motif finding, default=max(50k, 2x input)"/> | |
189 <param argument="-local" type="integer" min="0" value="0" label="local background size in bp for each side of regions" help="0 means no local background."/> | |
190 <param argument="-redundant" type="float" min="0" max="2" value="2" label="Remove redundant sequences matching greater than # fraction, i.e. -redundant 0.5"/> | |
191 <param argument="-maxN" type="float" min="0" max="1" value="0.7" label="maximum percentage of N's in sequence to consider for motif finding"/> | |
192 <param argument="-maskMotif" type="data" format="txt" multiple="true" optional="true" label="motifs to mask before motif finding"/> | |
193 <param argument="-opt" type="data" format="txt" multiple="true" optional="true" label="motifs to optimize or change length of"/> | |
194 <param argument="-rand" type="boolean" truevalue="-rand" falsevalue="" checked="false" label="randomize target and background sequences labels"/> | |
195 <param argument="-ref" optional="true" type="data" format="tabular,bed,encodepeak" label="use file for target and background - first argument is list of peak ids for targets"/> | |
196 <param argument="-oligo" type="boolean" truevalue="-oligo" falsevalue="" checked="false" label="Perform analysis of individual oligo enrichment"/> | |
197 <param argument="-fdr" type="integer" min="0" value="" label="Number of randomizations to calculate empirical FDR for de novo discovery" optional="true"/> | |
198 <conditional name="homer12"> | |
199 <param name="version" type="select" label="Which homer version do you want to use"> | |
200 <option value="homer2" selected="true">homer2 (default)</option> | |
201 <option value="homer1">homer1 (to force the use of the original homer)</option> | |
202 </param> | |
203 <when value="homer2"> | |
204 <param argument="-nlen" type="integer" min="0" value="3" label="length of lower-order oligos to normalize in background"/> | |
205 <param argument="-nmax" type="integer" min="0" value="160" label="Max normalization iterations"/> | |
206 <param argument="-neutral" type="boolean" truevalue="-neutral" falsevalue="" checked="false" label="weight sequences to neutral frequencies, i.e. 25%, 6.25%, etc."/> | |
207 <param argument="-olen" type="integer" min="0" value="" optional="true" label="lower-order oligo normalization for oligo table, use if -nlen isn't working well"/> | |
208 <!-- The label was empty, leaving the form field unnamed; the description now serves as the label --> | |
209 <param argument="-e" type="float" min="0" max="1" value="0" label="Maximum expected motif instance per bp in random sequence" help="The HOMER usage text lists 0.01 as the program's own default"/> | |
210 <param argument="-quickMask" type="boolean" truevalue="-quickMask" falsevalue="" checked="false" label="skip full masking after finding motifs, similar to original homer"/> | |
211 <param argument="-minlp" type="float" value="-10" label="stop looking for motifs when seed logp score gets above this number"/> | |
212 </when> | |
213 <when value="homer1"> | |
214 <param argument="-depth" type="select" label="time spent on local optimization default"> | |
215 <option value="low">low</option> | |
216 <option value="med" selected="true">med</option> | |
217 <option value="high">high</option> | |
218 <option value="allnight">allnight</option> | |
219 </param> | |
220 </when> | |
221 </conditional> | |
222 </section> | |
221 </inputs> | |
222 <outputs> | |
223 <!-- Known-motif report; only created while -noknown is unset --> | |
224 <data name="html_file" format="html" label="${tool.name} on ${on_string}: Known motifs" from_work_dir="outputKnown.html"> | |
225 <filter>motif_options['noknown'] is False</filter> | |
226 </data> | |
227 <!-- De novo motif report; only created while -nomotif is unset --> | |
228 <data name="html_homer_file" format="html" label="${tool.name} on ${on_string}: De novo motifs" from_work_dir="outputHomer.html"> | |
229 <filter>nomotif is False</filter> | |
230 </data> | |
231 </outputs> | |
230 <tests> | |
231 <!-- Installed genome (phiX174) with default options: both reports are produced --> | |
232 <test expect_num_outputs="2"> | |
233 <param name="input" value="fake_phix_peaks.bed"/> | |
234 <conditional name="genome"> | |
235 <param name="source" value="installed"/> | |
236 <param name="all_fasta_source" value="phiX174"/> | |
237 </conditional> | |
238 <output name="html_file" file="motif_test1/knownResults.html" ftype="html" lines_diff="2"/> | |
239 <output name="html_homer_file"> | |
240 <assert_contents> | |
241 <has_text text="fake_phix_peaks_bed_motif/ - Homer de novo Motif Results"/> | |
242 <has_text text="Total target sequences = 1"/> | |
243 <has_text text="Jaspar"/> | |
244 </assert_contents> | |
245 </output> | |
246 </test> | |
247 <!-- Genome FASTA taken from the history, default options --> | |
248 <test expect_num_outputs="2"> | |
249 <param name="input" value="CTCF_peaks_shifted.bed"/> | |
250 <conditional name="genome"> | |
251 <param name="source" value="history"/> | |
252 <param name="fasta" value="chr2_subset.fa"/> | |
253 </conditional> | |
254 <output name="html_file"> | |
255 <assert_contents> | |
256 <has_text text="CTCF_peaks_shifted_bed_motif - Homer Known Motif Enrichment Results"/> | |
257 <has_text text="Total Target Sequences = 40"/> | |
258 <has_text text="CTCF(Zf)/CD4+-CTCF-ChIP-Seq(Barski_et_al.)/Homer"/> | |
259 </assert_contents> | |
260 </output> | |
261 <output name="html_homer_file"> | |
262 <assert_contents> | |
263 <has_text text="CTCF_peaks_shifted_bed_motif/ - Homer de novo Motif Results"/> | |
264 <has_text text="Total target sequences = 40"/> | |
265 <!-- Parentheses are escaped so the first alternative matches the literal text "CTCF(Zf)" --> | |
266 <has_text_matching expression="CTCF\(Zf\)|CTCF/MA|BORIS|CTCFL"/> | |
267 </assert_contents> | |
268 </output> | |
269 </test> | |
270 <!-- Genome FASTA from the history with -mask enabled --> | |
271 <test expect_num_outputs="2"> | |
272 <param name="input" value="CTCF_peaks_shifted.bed"/> | |
273 <conditional name="genome"> | |
274 <param name="source" value="history"/> | |
275 <param name="fasta" value="chr2_subset.fa"/> | |
276 <param name="mask" value="true"/> | |
277 </conditional> | |
278 <output name="html_file"> | |
279 <assert_contents> | |
280 <has_text text="CTCF_peaks_shifted_bed_motif - Homer Known Motif Enrichment Results"/> | |
281 <has_text text="Total Target Sequences = 34"/> | |
282 <has_text text="CTCF(Zf)/CD4+-CTCF-ChIP-Seq(Barski_et_al.)/Homer"/> | |
283 </assert_contents> | |
284 </output> | |
285 <output name="html_homer_file"> | |
286 <assert_contents> | |
287 <has_text text="CTCF_peaks_shifted_bed_motif/ - Homer de novo Motif Results"/> | |
288 <has_text text="Total target sequences = 34"/> | |
289 <has_text_matching expression="CTCF\(Zf\)|CTCF/MA|BORIS|CTCFL"/> | |
290 </assert_contents> | |
291 </output> | |
292 </test> | |
293 <!-- Plants motif set with -nomotif: only the known-motif report is produced --> | |
294 <test expect_num_outputs="1"> | |
295 <param name="input" value="CTCF_peaks_shifted.bed"/> | |
296 <conditional name="genome"> | |
297 <param name="source" value="history"/> | |
298 <param name="fasta" value="chr2_subset.fa"/> | |
299 </conditional> | |
300 <section name="motif_options"> | |
301 <param name="mset" value="plants"/> | |
302 </section> | |
303 <param name="nomotif" value="true"/> | |
304 <output name="html_file"> | |
305 <assert_contents> | |
306 <has_text text="CTCF_peaks_shifted_bed_motif - Homer Known Motif Enrichment Results"/> | |
307 <has_text text="Total Target Sequences = 40"/> | |
308 <has_text text="RAP26"/> | |
309 </assert_contents> | |
310 </output> | |
311 </test> | |
312 </tests> | |
308 <help><![CDATA[ | |
309 | |
310 .. class:: infomark | |
311 | |
312 This is a wrapper for findMotifsGenome.pl from HOMER but not all options are included. | |
313 | |
314 Program will find de novo and known motifs in regions in the genome. | |
315 | |
316 Usage:: | |
317 | |
318 findMotifsGenome.pl <pos file> <genome> <output directory> [additional options] | |
319 | |
320 Example:: | |
321 | |
322 findMotifsGenome.pl peaks.txt mm8r peakAnalysis -size 200 -len 8 | |
323 | |
324 Possible Genomes:: | |
325 | |
326 -- or -- | |
327 Custom: provide the path to genome FASTA files (directory or single file) | |
328 Heads up: will create the directory "preparsed/" in same location. | |
329 | |
330 Basic options:: | |
331 | |
332 -mask (mask repeats/lower case sequence, can also add 'r' to genome, i.e. mm9r) | |
333 -bg <background position file> (genomic positions to be used as background, default=automatic) | |
334 removes background positions overlapping with target positions unless -keepOverlappingBg is used | |
335 -chopify (chop up large background regions to the avg size of target regions) | |
336 -len <#>[,<#>,<#>...] (motif length, default=8,10,12) [NOTE: values greater 12 may cause the program | |
337 to run out of memory - in these cases decrease the number of sequences analyzed (-N), | |
338 or try analyzing shorter sequence regions (i.e. -size 100)] | |
339 -size <#> (fragment size to use for motif finding, default=200) | |
340 -size <#,#> (i.e. -size -100,50 will get sequences from -100 to +50 relative from center) | |
341 -size given (uses the exact regions you give it) | |
342 -S <#> (Number of motifs to optimize, default: 25) | |
343 -mis <#> (global optimization: searches for strings with # mismatches, default: 2) | |
344 -norevopp (don't search reverse strand for motifs) | |
345 -nomotif (don't search for de novo motif enrichment) | |
346 -rna (output RNA motif logos and compare to RNA motif database, automatically sets -norevopp) | |
347 | |
348 Scanning sequence for motifs:: | |
349 | |
350 -find <motif file> (This will cause the program to only scan for motifs) | |
351 | |
352 Known Motif Options/Visualization:: | |
353 | |
354 -mset <vertebrates|insects|worms|plants|yeast|all> (check against motif collects, default: auto) | |
355 -basic (just visualize de novo motifs, don't check similarity with known motifs) | |
356 -bits (scale sequence logos by information content, default: doesn't scale) | |
357 -nocheck (don't search for de novo vs. known motif similarity) | |
358 -mcheck <motif file> (known motifs to check against de novo motifs) | |
359 -float (allow adjustment of the degeneracy threshold for known motifs to improve p-value[dangerous]) | |
360 -noknown (don't search for known motif enrichment, default: -known) | |
361 -mknown <motif file> (known motifs to check for enrichment) | |
362 -nofacts (omit humor) | |
363 -seqlogo (use weblogo/seqlogo/ghostscript to generate logos, default uses SVG now) | |
364 | |
365 Sequence normalization options:: | |
366 | |
367 -gc (use GC% for sequence content normalization, now the default) | |
368 -cpg (use CpG% instead of GC% for sequence content normalization) | |
369 -noweight (no CG correction) | |
370 Also -nlen <#>, -olen <#>, see homer2 section below. | |
371 | |
372 Advanced options:: | |
373 | |
374 -h (use hypergeometric for p-values, binomial is default) | |
375 -N <#> (Number of sequences to use for motif finding, default=max(50k, 2x input)) | |
376 -local <#> (use local background, # of equal size regions around peaks to use i.e. 2) | |
377 -redundant <#> (Remove redundant sequences matching greater than # percent, i.e. -redundant 0.5) | |
378 -maxN <#> (maximum percentage of N's in sequence to consider for motif finding, default: 0.7) | |
379 -maskMotif <motif file1> [motif file 2]... (motifs to mask before motif finding) | |
380 -opt <motif file1> [motif file 2]... (motifs to optimize or change length of) | |
381 -rand (randomize target and background sequences labels) | |
382 -ref <peak file> (use file for target and background - first argument is list of peak ids for targets) | |
383 -oligo (perform analysis of individual oligo enrichment) | |
384 -dumpFasta (Dump fasta files for target and background sequences for use with other programs) | |
385 -preparse (force new background files to be created) | |
386 -preparsedDir <directory> (location to search for preparsed file and/or place new files) | |
387 -keepFiles (keep temporary files) | |
388 -fdr <#> (Calculate empirical FDR for de novo discovery #=number of randomizations) | |
389 | |
390 homer2 specific options:: | |
391 | |
392 -homer2 (use homer2 instead of original homer, default) | |
393 -nlen <#> (length of lower-order oligos to normalize in background, default: -nlen 3) | |
394 -nmax <#> (Max normalization iterations, default: 160) | |
395 -neutral (weight sequences to neutral frequencies, i.e. 25%, 6.25%, etc.) | |
396 -olen <#> (lower-order oligo normalization for oligo table, use if -nlen isn't working well) | |
397 -p <#> (Number of processors to use, default: 1) | |
398 -e <#> (Maximum expected motif instance per bp in random sequence, default: 0.01) | |
399 -cache <#> (size in MB for statistics cache, default: 500) | |
400 -quickMask (skip full masking after finding motifs, similar to original homer) | |
401 -minlp <#> (stop looking for motifs when seed logp score gets above #, default: -10) | |
402 | |
403 Original homer specific options:: | |
404 | |
405 -homer1 (to force the use of the original homer) | |
406 -depth [low|med|high|allnight] (time spent on local optimization default: med) | |
407 | |
408 | |
409 ]]></help> | |
410 <expand macro="citation"/> | |
411 </tool> |