comparison multigps.xml @ 0:4aa531981208 draft

Uploaded
author iuc
date Mon, 13 Mar 2017 13:25:01 -0400
parents
children 7e0a12282c7f
comparison
equal deleted inserted replaced
-1:000000000000 0:4aa531981208
1 <tool id="multigps" name="MultiGPS" version="0.73.0">
2 <description>analyzes collections of multi-condition ChIP-seq data</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <requirements>
7 <requirement type="package" version="0.73">multigps</requirement>
8 </requirements>
9 <command detect_errors="aggressive">
10 <![CDATA[
11 #set output_dir = $output_html.files_path
12 mkdir -p '$output_dir' &&
13 multigps
14 ## General options (always passed)
15 ## Append .txt extensions to events hrefs
16 ## in output dataset so files will render
17 ## in the browser.
18 --eventsaretxt
19 ## Do not run the parallel version of meme
20 ## since it is not yet available in conda.
21 --meme1proc
22 --expt '$expt'
23 --format $expt.ext
24 #if str($ctrl) != 'None':
25 --ctrl '$ctrl'
26 #end if
27 --threads \${GALAXY_SLOTS:-4}
28 --geninfo '$chromInfo'
29 ## Advanced options (emitted only when the user selects Display)
30 #set aoc = $advanced_options_cond
31 #if str($aoc.advanced_options) == 'display':
32 #set bmsc = $aoc.binding_model_smoothing_cond
33 #set gmsc = $aoc.gauss_model_smoothing_cond
34 #set rbec = $aoc.report_binding_events_cond
35 #set rloc = $aoc.reads_limits_options_cond
36 #set sdc = $aoc.scale_data_cond
37 #set umc = $aoc.use_motif_cond
38 #if str($umc.use_motif) == 'yes':
39 #set rgc = $umc.reference_genome_cond
40 --seq
41 #if str($rgc.reference_genome_source) == 'cached':
42 '${rgc.reference_genome.fields.path}'
43 #else:
44 '${rgc.reference_genome}'
45 #end if
46 #end if
47 ## Limits on how many reads can share a 5-prime end position
48 #if str($rloc.reads_limits) == 'yes':
49 --fixedpb $rloc.fixedpb
50 --poissongausspb $rloc.poissongausspb
51 #if str($rloc.nonunique) == 'yes':
52 --nonunique
53 #end if
54 --mappability $rloc.mappability
55 #if str($rloc.nocache) == 'yes':
56 --nocache
57 #end if
58 #end if
59 ## Scaling data (signal vs control scaling options)
60 #if str($sdc.scale_data) == 'yes':
61 #if str($sdc.scaling) == 'no':
62 --noscaling $sdc.scaling
63 #end if
64 #if str($sdc.medianscale) == 'yes':
65 --medianscale $sdc.medianscale
66 #end if
67 #if str($sdc.regressionscale) == 'yes':
68 --regressionscale $sdc.regressionscale
69 #end if
70 #if str($sdc.sesscale) == 'yes':
71 --sesscale $sdc.sesscale
72 #end if
73 #if $sdc.fixedscaling > 0:
74 --fixedscaling $sdc.fixedscaling
75 #end if
76 --scalewin $sdc.scalewin
77 #if str($sdc.plotscaling) == 'yes':
78 --plotscaling $sdc.plotscaling
79 #end if
80 #end if
81 ## Running MultiGPS (model training options)
82 #if str($aoc.readdistributionfile) != 'None':
83 --d '$aoc.readdistributionfile'
84 #end if
85 --r $aoc.maxtrainingrounds
86 #if str($aoc.nomodelupdate) == 'no':
87 --nomodelupdate
88 #end if
89 --minmodelupdateevents $aoc.minmodelupdateevents
90 #if str($bmsc.nomodelsmoothing) == 'no':
91 --nomodelsmoothing
92 #else:
93 --splinesmoothparam $bmsc.splinesmoothparam
94 #end if
95 #if str($gmsc.gaussmodelsmoothing) == 'yes':
96 --gaussmodelsmoothing
97 --gausssmoothparam $gmsc.gausssmoothparam
98 #end if
99 #if str($aoc.jointinmodel) == 'yes':
100 --jointinmodel
101 #end if
102 #if str($aoc.fixedmodelrange) == 'yes':
103 --fixedmodelrange
104 #end if
105 --prlogconf $aoc.prlogconf
106 #if $aoc.fixedalpha > 0:
107 --fixedalpha $aoc.fixedalpha
108 #end if
109 --alphascale $aoc.alphascale
110 #if str($aoc.mlconfignotshared) == 'no':
111 --mlconfignotshared
112 #end if
113 #if str($aoc.exclude) != 'None':
114 --exclude '$aoc.exclude'
115 #end if
116 ## MultiGPS priors (only when motif-finding / motif priors are enabled)
117 #if str($umc.use_motif) == 'yes':
118 #set mpc = $umc.multigps_priors_cond
119 #if str($mpc.multigps_priors) == 'yes':
120 #set bmc = $mpc.both_motifs_cond
121 #if str($mpc.noposprior) == 'no':
122 --noposprior
123 #end if
124 --probshared $mpc.probshared
125 #if str($bmc.nomotifs) == 'yes':
126 --memenmotifs $bmc.memenmotifs
127 --mememinw $bmc.mememinw
128 --mememaxw $bmc.mememaxw
129 #else:
130 #set mfoc = $bmc.nomotifprior_cond
131 --nomotifs
132 --nomotifprior $mfoc.nomotifprior
133 #if str($mfoc.nomotifprior) == 'yes':
134 --memenmotifs $mfoc.memenmotifs
135 --mememinw $mfoc.mememinw
136 --mememaxw $mfoc.mememaxw
137 #end if
138 #end if
139 #end if
140 #end if
141 ## Reporting binding events (thresholds and differential enrichment tests)
142 #if str($rbec.report_binding_events) == 'yes':
143 --q $rbec.minqvalue
144 --minfold $rbec.minfold
145 #if str($rbec.nodifftests) == 'no':
146 --nodifftests
147 #end if
148 --edgerod $rbec.edgerod
149 --diffp $rbec.diffp
150 #end if
151 #end if
152 --out '$output_html.files_path'
153 2>&1
154 && cp '$output_dir'/*.events.txt '$experiment_events' || true
155 && mv '$output_dir'/*.html '$output_html' || true
156 && mv '$output_dir'/*.table.txt '$all_events_table' || true
157 && mv '$output_dir'/*.counts '$replicates_counts' || true
158 ]]>
159 </command>
160 <inputs>
161 <param name="expt" type="data" format="bam,bed,scidx" label="Run MultiGPS on">
162 <validator type="unspecified_build" />
163 </param>
164 <param name="ctrl" type="data" format="bam,bed,scidx" optional="True" label="Optional file containing reads from a control experiment" help="Must be same format as the input above" />
165 <!-- Advanced options -->
166 <conditional name="advanced_options_cond">
167 <param name="advanced_options" type="select" label="Advanced options">
168 <option value="hide" selected="true">Hide</option>
169 <option value="display">Display</option>
170 </param>
171 <when value="display">
172 <!-- Limits on how many reads -->
173 <conditional name="reads_limits_options_cond">
174 <param name="reads_limits" type="select" label="Set limits on how many reads can have their 5′ end at the same position in each replicate?" help="Default behavior is to estimate a global per-base limit from a Poisson distribution parameterized by the number of reads divided by the number of mappable bases in the genome. The per-base limit is set as the count corresponding to the 10^-7 probability level from the Poisson.">
175 <option value="no" selected="True">No</option>
176 <option value="yes">Yes</option>
177 </param>
178 <when value="no" />
179 <when value="yes">
180 <param name="fixedpb" type="integer" value="0" min="0" label="Fixed per-base limit" help="Zero value estimates from background model"/>
181 <param name="poissongausspb" type="integer" value="0" min="0" label="Poisson threshold for filtering per base" help="Filter per base using the specified Poisson threshold parameterized by a local Gaussian sliding window" />
182 <param name="nonunique" type="select" label="Use non-unique reads?">
183 <option value="no" selected="True">No</option>
184 <option value="yes">Yes</option>
185 </param>
186 <param name="mappability" type="float" value="0.8" min="0.0" max="1.0" label="Fraction of the genome that is mappable for these experiments" help="A fraction between 0 and 1" />
187 <param name="nocache" type="select" label="Turn off caching of the entire set of experiments?" help="Run slower with less memory" >
188 <option value="no" selected="True">No</option>
189 <option value="yes">Yes</option>
190 </param>
191 </when>
192 </conditional>
193 <!-- Scaling data -->
194 <conditional name="scale_data_cond">
195 <param name="scale_data" type="select" label="Set data scaling parameters?" help="Default behavior is to scale signal to corresponding controls using regression on the set of signal/control ratios in 10Kbp windows.">
196 <option value="no" selected="True">No</option>
197 <option value="yes">Yes</option>
198 </param>
199 <when value="yes">
200 <param name="scaling" type="select" label="Use signal vs control scaling?">
201 <option value="yes" selected="True">Yes</option>
202 <option value="no">No</option>
203 </param>
204 <param name="medianscale" type="select" label="Use the median signal/control ratio as the scaling factor?">
205 <option value="no" selected="True">No</option>
206 <option value="yes">Yes</option>
207 </param>
208 <param name="regressionscale" type="select" label="Use scaling by regression on binned tag counts?">
209 <option value="no" selected="True">No</option>
210 <option value="yes">Yes</option>
211 </param>
212 <param name="sesscale" type="select" label="Estimate scaling factor by SES?" help="SES: Diaz, et al. Stat Appl Genet Mol Biol. 2012">
213 <option value="no" selected="True">No</option>
214 <option value="yes">Yes</option>
215 </param>
216 <param name="fixedscaling" type="float" value="0.0" min="0.0" label="Multiply control counts by total tag count ratio and then by this factor" help="Set as 0 to skip" />
217 <param name="scalewin" type="integer" min="0" value="10000" label="Window size for estimating scaling ratios" help="The value is the number of base pairs. Use something much smaller than the default if scaling via SES (e.g. 200)." />
218 <param name="plotscaling" type="select" label="Plot diagnostic information for the chosen scaling method?">
219 <option value="no" selected="True">No</option>
220 <option value="yes">Yes</option>
221 </param>
222 </when>
223 <when value="no" />
224 </conditional>
225 <!-- Running MultiGPS -->
226 <param name="readdistributionfile" type="data" optional="True" format="tabular" label="Optional binding event read distribution file for initializing models" help="A default initial distribution appropriate for ChIP-seq data is used if this option is not specified." />
227 <param name="maxtrainingrounds" type="integer" value="3" min="0" label="Maximum number of training rounds for updating binding event read distributions" />
228 <param name="nomodelupdate" type="select" label="Perform binding model updates?">
229 <option value="yes" selected="True">Yes</option>
230 <option value="no">No</option>
231 </param>
232 <param name="minmodelupdateevents" type="integer" value="500" min="0" label="Minimum number of events to support an update of the read distribution" />
233 <conditional name="binding_model_smoothing_cond">
234 <param name="nomodelsmoothing" type="select" label="Perform binding model smoothing?" help="Smoothing performed with a cubic spline.">
235 <option value="yes" selected="True">Yes</option>
236 <option value="no">No</option>
237 </param>
238 <when value="yes">
239 <param name="splinesmoothparam" type="integer" value="30" min="0" label="Spline smoothing parameter" />
240 </when>
241 <when value="no" />
242 </conditional>
243 <conditional name="gauss_model_smoothing_cond">
244 <param name="gaussmodelsmoothing" type="select" label="Use Gaussian model smoothing?" help="Select No to smooth with a cubic spline.">
245 <option value="no" selected="True">No</option>
246 <option value="yes">Yes</option>
247 </param>
248 <when value="no" />
249 <when value="yes">
250 <param name="gausssmoothparam" type="integer" value="3" min="0" label="Smoothing factor" help="Gaussian smoothing standard deviation." />
251 </when>
252 </conditional>
253 <param name="jointinmodel" type="select" label="Allow joint events in model updates?">
254 <option value="no" selected="True">No</option>
255 <option value="yes">Yes</option>
256 </param>
257 <param name="fixedmodelrange" type="select" label="Keep binding model range fixed to initial size?" help="Select No to vary automatically">
258 <option value="no" selected="True">No</option>
259 <option value="yes">Yes</option>
260 </param>
261 <param name="prlogconf" type="integer" value="-6" label="Poisson log threshold for potential region scanning" />
262 <param name="fixedalpha" type="integer" value="0" min="0" label="Impose this alpha" help="This is a sparse prior on binding events in the MultiGPS model. It can be interpreted as a minimum number of reads that each binding event must be responsible for in the model. A zero value will estimate the alpha automatically." />
263 <param name="alphascale" type="float" value="1.0" min="0" label="Alpha scaling factor" />
264 <param name="mlconfignotshared" type="select" label="Share component configs in the ML step?" help="Mainly affects the quantification of binding levels for binding events that are not shared but are located at nearby locations across experiments.">
265 <option value="yes" selected="True">Yes</option>
266 <option value="no">No</option>
267 </param>
268 <param name="exclude" type="data" optional="True" format="txt" label="Optional file containing a set of regions to ignore during MultiGPS training" help="Ideally exclude the mitochondrial genome and other blacklisted regions that contain artifactual accumulations of reads in both ChIP-seq and control experiments." />
269 <!-- MultiGPS priors -->
270 <conditional name="use_motif_cond">
271 <param name="use_motif" type="select" label="Perform motif-finding or use a motif-prior?">
272 <option value="no" selected="True">No</option>
273 <option value="yes">Yes</option>
274 </param>
275 <when value="yes">
276 <!-- Specifying the genome -->
277 <conditional name="reference_genome_cond">
278 <param name="reference_genome_source" type="select" label="Choose the source for the reference genome">
279 <option value="cached">locally cached</option>
280 <option value="history">from history</option>
281 </param>
282 <when value="cached">
283 <param name="reference_genome" type="select" label="Using reference genome">
284 <options from_data_table="all_fasta">
285 <filter type="data_meta" key="dbkey" ref="expt" column="1"/>
286 </options>
287 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
288 </param>
289 </when>
290 <when value="history">
291 <param name="reference_genome" type="data" format="fasta" label="Using reference genome">
292 <options>
293 <filter type="data_meta" key="dbkey" ref="expt"/>
294 </options>
295 <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/>
296 </param>
297 </when>
298 </conditional>
299 <!-- MultiGPS priors options -->
300 <conditional name="multigps_priors_cond">
301 <param name="multigps_priors" type="select" label="Specify MultiGPS priors options?">
302 <option value="no" selected="True">No</option>
303 <option value="yes">Yes</option>
304 </param>
305 <when value="no" />
306 <when value="yes">
307 <param name="noposprior" type="select" label="Perform inter-experiment positional prior?">
308 <option value="yes" selected="True">Yes</option>
309 <option value="no">No</option>
310 </param>
311 <param name="probshared" type="float" value="0.9" min="0.0" label="Probability that events are shared across conditions" />
312 <conditional name="both_motifs_cond">
313 <param name="nomotifs" type="select" label="Perform both motif-finding and motif priors?">
314 <option value="yes" selected="True">Yes</option>
315 <option value="no">No</option>
316 </param>
317 <when value="yes">
318 <expand macro="motif_finding_params" />
319 </when>
320 <when value="no">
321 <conditional name="nomotifprior_cond">
322 <param name="nomotifprior" type="select" label="Perform motif-finding only?" help="Selecting Yes turns off motif priors.">
323 <option value="no" selected="True">No</option>
324 <option value="yes">Yes</option>
325 </param>
326 <when value="no" />
327 <when value="yes">
328 <expand macro="motif_finding_params" />
329 </when>
330 </conditional>
331 </when>
332 </conditional>
333 </when>
334 </conditional>
335 </when>
336 <when value="no" />
337 </conditional>
338 <!-- Reporting binding events -->
339 <conditional name="report_binding_events_cond">
340 <param name="report_binding_events" type="select" label="Report binding events?">
341 <option value="no" selected="True">No</option>
342 <option value="yes">Yes</option>
343 </param>
344 <when value="no" />
345 <when value="yes">
346 <param name="minqvalue" type="float" min="0" value="0.001" label="Minimum Q-value (corrected p-value) of reported binding events" />
347 <param name="minfold" type="float" min="0" value="1.5" label="Minimum event fold-change vs scaled control" />
348 <param name="nodifftests" type="select" label="Run differential enrichment tests?">
349 <option value="yes" selected="True">Yes</option>
350 <option value="no">No</option>
351 </param>
352 <param name="edgerod" type="float" min="0" value="0.15" label="EdgeR over-dispersion parameter value" />
353 <param name="diffp" type="float" min="0" value="0.01" label="Minimum p-value for reporting differential enrichment" />
354 </when>
355 </conditional>
356 </when>
357 <when value="hide" />
358 </conditional>
359 </inputs>
360 <outputs>
361 <data name="replicates_counts" format="tabular" label="${tool.name} replicates counts on ${on_string}"/>
362 <data name="all_events_table" format="tabular" label="${tool.name} all events table on ${on_string}"/>
363 <data name="experiment_events" format="tabular" label="${tool.name} experiment events on ${on_string}"/>
364 <data name="output_html" format="html" label="${tool.name} on ${on_string}"/>
365 </outputs>
366 <tests>
367 <test> <!-- Default run: experiment only, advanced options hidden -->
368 <param name="expt" value="expt_hg19.scidx" ftype="scidx" dbkey="hg19" />
369 <param name="advanced_options" value="hide" />
370 <output name="output_html" file="hg19_output_html1.html" ftype="html" compare="contains"/>
371 <output name="experiment_events" file="hg19_experiment_events1.tabular" ftype="tabular"/>
372 <output name="all_events_table" file="hg19_all_events_table1.tabular" ftype="tabular"/>
373 <output name="replicates_counts" file="hg19_replicates_counts1.tabular" ftype="tabular"/>
374 </test>
375 <test> <!-- Experiment plus control, advanced options displayed with their default values -->
376 <param name="expt" value="expt_hg19.scidx" ftype="scidx" dbkey="hg19" />
377 <param name="ctrl" value="cntrl_hg19.scidx" ftype="scidx" dbkey="hg19" />
378 <param name="advanced_options" value="display" />
379 <output name="output_html" file="hg19_output_html2.html" ftype="html" compare="contains"/>
380 <output name="experiment_events" file="hg19_experiment_events2.tabular" ftype="tabular"/>
381 <output name="all_events_table" file="hg19_all_events_table2.tabular" ftype="tabular"/>
382 <output name="replicates_counts" file="hg19_replicates_counts2.tabular" ftype="tabular"/>
383 </test>
384 </tests>
385 <help>
386
387 **What it does**
388
389 MultiGPS is a framework for analyzing collections of multi-condition ChIP-seq datasets and characterizing
390 differential binding events between conditions. MultiGPS encourages consistency in the reported binding
391 event locations across conditions and provides accurate estimation of ChIP enrichment levels at each event.
392 MultiGPS performs significant EM optimization of binding events along the genome and across experimental
393 conditions, and it integrates motif-finding via MEME. The tool loads all data into memory, so the potential
394 exists for time and memory intensive analyses if running over many conditions or large datasets.
395
396 -----
397
398 **Options**
399
400 * **Loading data:**
401
402 - **Optional file containing reads from a control experiment** - must be same format as input experiment
403 - **Fixed per-base limit** - Fixed per-base limit (default: estimated from background model).
404 - **Poisson threshold for filtering per base** - Look at neighboring positions to decide what the per-base limit should be.
405 - **Use non-unique reads** - Use non-unique reads.
406 - **Fraction of the genome that is mappable for these experiments** - Fraction of the genome that is mappable for these experiments
407 - **Turn off caching of the entire set of experiments?** - Flag to turn off caching of the entire set of experiments (i.e. run slower with less memory).
408
409 * **Scaling control vs signal counts:**
410
411 - **Use signal vs control scaling?** - Flag to turn off auto estimation of signal vs control scaling factor
412 - **Use the median signal/control ratio as the scaling factor?** - Flag to use scaling by median ratio (default = scaling by NCIS).
413 - **Use scaling by regression on binned tag counts?** - Flag to use scaling by regression (default = scaling by NCIS).
414 - **Estimate scaling factor by SES?** - Specify whether to estimate scaling factor by SES.
415 - **Multiply control counts by total tag count ratio and then by this factor** - Multiply control counts by total tag count ratio and then by this factor (default: NCIS).
416 - **Window size for estimating scaling ratios** - Window size in base pairs for estimating scaling ratios
417 - **Plot diagnostic information for the chosen scaling method?** - Flag to plot diagnostic information for the chosen scaling method.
418
419 * **Running MultiGPS:**
420
421 - **Optional binding event read distribution file** - Binding event read distribution file for initializing models. The true distribution of reads around binding events is estimated during MultiGPS training. A default initial distribution appropriate for ChIP-seq data is used if this option is not specified.
422 - **Maximum number of training rounds for updating binding event read distributions** - Maximum number of training rounds for updating binding event read distributions.
423 - **Perform binding model updates?** - Perform binding model updates?
424 - **Minimum number of events to support an update of the read distribution** - Minimum number of events to support an update of the read distribution
425 - **Perform binding model smoothing?** - Smooth with a cubic spline using a specified smoothing factor.
426 - **Spline smoothing parameter** - Smoothing parameter for smoothing cubic spline.
427 - **Use Gaussian model smoothing?** - Select "Yes" to use Gaussian model smoothing using a specified smoothing factor if binding model smoothing is not performed.
428 - **Allow joint events in model updates?** - Specify whether to allow joint events in model updates.
429 - **Keep binding model range fixed to initial size?** - Flag to keep binding model range fixed to initial size (default: vary automatically)
430 - **Poisson log threshold for potential region scanning** - Poisson log threshold for potential region scanning.
431 - **Alpha scaling factor** - Alpha scaling factor. Increasing this parameter results in stricter binding event calls.
432 - **Impose this alpha** - The alpha parameter is a sparse prior on binding events in the MultiGPS model. It can be interpreted as a minimum number of reads that each binding event must be responsible for in the model. Default: estimate alpha automatically.
433 - **Share component configs in the ML step?** - Flag to not share component configs in the ML step
434 - **Optional file containing a set of regions to ignore during MultiGPS training** - File containing a set of regions to ignore during MultiGPS training. It’s a good idea to exclude the mitochondrial genome and other ‘blacklisted’ regions that contain artifactual accumulations of reads in both ChIP-seq and control experiments. MultiGPS will waste time trying to model binding events in these regions, even though they will not typically appear significantly enriched over the control (and thus will not be reported to the user).
435
436 * **MultiGPS priors:**
437
438 - **Perform inter-experiment positional prior?** - Flag to turn off inter-experiment positional prior (default=on).
439 - **Probability that events are shared across conditions** - Probability that events are shared across conditions.
440 - **Perform both motif-finding and motif priors?** - Flag to turn off motif-finding and motif priors.
441 - **Perform motif-finding only?** - Flag to turn off motif priors only.
442 - **Number of motifs MEME should find for each condition** - Number of motifs MEME should find for each condition.
443 - **Minimum motif width for MEME** - minw arg for MEME.
444 - **Maximum motif width for MEME** - maxw arg for MEME.
445
446 * **Reporting binding events:**
447
448 - **Minimum Q-value (corrected p-value) of reported binding events** - Minimum Q-value (corrected p-value) of reported binding events.
449 - **Minimum event fold-change vs scaled control** - Minimum event fold-change vs scaled control.
450 - **Run differential enrichment tests?** - Choose whether to run differential enrichment tests.
451 - **EdgeR over-dispersion parameter value** - EdgeR over-dispersion parameter value.
452 - **Minimum p-value for reporting differential enrichment** - Minimum p-value for reporting differential enrichment.
453
454 </help>
455 <citations>
456 <citation type="doi">10.1371/journal.pcbi.1003501</citation>
457 </citations>
458 </tool>