comparison mosaics.xml @ 3:95a657f15ba7 draft

Uploaded
author dongjun
date Thu, 10 Jan 2013 15:56:57 -0500
parents
children
comparison
equal deleted inserted replaced
2:b6d0c6ceda2c 3:95a657f15ba7
1 <tool id="MOSAiCS" name="MOSAiCS: MOdel-based one and two Sample Analysis and inference for ChIP-Seq Data" version="2.0.0">
2
3 <description></description> <!-- left empty; the tool's name attribute already carries the long title -->
4
5 <parallelism method="basic"></parallelism>
6
7 <requirements>
8 <requirement type="binary">R</requirement> <!-- presumably the R interpreter used by mosaics_wrapper.pl; confirm against the wrapper -->
9 </requirements>
10
11 <command interpreter="perl">
12 mosaics_wrapper.pl
13 ## ChIP sample: dataset, then its file format (all arguments are positional, so order matters)
14 $readFileType.chipParams.chip
15 $readFileType.chipParams.chipFileFormat
16 ## control sample: dataset, then its file format
17 $readFileType.controlParams.control
18 $readFileType.controlParams.controlFileFormat
19 ## peak output: dataset, then the export format chosen in OutfileFormat
20 $out_peak
21 $OutfileFormat
22 ## analysis type: fixed literal (presumably the two-sample input-only analysis; confirm in the wrapper)
23 IO
24 ## optional diagnostic outputs (each one is filtered on its checkbox in the outputs section)
25 $report_summary
26 $report_gof
27 $report_exploratory
28 ## required settings for model fitting and peak calling; form defaults are FALSE, FALSE, 0.05, 200, 200, 0
29 $readFileType.pet
30 $by_chr
31 $fdrLevel
32 $fragLen
33 $binSize
34 $capping
35 #if $fitParams.fSettingsType == "preSet"
36 ## 'Commonly used' preset: fixed values matching the form defaults of signalModel, bgEst, d, maxgap, minsize, thres
37 BIC
38 automatic
39 0.25
40 200
41 50
42 10
43 ## parallel computing: parallel flag, then number of CPUs (same order as the else branch)
44 TRUE
45 8
46 #else
47 $fitParams.signalModel
48 $fitParams.bgEst
49 $fitParams.d
50 $fitParams.maxgap
51 $fitParams.minsize
52 $fitParams.thres
53 $fitParams.parallel
54 $fitParams.nCore
55 #end if
56 </command>
57
58 <inputs>
59 <conditional name="readFileType"> <!-- PET/SET switch; each branch exposes its own chipParams and controlParams conditionals -->
60 <param name="pet" type="select" label="Paired-end tag (PET) or single-end tag (SET) data">
61 <option value="FALSE">Single-end tag (SET) data</option>
62 <option value="TRUE">Paired-end tag (PET) data</option>
63 </param>
64 <when value="FALSE"> <!-- SET -->
65 <conditional name="chipParams">
66 <param name="chipFileFormat" type="select" label="Select file format for ChIP sample" help="MOSAiCS accepts aligned read files as input. MOSAiCS accepts Eland result, Eland extended, Eland export, Bowtie default, SAM, BED, and CSEM file formats for single-end tag (SET) data.">
67 <option value="eland_result">Eland result</option>
68 <option value="eland_extended">Eland extended</option>
69 <option value="eland_export">Eland export</option>
70 <option value="bowtie">Bowtie default</option>
71 <option value="sam">SAM</option>
72 <option value="bed">BED</option>
73 <option value="csem">CSEM</option>
74 </param>
75 <when value="eland_result">
76 <param name="chip" type="data" format="eland" label="Eland result file for ChIP sample"/>
77 </when>
78 <when value="eland_extended">
79 <param name="chip" type="data" format="eland" label="Eland extended file for ChIP sample"/>
80 </when>
81 <when value="eland_export">
82 <param name="chip" type="data" format="eland" label="Eland export file for ChIP sample"/>
83 </when>
84 <when value="bowtie">
85 <param name="chip" type="data" label="Bowtie default file for ChIP sample"/> <!-- no format attribute declared here, unlike the sibling branches -->
86 </when>
87 <when value="sam">
88 <param name="chip" type="data" format="sam" label="SAM file for ChIP sample"/>
89 </when>
90 <when value="bed">
91 <param name="chip" type="data" format="bed" label="BED file for ChIP sample"/>
92 </when>
93 <when value="csem">
94 <param name="chip" type="data" format="csem" label="CSEM file for ChIP sample"/>
95 </when>
96 </conditional> <!-- chipParams -->
97
98 <conditional name="controlParams">
99 <param name="controlFileFormat" type="select" label="Select file format for control sample" help="MOSAiCS accepts aligned read files as input. MOSAiCS accepts Eland result, Eland extended, Eland export, Bowtie default, SAM, BED, and CSEM file formats for single-end tag (SET) data.">
100 <option value="eland_result">Eland result</option>
101 <option value="eland_extended">Eland extended</option>
102 <option value="eland_export">Eland export</option>
103 <option value="bowtie">Bowtie default</option>
104 <option value="sam">SAM</option>
105 <option value="bed">BED</option>
106 <option value="csem">CSEM</option>
107 </param>
108 <when value="eland_result">
109 <param name="control" type="data" format="eland" label="Eland result file for control sample"/>
110 </when>
111 <when value="eland_extended">
112 <param name="control" type="data" format="eland" label="Eland extended file for control sample"/>
113 </when>
114 <when value="eland_export">
115 <param name="control" type="data" format="eland" label="Eland export file for control sample"/>
116 </when>
117 <when value="bowtie">
118 <param name="control" type="data" label="Bowtie default file for control sample"/> <!-- no format attribute declared here, unlike the sibling branches -->
119 </when>
120 <when value="sam">
121 <param name="control" type="data" format="sam" label="SAM file for control sample"/>
122 </when>
123 <when value="bed">
124 <param name="control" type="data" format="bed" label="BED file for control sample"/>
125 </when>
126 <when value="csem">
127 <param name="control" type="data" format="csem" label="CSEM file for control sample"/>
128 </when>
129 </conditional> <!-- controlParams -->
130 </when> <!-- end of SET branch -->
131 <when value="TRUE"> <!-- PET -->
132 <conditional name="chipParams">
133 <param name="chipFileFormat" type="select" label="Select file format for ChIP sample" help="MOSAiCS accepts aligned read files as input. MOSAiCS accepts Eland result and SAM file formats for paired-end tag (PET) data.">
134 <option value="eland_result">Eland result</option>
135 <option value="sam">SAM</option>
136 </param>
137 <when value="eland_result">
138 <param name="chip" type="data" format="eland" label="Eland result file for ChIP sample"/>
139 </when>
140 <when value="sam">
141 <param name="chip" type="data" format="sam" label="SAM file for ChIP sample"/>
142 </when>
143 </conditional> <!-- chipParams -->
144
145 <conditional name="controlParams">
146 <param name="controlFileFormat" type="select" label="Select file format for control sample" help="MOSAiCS accepts aligned read files as input. MOSAiCS accepts Eland result and SAM file formats for paired-end tag (PET) data.">
147 <option value="eland_result">Eland result</option>
148 <option value="sam">SAM</option>
149 </param>
150 <when value="eland_result">
151 <param name="control" type="data" format="eland" label="Eland result file for control sample"/>
152 </when>
153 <when value="sam">
154 <param name="control" type="data" format="sam" label="SAM file for control sample"/>
155 </when>
156 </conditional> <!-- controlParams -->
157 </when> <!-- end of PET branch -->
158 </conditional><!-- readFileType -->
159
160 <param name="OutfileFormat" type="select" label="Select file format for peak calling results" help="MOSAiCS can export peak calling results into BED or GFF file formats, or as a table."> <!-- passed to the wrapper and also used by the change_format rules of the out_peak output -->
161 <option value="bed">BED</option>
162 <option value="gff">GFF</option>
163 <option value="txt">table</option>
164 </param>
165 <param name="summary" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Summary of model fitting and peak calling" /> <!-- referenced by the filter of the report_summary output -->
166 <param name="gof" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Goodness of fit (GOF) plots" /> <!-- referenced by the filter of the report_gof output -->
167 <param name="exploratory" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Plots of exploratory analysis" /> <!-- referenced by the filter of the report_exploratory output -->
168
169 <param name="by_chr" type="select" label="Genome-wide analysis or chromosome-wise analysis" help="If genome-wide analysis is used, one model is fitted for all the chromosomes. If chromosome-wise analysis is used, different model is fitted for each chromosome separately." > <!-- required settings: by_chr, fdrLevel, fragLen, binSize and capping are passed positionally to the wrapper in this order -->
170 <option value="FALSE">Genome-wide analysis</option>
171 <option value="TRUE">Chromosome-wise analysis</option>
172 </param>
173 <param name="fdrLevel" type="float" value="0.05" min="0" max="1" label="False discovery rate (FDR)" help="FDR level for peak detection (default: 0.05)" />
174 <param name="fragLen" type="integer" value="200" min="1" label="Average fragment length" help="Default: 200." /> <!-- min="1": a zero or negative fragment length is rejected at form validation -->
175 <param name="binSize" type="integer" value="200" min="1" label="Bin size" help="By default, bin size equals to the average fragment length." /> <!-- min="1": a zero or negative bin size is rejected at form validation -->
176 <param name="capping" type="integer" value="0" label="Maximum number of reads allowed to start at each nucleotide position" help="If non-positive value is specified (e.g., 0), any number of reads are allowed at each nucleotide position (i.e., no filtering). By default, filtering is NOT used." /> <!-- deliberately unbounded: per the help text, non-positive values disable filtering -->
177
178 <conditional name="fitParams"> <!-- optional settings: 'preSet' makes the command template emit fixed values, 'full' exposes every parameter below -->
179 <param name="fSettingsType" type="select" label="Settings for model fitting and peak calling" help="For most peak calling applications, use the 'Commonly used' setting. If you want access to all parameters, use 'Full parameter list'.">
180 <option value="preSet">Commonly used</option>
181 <option value="full">Full parameter list</option>
182 </param>
183 <when value="preSet" /> <!-- no inputs: the command template hard-codes BIC, automatic, 0.25, 200, 50, 10, TRUE, 8 for this branch -->
184 <when value="full">
185 <param name="signalModel" type="select" label="Signal model" help="By default, signal model is chosen using BIC.">
186 <option value="BIC">Automatic model selection based on BIC</option>
187 <option value="1S">One-signal-component model</option>
188 <option value="2S">Two-signal-component model</option>
189 </param>
190 <param name="bgEst" type="select" label="Background estimation approach" help="By default, background estimation approach is automatically determined based on the data.">
191 <option value="automatic">Automatic selection based on the data</option>
192 <option value="matchLow">Based on bins with low tag counts</option>
193 <option value="rMOM">Robust method of moment (MOM)</option>
194 </param>
195 <param name="d" type="float" value="0.25" label="d" help="Parameter for estimating background distribution. Default is 0.25." />
196 <param name="maxgap" type="integer" value="200" label="maxgap" help="Initial nearby peaks are merged if the distance (in bp) between them is less than 'maxgap'. Default is 200." />
197 <param name="minsize" type="integer" value="50" label="minsize" help="An initial peak is removed if its width is narrower than 'minsize'. Default is 50." />
198 <param name="thres" type="integer" value="10" label="thres" help="A bin within initial peak is removed if its ChIP tag counts are less than 'thres'. Default is 10." />
199 <param name="parallel" type="select" label="Use parallel computing?">
200 <option value="TRUE">Use parallel computing.</option>
201 <option value="FALSE">NOT use parallel computing.</option>
202 </param>
203 <param name="nCore" type="integer" value="8" min="1" label="Number of CPUs" help="Number of CPUs used for parallel computing. Relevant only when parallel computing is used. Default is to use 8 CPUs." /> <!-- min="1": a CPU count of zero or less is rejected at form validation -->
204 </when> <!-- full -->
205 </conditional> <!-- fitParams -->
206 </inputs>
207
208 <outputs> <!-- each output has its own label so the peak file and the three reports can be told apart in the history -->
209 <data format="tabular" name="out_peak" label="MOSAiCS on ${on_string}: peaks">
210 <change_format> <!-- format stays tabular when OutfileFormat is "txt" -->
211 <when input="OutfileFormat" value="bed" format="bed" />
212 <when input="OutfileFormat" value="gff" format="gff" />
213 </change_format>
214 </data>
215 <data format="txt" name="report_summary" label="MOSAiCS on ${on_string}: summary report">
216 <filter>summary == 1</filter>
217 </data>
218 <data format="pdf" name="report_gof" label="MOSAiCS on ${on_string}: goodness of fit plots">
219 <filter>gof == 1</filter>
220 </data>
221 <data format="pdf" name="report_exploratory" label="MOSAiCS on ${on_string}: exploratory analysis plots">
222 <filter>exploratory == 1</filter>
223 </data>
224 </outputs>
225
226 <help>
227
228 **What it does**
229
230 MOSAiCS is a statistical framework for the analysis of ChIP-seq data and it stands for MOdel-based one and two Sample Analysis and Inference for ChIP-Seq Data. MOSAiCS is based on a flexible parametric mixture modeling approach for detecting peaks (i.e., enriched regions).
231 MOSAiCS is also available in Bioconductor_ as an R package.
232 We encourage questions or requests regarding MOSAiCS to be posted on our `Google group`_.
233
234 Please cite: Kuan PF, Chung D, Pan G, Thomson JA, Stewart R, and Keles S (2011), "`A statistical framework for the analysis of ChIP-Seq data`_," *Journal of the American Statistical Association*, Vol. 106, pp. 891--903.
235
236 .. _Bioconductor: http://www.bioconductor.org/help/bioc-views/2.11/bioc/html/mosaics.html
237 .. _Google group: http://groups.google.com/group/mosaics_user_group
238 .. _A statistical framework for the analysis of ChIP-Seq data: http://pubs.amstat.org/doi/abs/10.1198/jasa.2011.ap09706
239
240 ------
241
242 **Input formats**
243
244 MOSAiCS accepts aligned read files of ChIP and control samples as input. Currently, MOSAiCS accepts Eland result, Eland extended, Eland export, Bowtie default, SAM, BED, and CSEM formats for single-end tag (SET) data. For paired-end tag (PET) data, MOSAiCS accepts Eland result and SAM formats.
245
246 ------
247
248 **Outputs**
249
250 Peak calling results of MOSAiCS can be exported into BED or GFF file formats, or as a table. Each line of the output file specifies a single peak.
251
252 If the output is a table, it has the following columns::
253
254 Column Description
255 -------- --------------------------------------------------------
256 1 Chromosome of the peak
257 2 Start position of the peak
258 3 End position of the peak
259 4 Width of the peak
260 5 Averaged posterior probability of the peak
261 6 Minimum posterior probability of the peak
262 7 Averaged ChIP tag counts of the peak
263 8 Maximum ChIP tag counts of the peak
264 9 Averaged control tag counts of the peak
265 10 Averaged control tag counts of the peak, scaled by sequencing depth
266 11 Averaged log base 2 ratio of ChIP over control tag counts
267
268 If the output is in BED format, it has the following columns::
269
270 Column Description
271 ------------ --------------------------------------------------------
272 1 chrom Chromosome of the peak
273 2 chromStart Start position of the peak
274 3 chromEnd End position of the peak
275 4 name Always "MOSAiCS_peak"
276 5 score Averaged ChIP tag counts of the peak
277
278 If the output is in GFF format, it has the following columns::
279
280 Column Description
281 --------- --------------------------------------------------------
282 1 seqname Chromosome of the peak
283 2 source Always "MOSAiCS"
284 3 feature Always "MOSAiCS_peak"
285 4 start Start position of the peak
286 5 end End position of the peak
287 6 score Averaged ChIP tag counts of the peak
288 7 strand Always "."
289 8 frame Always "."
290 9 group Always "."
291
292 ------
293
294 **Reports for diagnostics**
295
296 *Summary of model fitting and peak calling*: This report provides information about input and output files, parameter settings used for model fitting and peak calling, and brief summary of peak calling results.
297
298 *Goodness of fit (GOF) plots*: This report allows visual comparisons of the fits of the background, one-signal-component, and two-signal-component models with the actual data.
299
300 *Plots of exploratory analysis*: This report provides the histograms of ChIP and control samples and the scatter plots of ChIP versus control tag counts.
301
302 More details regarding these reports can be found here_.
303
304 ------
305
306 **Settings for model fitting and peak calling**
307
308 More details about the tuning of these parameters can be found here_.
309
310 .. _here: http://www.bioconductor.org/packages/2.11/bioc/vignettes/mosaics/inst/doc/mosaics-example.pdf
311
312 </help>
313 </tool>