Mercurial > repos > iuc > genrich
comparison genrich.xml @ 0:a41d96fc0b20 draft
planemo upload for repository https://github.com/jsh58/Genrich commit 38aa99ebf650c22a1c4965f6f008882aea7033ba
author | iuc |
---|---|
date | Mon, 15 Jul 2019 09:43:27 -0400 |
parents | |
children | db50f51a2952 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a41d96fc0b20 |
---|---|
1 <tool id="genrich" name="Genrich" version="0.5"> | |
2 <description>Detecting sites of genomic enrichment</description> | |
3 <requirements> | |
4 <requirement type="package" version="0.5">genrich</requirement> | |
5 <requirement type="package" version="1.9">samtools</requirement> | |
6 </requirements> | |
7 | |
8 <version_command>Genrich --version</version_command> | |
9 | |
10 <command detect_errors="exit_code"><![CDATA[ | |
11 | |
12 #set $file_stderr = 'genrich_stderr' | |
13 | |
14 Genrich | |
15 | |
16 ########### | |
17 ## Input ## | |
18 ########### | |
19 ## Multiple replicates must reach Genrich as ONE comma-separated argument, so the paths are joined with commas inside a single pair of quotes | |
20 ## Treatment File(s) | |
21 #if str($treatment.t_multi_select) == "Yes": | |
22 -t '${ ','.join( [ str($x) for $x in $treatment.input_treatment_file ] ) }' | |
23 #else | |
24 -t '$treatment.input_treatment_file' | |
25 #end if | |
26 | |
27 ## Control File(s) | |
28 #if str($control.c_select) == "Yes": | |
29 #if str($control.c_multiple.c_multi_select) == "Yes": | |
30 -c '${ ','.join( [ str($x) for $x in $control.c_multiple.input_control_file ] ) }' | |
31 #else | |
32 -c '$control.c_multiple.input_control_file' | |
33 #end if | |
34 #end if | |
35 | |
36 #################### | |
37 ## Filter Options ## | |
38 #################### | |
39 ## The boolean expands to "-r -R"; it stays unquoted so the shell passes two options and the log path becomes the argument of -R | |
40 #if $filter_options.duplicates: | |
41 $filter_options.duplicates '${out_dups}' | |
42 #end if | |
43 | |
44 #if $filter_options.exclude_chr: | |
45 -e '$filter_options.exclude_chr' | |
46 #end if | |
47 | |
48 #if str($cond_exclude.exclude_select) == "Yes": | |
49 -E '$cond_exclude.erf' | |
50 #end if | |
51 | |
52 -m $filter_options.min_mapq | |
53 -s $filter_options.alignment_score | |
54 $filter_options.unpaired | |
55 | |
56 #if $filter_options.alignment_lengths: | |
57 -w $filter_options.alignment_lengths | |
58 #end if | |
59 | |
60 $filter_options.alignment_lengths2 | |
61 | |
62 ################## | |
63 ## ATAC Options ## | |
64 ################## | |
65 | |
66 $atac_options.atac | |
67 -d $atac_options.expand_sites | |
68 | |
69 ######################### | |
70 ## Peakcalling Options ## | |
71 ######################### | |
72 | |
73 -q $peakcalling_options.max_q | |
74 | |
75 #if $peakcalling_options.max_p: | |
76 -p $peakcalling_options.max_p | |
77 #end if | |
78 | |
79 -a $peakcalling_options.min_auc | |
80 -l $peakcalling_options.min_peak_length | |
81 -g $peakcalling_options.max_dist | |
82 | |
83 ################### | |
84 ## Other Options ## | |
85 ################### | |
86 | |
87 $other_options.skip_peak_calling | |
88 -v | |
89 | |
90 #################### | |
91 ## Output Options ## | |
92 #################### | |
93 | |
94 #if $output_options.bedgraph1: | |
95 -f '${out_bedgraph1}' | |
96 #end if | |
97 | |
98 #if $output_options.bedgraph2: | |
99 -k '${out_bedgraph2}' | |
100 #end if | |
101 | |
102 #if $output_options.bed: | |
103 -b '${out_bed}' | |
104 #end if | |
105 | |
106 -o '${outfile}' | |
107 ## Capture stdout and stderr in a log file, remember the exit status of Genrich, replay the log on stdout, then exit with that status | |
108 > $file_stderr 2>&1 ; | |
109 exit_code_for_galaxy=\$? ; | |
110 cat $file_stderr ; | |
111 exit \$exit_code_for_galaxy | |
112 | |
113 ]]></command> | |
114 <inputs> | |
115 <conditional name="treatment"> <!-- Treatment input: the select switches between a single BAM ("No") and a multiple-dataset parameter ("Yes"); both branches use the same parameter name, input_treatment_file, which the command template passes to -t --> | |
116 <param name="t_multi_select" type="select" label="Are you pooling Treatment Files?" help="For more information, see Help section below" > | |
117 <option value="No" selected="True">No</option> | |
118 <option value="Yes">Yes</option> | |
119 </param> | |
120 <when value="No" > | |
121 <param name="input_treatment_file" argument="-t" type="data" format="qname_sorted.bam" label="Treatment File(s)" /> | |
122 </when> | |
123 <when value="Yes"> | |
124 <param name="input_treatment_file" argument="-t" type="data" format="qname_sorted.bam" multiple="true" label="Treatment Files" /> | |
125 </when> | |
126 </conditional> | |
127 | |
128 <conditional name="control"> <!-- Optional control input: c_select defaults to "No" (no control parameters at all); "Yes" reveals the nested c_multiple conditional, which mirrors the treatment single/multiple choice and feeds -c --> | |
129 <param name="c_select" type="select" label="Do you have a Control File?" > | |
130 <option value="Yes">Yes</option> | |
131 <option value="No" selected="True">No</option> | |
132 </param> | |
133 <when value="Yes"> | |
134 <conditional name="c_multiple"> | |
135 <param name="c_multi_select" type="select" label="Are you pooling Control Files?" help="For more information, see Help section below" > | |
136 <option value="No" selected="True">No</option> | |
137 <option value="Yes">Yes</option> | |
138 </param> | |
139 <when value="No" > | |
140 <param name="input_control_file" argument="-c" type="data" format="qname_sorted.bam" label="Control File(s)" /> | |
141 </when> | |
142 <when value="Yes"> | |
143 <param name="input_control_file" argument="-c" type="data" format="qname_sorted.bam" multiple="true" label="Control Files" /> | |
144 </when> | |
145 </conditional> | |
146 </when> | |
147 <when value="No" /> | |
148 </conditional> | |
149 | |
150 <!-- Excluded regions: optional BED file passed to -E; it lives in its own conditional (outside the filter_options section) so the dataset selector only appears when "Yes" is chosen --> | |
151 <conditional name="cond_exclude"> | |
152 <param name="exclude_select" type="select" label="Do you have a BED file of genomic regions to exclude?" help="Input BED file of genomic regions to exclude." > | |
153 <option value="No" selected="True">No</option> | |
154 <option value="Yes">Yes</option> | |
155 </param> | |
156 <when value="No" /> | |
157 <when value="Yes"> | |
158 <param name="erf" argument="-E" type="data" format="bed" label="BED File" /> | |
159 </when> | |
160 </conditional> | |
161 | |
162 <section name="filter_options" title="Filter Options"> <!-- Alignment filtering: boolean parameters expand directly to their command-line flag (or to an empty string); numeric and text parameters are emitted by the command template after the matching option letter --> | |
163 <param name="duplicates" argument="-r" type="boolean" value="False" truevalue="-r -R" falsevalue="" label="Remove PCR duplicates" help="In this process, it analyzes reads/fragments based on their alignments, in three separate groups (proper pairs, discordant pairs, and singletons), and removes those identified as duplicates from further analysis. One novel feature is that this evaluation takes into account reads/fragments with multiple alignments."/> | |
164 <param name="exclude_chr" argument="-e" type="text" optional="True" label="Comma-separated list of chromosomes to exclude" help="All alignments to the given list of chromosomes (reference sequences) are excluded from peak-calling. More details can be found in the tool description."> | |
165 <sanitizer> | |
166 <valid initial="string.printable"> | |
167 <remove value="'"/> | |
168 </valid> | |
169 </sanitizer> | |
170 </param> | |
171 <param name="min_mapq" argument="-m" type="integer" min="0" value="0" label="Minimum MAPQ to keep an alignment." help="All alignments with MAPQ less than the given value are eliminated. This is equivalent to filtering with samtools view -q. This option should not be used if the SAM/BAM lists multiple alignments for some reads/fragments. Instead, filtering should be accomplished via -s. (def. 0)" /> | |
172 <param name="alignment_score" argument="-s" type="float" min="0.0" value="0.0" label="Keep secondary alignments with AS >= bestAS - value." help="Genrich considers all secondary alignments of multimapping reads, but, by default, it keeps only the alignments whose scores are equal to the best score for the read/fragment. Setting a value such as -s 20 causes Genrich also to keep secondary alignments whose scores are within 20 of the best. (def. 0)" /> | |
173 <param name="unpaired" argument="-y" type="boolean" value="False" truevalue="-y" falsevalue="" label="Keep unpaired alignments." help="Unpaired alignments are kept, just as they appear in the SAM/BAM. (def. false)"/> | |
174 <param name="alignment_lengths" argument="-w" type="integer" min="1" optional="True" value="" label="Keep unpaired alignments with a certain length." help="Unpaired alignments are kept, with their lengths changed to the given value (from their 5' ends). (def. not defined)" /> | |
175 <param name="alignment_lengths2" argument="-x" type="boolean" value="False" truevalue="-x" falsevalue="" label="Keep unpaired alns, lengths changed to paired average." help="Unpaired alignments are kept, with their lengths changed to the average length of fragments inferred from properly paired alignments (excluding those aligning to skipped chromosomes [-e]). (def. not defined)"/> | |
176 </section> | |
177 | |
178 <!-- ATAC Options: the atac boolean expands to -j when checked; expand_sites is always passed to -d by the command template, whether or not ATAC-seq mode is enabled --> | |
179 <section name="atac_options" title="ATAC Options"> | |
180 <param name="atac" argument="-j" type="boolean" value="False" truevalue="-j" falsevalue="" label="Use ATAC-seq mode." help="Use ATAC-seq mode (def. false)"/> | |
181 <param name="expand_sites" argument="-d" type="integer" min="0" value="100" label="Expand cut sites." help="Expand cut sites to x bp (def. 100)" /> | |
182 </section> | |
183 | |
184 <!-- Peakcalling Options: max_q, min_auc, min_peak_length and max_dist are always emitted by the command template; max_p is optional and -p is only added when a value is entered --> | |
185 <section name="peakcalling_options" title="Peakcalling Options"> | |
186 <param name="max_q" argument="-q" type="float" min="0.0" max="1.0" value="0.05" label="Maximum q-value." help="Maximum q-value (FDR-adjusted p-value). These parameters establish the statistical threshold below which a base is considered significantly enriched in the experimental sample(s) vs. the control/background. The significance value is automatically converted to a -log10 scale by Genrich. (def. 0.05)" /> | |
187 <param name="max_p" argument="-p" type="float" min="0" max="1.0" optional="True" value="" label="Maximum p-value." help="When -p is selected, q-values are not calculated (reported as -1). (def. turned off)" /> | |
188 <param name="min_auc" argument="-a" type="float" min="0" value="20.0" label="Minimum AUC for a peak." help="Minimum AUC for a peak. (def. 20.0)" /> | |
189 <param name="min_peak_length" argument="-l" type="integer" min="0" value="0" label="Minimum length of a peak." help="With this option, any potential peak whose length is below the specified value is discarded, regardless of its significance. The default of 0 means that no peaks are eliminated on this basis. (def. 0)" /> | |
190 <param name="max_dist" argument="-g" type="integer" min="0" value="100" label="Maximum distance between signif. sites." help="This parameter sets the maximum distance between sites that achieve significance in order for them to be linked together into the same potential peak. (def. 100)" /> | |
191 </section> | |
192 | |
193 <!-- Other Options: skip_peak_calling expands to -X when checked; the command template additionally always passes -v, which is not exposed as a parameter --> | |
194 <section name="other_options" title="Other Options"> | |
195 <param name="skip_peak_calling" argument="-X" type="boolean" value="False" truevalue="-X" falsevalue="" label="Skip peak-calling." help="This is a convenience option for those who are unsure of the peak-calling parameters but do not want to run the full analysis multiple times. Genrich interprets the alignment files (including identifying PCR duplicates) and produces intermediate log files, but does not perform the peak-calling step."/> | |
196 </section> | |
197 | |
198 <!-- Output Options: these booleans define no truevalue/falsevalue; they act as switches that the command template tests to add -f, -k and -b, and that the output filters test to decide which optional datasets are created --> | |
199 <section name="output_options" title="Output Options"> | |
200 <param name="bedgraph1" argument="-f" type="boolean" value="False" label="Bedgraph-ish p/q Values" help="Output bedgraph-ish file for p/q values."/> | |
201 <param name="bedgraph2" argument="-k" type="boolean" value="False" label="Bedgraph-ish Pileups" help="Output bedgraph-ish file for pileups and p-values."/> | |
202 <param name="bed" argument="-b" type="boolean" value="False" label="Bed File" help="Output BED file for reads/fragments/intervals."/> | |
203 </section> | |
204 </inputs> | |
205 | |
206 | |
207 <outputs> | |
208 <data name="outfile" format="encodepeak" label="${tool.name} on ${on_string}"/> | |
209 <!-- Optional outputs: the command template writes each file directly to its dataset path, so no from_work_dir is needed; each dataset is only created when its filter evaluates to True --> | |
210 <data name="out_bedgraph1" format="bedgraph" label="${tool.name} on ${on_string}: Bedgraph p/q"> | |
211 <filter>(output_options['bedgraph1'] is True)</filter> | |
212 </data> | |
213 <data name="out_bedgraph2" format="bedgraph" label="${tool.name} on ${on_string}: Bedgraph Pileups"> | |
214 <filter>(output_options['bedgraph2'] is True)</filter> | |
215 </data> | |
216 <data name="out_bed" format="bed" label="${tool.name} on ${on_string}: Bed reads/fragments/intervals"> | |
217 <filter>(output_options['bed'] is True)</filter> | |
218 </data> | |
219 <data name="out_dups" format="txt" label="${tool.name} on ${on_string}: PCR duplicates"> | |
220 <filter>(filter_options['duplicates'] is True)</filter> | |
221 </data> | |
222 </outputs> | |
223 <tests> | |
224 <!-- ATAC test: one treatment BAM in ATAC-seq mode with all three optional outputs switched on, hence 4 expected outputs (peaks, two bedgraph-ish files, BED); the pileup file is compared with compare="contains" --> | |
225 <test expect_num_outputs="4"> | |
226 <param name="input_treatment_file" ftype="bam" value="atac_test.bam" /> | |
227 <param name="atac" value="True" /> | |
228 <param name="bedgraph1" value="True" /> | |
229 <param name="bedgraph2" value="True" /> | |
230 <param name="bed" value="True" /> | |
231 <output name="outfile" ftype="encodepeak" file="atac_out.encodepeak" /> | |
232 <output name="out_bedgraph1" ftype="bedgraph" file="atac_out2.bedgraph" /> | |
233 <output name="out_bedgraph2" ftype="bedgraph" file="atac_out3.bedgraph" compare="contains" lines_diff="1" /> | |
234 <output name="out_bed" ftype="bed" file="atac_out4.bed" /> | |
235 </test> | |
236 <!-- ChIP test with a control: one treatment BAM and one control BAM (c_select set to Yes), default mode, same three optional outputs enabled, 4 expected outputs --> | |
237 <test expect_num_outputs="4"> | |
238 <param name="input_treatment_file" ftype="bam" value="CTCF_PE_ChIP_chr22.bam" /> | |
239 <param name="input_control_file" ftype="bam" value="CTCF_PE_CTRL_chr22.bam" /> | |
240 <param name="c_select" value="Yes" /> | |
241 <param name="bedgraph1" value="True" /> | |
242 <param name="bedgraph2" value="True" /> | |
243 <param name="bed" value="True" /> | |
244 <output name="outfile" ftype="encodepeak" file="CTCF.encodepeak" /> | |
245 <output name="out_bedgraph1" ftype="bedgraph" file="CTCF1.bedgraph" /> | |
246 <output name="out_bedgraph2" ftype="bedgraph" file="CTCF2.bedgraph" compare="contains" lines_diff="1" /> | |
247 <output name="out_bed" ftype="bed" file="CTCF.bed" /> | |
248 </test> | |
249 </tests> | |
250 <help><![CDATA[ | |
251 | |
252 .. class:: infomark | |
253 | |
254 **What it does** | |
255 | |
256 ------------------- | |
257 | |
258 **Genrich** is a peak-caller for genomic enrichment assays (e.g. ChIP-seq, ATAC-seq). It analyzes alignment files generated following the assay and produces a file detailing peaks of significant enrichment. | |
259 | |
260 ATAC-seq is a method for assessing genomic regions of open chromatin. Since only the ends of the DNA fragments indicate where the transposase enzyme was able to insert into the chromatin, it may not be optimal to interpret alignments as full-length fragments. Genrich therefore has an alternative analysis mode for ATAC-seq in which it creates intervals centered on transposase cut sites. The remainder of the peak-calling process (calculating pileups and significance values) is identical to the default analysis mode. Note that the interval lengths (not the fragment lengths) are used to sum the total sequence information for the calculation of control/background pileup values. | |
261 | |
262 ------------------- | |
263 | |
264 **Inputs** | |
265 | |
266 ------------------- | |
267 | |
268 Genrich analyzes alignment files in SAM/BAM format. SAM files must have a header. | |
269 SAM/BAM files for multiple replicates can be specified, comma-separated (or space-separated, in quotes). | |
270 Multiple SAM/BAM files for a single replicate should be combined in advance via samtools merge. | |
271 The SAM/BAM files must be sorted by queryname (via samtools sort -n). | |
272 | |
273 | |
274 ----------- | |
275 | |
276 **Outputs** | |
277 | |
278 ----------- | |
279 | |
280 As indicated, the output file is in ENCODE narrowPeak format. Here are details of the fields: | |
281 * 1. chrom Name of the chromosome | |
282 * 2. chromStart Starting position of the peak (0-based) | |
283 * 3. chromEnd Ending position of the peak (not inclusive) | |
284 * 4. name peak_N, where N is the 0-based count | |
285 * 5. score Average AUC (total AUC / bp) × 1000, rounded to the nearest int (max. 1000) | |
286 * 6. strand . (no orientation) | |
287 * 7. signalValue Total area under the curve (AUC) | |
288 * 8. pValue Summit -log10(p-value) | |
289 * 9. qValue Summit -log10(q-value), or -1 if not available (e.g. with -p) | |
290 * 10. peak Summit position (0-based offset from chromStart): the midpoint of the peak interval with the highest significance (the longest interval in case of ties) | |
291 | |
292 Example: | |
293 chr1 894446 894988 peak_10 402 . 217.824936 4.344683 1.946031 317 | |
294 chr1 895834 896167 peak_11 343 . 114.331093 4.344683 1.946031 90 | |
295 | |
296 Optional files | |
297 | |
298 -c Input SAM/BAM file(s) for control sample(s) | |
299 | |
300 Alignment files for control samples (e.g. input DNA) can be specified, although this is not strictly required. | |
301 SAM/BAM files for multiple replicates can be listed, comma-separated (or space-separated, in quotes) and in the same order as the experimental files. Missing control files should be indicated with null. | |
302 | |
303 -f Output bedgraph-ish file for p/q values | |
304 | |
305 With a single replicate, this log file lists experimental/control pileup values, p- and q-values, and significance (*) for each interval. | |
306 | |
307 Example: | |
308 chr1 894435 894436 33.000000 2.477916 3.183460 1.208321 | |
309 chr1 894436 894442 34.000000 2.477916 3.231466 1.241843 | |
310 chr1 894442 894446 35.000000 2.477916 3.278469 1.274561 | |
311 chr1 894446 894447 36.000000 2.477916 3.324516 1.306471 * | |
312 chr1 894447 894450 39.000000 2.477916 3.457329 1.398035 * | |
313 chr1 894450 894451 40.000000 2.477916 3.499948 1.427253 * | |
314 chr1 894451 894460 41.000000 2.477916 3.541798 1.455938 * | |
315 | |
316 With multiple replicates, this log file lists p-values of each replicate, combined p-value, q-value, and significance for each interval. | |
317 Note that this file (as well as the -k file, below) is called "bedgraph-ish" because it contains multiple dataValue fields, which isn't strictly allowed in the bedGraph format. However, a simple application of awk can produce the desired bedgraph files for visualization purposes (consult an awk reference for how to print specific fields of input records). | |
318 When peak-calling is skipped (-X), the significance column is not produced. | |
319 | |
320 -k Output bedgraph-ish file for pileups and p-values | |
321 | |
322 For each replicate, sequentially, this file lists a header line (# experimental file: <name>; control file: <name>), followed by experimental/control pileups and a p-value for each interval. This is the way to examine pileup values with multiple replicates, since the -f log file does not supply them in that case. | |
323 | |
324 -b Output BED file for reads/fragments/intervals | |
325 | |
326 This is an unsorted BED file of the reads/fragments/intervals analyzed. The 4th column gives the read name, number of valid alignments, 'E'xperimental or 'C'ontrol, and sample number (0-based), e.g. SRR5427886.59_2_E_0. | |
327 | |
328 -R Output file for PCR duplicates (only with -r) | |
329 | |
330 This log file lists the header of each read/fragment classified as a PCR duplicate, followed by the alignment, the header of the read/fragment it matched, and the alignment type. | |
331 | |
332 Example: | |
333 SRR5427886.5958 chr4:185201876-185201975 SRR5427886.4688 paired | |
334 SRR5427886.1826 chr12:34372610,+;chr1:91852878,- SRR5427886.2040 discordant | |
335 SRR5427886.10866 chr14:53438632,+ SRR5427886.4746 single | |
336 | |
337 The duplicates from multiple input files are separated by a comment line listing the next filename, such as # experimental file #0: SRR5427886.bam. | |
338 This file can be used to filter the original SAM/BAM file, using a simple script such as getReads.py, for example. | |
339 | |
340 | |
341 -------------------- | |
342 | |
343 **More Information** | |
344 | |
345 -------------------- | |
346 | |
347 See the excellent `Genrich documentation`_ | |
348 | |
349 .. _`Genrich documentation`: https://github.com/jsh58/Genrich | |
350 | |
351 | |
352 -------------------- | |
353 | |
354 **Galaxy Wrapper Development** | |
355 | |
356 -------------------- | |
357 | |
358 Author: Florian Heyl <heylf@informatik.uni-freiburg.de> | |
359 | |
360 | |
361 ]]></help> | |
362 <citations> | |
363 <citation type="bibtex"> | |
364 @misc{genrich, | |
365 title = {Genrich}, | |
366 url = {https://github.com/jsh58/Genrich}, | |
367 urldate = {2019-07-15}, | |
368 author = {John M. Gaspar}, | |
369 year = {2018}, | |
370 } | |
371 </citation> | |
372 </citations> | |
373 </tool> |