Mercurial > repos > swebb > pycrac
comparison pyCRAC/pyPileup.xml @ 0:19b20927172d draft
Uploaded
author | swebb |
---|---|
date | Tue, 18 Jun 2013 09:11:00 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:19b20927172d |
---|---|
1 <?xml version="1.0" encoding="utf-8"?> | |
2 <tool id ="pyPileup" name="pyPileup"> | |
3 <requirements> | |
4 <requirement type="package">pyCRAC</requirement> | |
5 </requirements> | |
6 <command interpreter="python"> | |
7 /usr/local/bin/pyPileup.py | |
8 -f $ftype.input | |
9 --file_type $ftype.file_type | |
10 #if $geneOpt.alignGene == "gene": | |
11 -g $geneOpt.genes | |
12 #end if# | |
13 #if $geneOpt.alignGene == "chr": | |
14 --chr $geneOpt.chr | |
15 #end if# | |
16 #if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.disc.discard == "discard": | |
17 --discarded $discarded | |
18 #end if# | |
19 --gtf=$addGTF.gtf | |
20 --tab=$addTab.tab | |
21 #if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.addAlignOpt.alignoptions == "edit": | |
22 --align_quality=$ftype.addAlignOpt.align_quality | |
23 --align_score=$ftype.addAlignOpt.align_score | |
24 --distance=$ftype.addAlignOpt.d | |
25 --length=$ftype.addAlignOpt.length | |
26 #if int($ftype.addAlignOpt.max) > 0: | |
27 --max=$ftype.addAlignOpt.max | |
28 #end if# | |
29 $ftype.addAlignOpt.unique | |
30 $ftype.addAlignOpt.blocks | |
31 $ftype.addAlignOpt.mutations | |
32 ## The discarded-reads flag is emitted once, by the file-type check | |
33 ## above, so it applies whether or not the alignment options are edited. | |
34 ## (A second test here compared discard to a value the select never yields.) | |
35 #end if# | |
36 #if $addOpt.options == "edit": | |
37 --range=$addOpt.range | |
38 --overlap=$addOpt.overlap | |
39 $addOpt.iclip | |
40 $addOpt.ignore | |
41 -s $addOpt.sequence | |
42 #if int($addOpt.limit) > 0: | |
43 --limit=$addOpt.limit | |
44 #end if# | |
45 #end if# | |
46 -o $output | |
47 </command> | |
48 <version_command>/usr/local/bin/pyPileup.py --version</version_command> | |
49 <inputs> | |
50 | |
51 | |
52 <conditional name="geneOpt"> | |
53 <param name="alignGene" type="select" label="Do you want to align reads to genes or chromosome co-ordinates?"> | |
54 <option value="gene" selected="true">Genes</option> | |
55 <option value="chr">Chromosome Co-ordinates</option> | |
56 </param> | |
57 <when value="chr"> | |
58 <param format="interval" name="chr" type="data" label="Choose a Chromosome Coordinate File" help="Tab delimited text file containing an identifier, chromosome name, start position, end position and strand ('-' or '+')"/> | |
59 </when> | |
60 <when value="gene"> | |
61 <param format="txt" name="genes" type="data" label="Choose a Gene List -g" help="Single column gene ID file"/> | |
62 </when> | |
63 </conditional> | |
64 <conditional name="addGTF"> | |
65 <param name="gtfFile" type="select" label="Choose GTF File from"> | |
66 <option value="default" selected="true">Defaults</option> | |
67 <option value="other">History</option> | |
68 </param> | |
69 <when value="default"> | |
70 <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"> | |
71 <options from_data_table="pycrac_gtf"/> | |
72 </param> | |
73 </when> | |
74 <when value="other"> | |
75 <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/> | |
76 </when> | |
77 </conditional> | |
78 <conditional name="addTab"> | |
79 <param name="tabFile" type="select" label="Choose Genomic Reference Sequence from"> | |
80 <option value="default" selected="true">Defaults</option> | |
81 <option value="other">History</option> | |
82 </param> | |
83 <when value="default"> | |
84 <param name="tab" type="select" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence"> | |
85 <options from_data_table="pycrac_tab"/> | |
86 </param> | |
87 </when> | |
88 <when value="other"> | |
89 <param format="tabular" name="tab" type="data" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence"/> | |
90 </when> | |
91 </conditional> | |
92 | |
93 | |
94 <conditional name="ftype"> | |
95 <param name="file_type" type="select" label="Input File Type --file_type"> | |
96 <option value="novo" selected="true">Novo</option> | |
97 <option value="sam">Sam/BAM</option> | |
98 <option value="gtf">GTF</option> | |
99 </param> | |
100 <when value="sam"> | |
101 <param format="sam,bam" name="input" type="data" label="Input File -f" help="Alignment file of type .sam or .bam" /> | |
102 <conditional name="disc"> | |
103 <param name="discard" type="select" label="Print discarded reads to a separate file"> | |
104 <option value="" selected="true">OFF</option> | |
105 <option value="discard">ON</option> | |
106 </param> | |
107 <when value="discard"> | |
108 </when> | |
109 <when value=""> | |
110 </when> | |
111 </conditional> | |
112 <conditional name="addAlignOpt"> | |
113 <param name="alignoptions" type="select" label="Alignment Options"> | |
114 <option value="default" selected="true">Default</option> | |
115 <option value="edit">Edit</option> | |
116 </param> | |
117 <when value="edit"> | |
118 <param name="mutations" type="select" label="Filter reads by mutations --mutations" help="cross-linking sites are often highlighted by deletions and/or substitutions in the reads. You can use this option to filter reads based on whether they have mutations or not."> | |
119 <option value="" selected="true">Off</option> | |
120 <option value="--mutations=delsonly">deletions</option> | |
121 <option value="--mutations=subsonly">substitutions</option> | |
122 <option value="--mutations=TC">T->C mutations</option> | |
123 <option value="--mutations=allmuts">all mutations</option> | |
124 <option value="--mutations=nomuts">no mutations</option> | |
125 </param> | |
126 <param format="integer" name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5" > | |
127 <validator type="in_range" min="0" message="Please enter a value >= 0"/> | |
128 </param> | |
129 <param format="integer" name="align_score" type="integer" label="Align Score --align_score " value="0" size="5" > | |
130 <validator type="in_range" min="0" message="Please enter a value >= 0"/> | |
131 </param> | |
132 <param format="integer" name="max" type="integer" label="Mapped reads to read from input file --max" help="Set to 0 to align all reads." value="0" size="10" > | |
133 <validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/> | |
134 </param> | |
135 <param format="integer" name="d" type="integer" label="Distance --distance " value="1000" size="6" help="Set the maximum number of bp allowed between two non-overlapping paired reads"> | |
136 <validator type="in_range" min="1" message="Please enter a value >= 1"/> | |
137 </param> | |
138 <param format="integer" name="length" type="integer" label="Set the maximum length of reads --length" value="1000" size="7" help="Set the read length threshold between 15 and 1000"> | |
139 <validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/> | |
140 </param> | |
141 <param name="unique" type="select" label="Remove reads with multiple alignment locations --unique"> | |
142 <option value="" selected="true">OFF</option> | |
143 <option value="--unique">ON</option> | |
144 </param> | |
145 <param name="blocks" type="select" label="Only count reads with same start and end coords once --blocks"> | |
146 <option value="" selected="true">OFF</option> | |
147 <option value="--blocks">ON</option> | |
148 </param> | |
149 </when> | |
150 <when value="default"> | |
151 </when> | |
152 </conditional> | |
153 </when> | |
154 <when value="novo"> | |
155 <param format="tabular" name="input" type="data" label="Input File -f" help="Alignment file of type .novo" /> | |
156 <conditional name="disc"> | |
157 <param name="discard" type="select" label="Print discarded reads to a separate file"> | |
158 <option value="" selected="true">OFF</option> | |
159 <option value="discard">ON</option> | |
160 </param> | |
161 <when value="discard"> | |
162 </when> | |
163 <when value=""> | |
164 </when> | |
165 </conditional> | |
166 <conditional name="addAlignOpt"> | |
167 <param name="alignoptions" type="select" label="Alignment Options"> | |
168 <option value="default" selected="true">Default</option> | |
169 <option value="edit">Edit</option> | |
170 </param> | |
171 <when value="edit"> | |
172 <param name="mutations" type="select" label="Filter reads by mutations --mutations" help="cross-linking sites are often | |
173 highlighted by deletions and/or substitutions in the reads. You can use this option to filter reads based on whether they have mutations or not."> | |
174 <option value="" selected="true">Off</option> | |
175 <option value="--mutations=delsonly">deletions</option> | |
176 <option value="--mutations=subsonly">substitutions</option> | |
177 <option value="--mutations=TC">T->C mutations</option> | |
178 <option value="--mutations=allmuts">all mutations</option> | |
179 <option value="--mutations=nomuts">no mutations</option> | |
180 </param> | |
181 <param format="integer" name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5" > | |
182 <validator type="in_range" min="0" message="Please enter a value >= 0"/> | |
183 </param> | |
184 <param format="integer" name="align_score" type="integer" label="Align Score --align_score " value="0" size="5" > | |
185 <validator type="in_range" min="0" message="Please enter a value >= 0"/> | |
186 </param> | |
187 <param format="integer" name="max" type="integer" label="Mapped reads to read from input file --max" help="Set to 0 to align all reads." value="0" size="10" > | |
188 <validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/> | |
189 </param> | |
190 <param format="integer" name="d" type="integer" label="Distance --distance " value="1000" size="6" help="Set the maximum number of bp allowed between two non-overlapping paired reads"> | |
191 <validator type="in_range" min="1" message="Please enter a value >= 1"/> | |
192 </param> | |
193 <param format="integer" name="length" type="integer" label="Set the maximum length of reads --length" value="1000" size="7" help="Set the read length threshold between 15 and 1000"> | |
194 <validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/> | |
195 </param> | |
196 <param name="unique" type="select" label="Remove reads with multiple alignment locations --unique"> | |
197 <option value="" selected="true">OFF</option> | |
198 <option value="--unique">ON</option> | |
199 </param> | |
200 <param name="blocks" type="select" label="Only count reads with same start and end coords once --blocks"> | |
201 <option value="" selected="true">OFF</option> | |
202 <option value="--blocks">ON</option> | |
203 </param> | |
204 </when> | |
205 <when value="default"> | |
206 </when> | |
207 </conditional> | |
208 </when> | |
209 <when value="gtf"> | |
210 <param format="gtf" name="input" type="data" label="Input File -f" help="File of type .gtf" /> | |
211 </when> | |
212 </conditional> | |
213 | |
214 <conditional name="addOpt"> | |
215 <param name="options" type="select" label="Standard Options"> | |
216 <option value="default" selected="true">Default</option> | |
217 <option value="edit">Edit</option> | |
218 </param> | |
219 <when value="edit"> | |
220 <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs (0 to 50000)"> | |
221 <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/> | |
222 </param> | |
223 <param name="ignore" type="select" label="Ignore strand information? --ignorestrand"> | |
224 <option value="" selected="true">No</option> | |
225 <option value="--ignorestrand">Yes</option> | |
226 </param> | |
227 <param format="integer" name="overlap" type="integer" label="Overlap --overlap" value="1" size="5" help="Sets the number of nucleotides a read has to overlap with a gene before it is considered a hit. "> | |
228 <validator type="in_range" min="1" message="Please enter a positive integer"/> | |
229 </param> | |
230 <param name="sequence" type="select" label="Align reads to --sequence"> | |
231 <option value="genomic" selected="true">Genomic Sequence</option> | |
232 <option value="coding">Coding Sequence</option> | |
233 </param> | |
234 <param name="iclip" type="select" label="iCLIP mode --iCLIP"> | |
235 <option value="" selected="true">OFF</option> | |
236 <option value="--iCLIP">ON</option> | |
237 </param> | |
238 <param format="integer" name="limit" type="integer" label="Limit number of reads to count that map to a particular region --limit" value="0" size="15" help="Set to 0 for unlimited reads" > | |
239 <validator type="in_range" min="0" message="Please enter a value of 1 or greater, or set to 0 for unlimited reads"/> | |
240 </param> | |
241 </when> | |
242 <when value="default"> | |
243 </when> | |
244 </conditional> | |
245 <param name="label" type="text" format="txt" size="30" value="pyPileup" label="Enter output file label -o" /> | |
246 </inputs> | |
247 <outputs> | |
248 <data format="tabular" name="output" label="${label.value}.pileup"/> | |
249 <data format="txt" name="discarded" label="${label.value}_discarded.txt"> | |
250 <filter>(ftype['file_type'] == "novo" or ftype['file_type'] == "sam") and ftype['disc']['discard'] == "discard"</filter> | |
251 </data> | |
252 </outputs> | |
253 <help> | |
254 | |
255 | |
256 .. class:: infomark | |
257 | |
258 **pyPileup** | |
259 | |
260 pyPileup is part of the pyCRAC_ package. Produces pileups containing the number of hits, substitutions and deletions for each nucleotide covered by | |
261 reads in specific genes or genomic regions | |
262 | |
263 .. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html | |
264 | |
265 ------ | |
266 | |
267 **Parameter list** | |
268 | |
269 File input options:: | |
270 | |
271 -f FILE, --input_file=FILE | |
272 As input files you can use Novoalign native output, | |
273 SAM, pyMotif or pyReadCounters GTF files as input | |
274 file. By default it expects data from the standard | |
275 input. Make sure to specify the file type of the file | |
276 you want to have analyzed using the --file_type | |
277 option! | |
278 -o OUTPUT_FILE, --output_file=OUTPUT_FILE | |
279 Use this flag to override the standard output file | |
280 names. All pileups will be written to one output file. | |
281 -g FILE, --genes_file=FILE | |
282 here you need to type in the name of your gene list | |
283 file (1 column) or the hittable file | |
284 --chr=FILE | |
285 if you simply would like to align reads against a | |
286 genomic sequence you should generate a tab delimited | |
287 file containing an identifier, chromosome name, start | |
288 position, end position and strand | |
289 --gtf=annotation_file.gtf | |
290 type the path to the gtf annotation file that you want | |
291 to use | |
292 --tab=tab_file.tab | |
293 type the path to the tab file that contains the | |
294 genomic reference sequence | |
295 --file_type=FILE_TYPE | |
296 use this option to specify the file type (i.e. 'novo', | |
297 'sam', 'gtf'). This will tell the program which | |
298 parsers to use for processing the files. Default = | |
299 'novo' | |
300 | |
301 pyPileup specific options:: | |
302 | |
303 --limit=500 | |
304 with this option you can select how many reads mapped | |
305 to a particular gene/ORF/region you want to count. | |
306 Default = All | |
307 --iCLIP | |
308 This turns on the iCLIP mode and the pileups will | |
309 report cross-linking site frequencies in iCLIP data in | |
310 reference sequences | |
311 | |
312 Common options:: | |
313 | |
314 -v, --verbose | |
315 prints all the status messages to a file rather than | |
316 the standard output | |
317 --ignorestrand | |
318 this flag tells the program to ignore strand | |
319 information and all overlapping reads will be considered | |
320 sense reads. Useful for analysing ChIP or RIP data | |
321 --zip=FILE | |
322 use this option to compress all the output files in a | |
323 single zip file | |
324 --overlap=1 | |
325 sets the number of nucleotides a read has to overlap | |
326 with a gene before it is considered a hit. Default = | |
327 1 nucleotide | |
328 -s genomic, --sequence=genomic | |
329 with this option you can select whether you want the | |
330 reads aligned to the genomic or the coding sequence. | |
331 Default = genomic | |
332 -r 100, --range=100 | |
333 allows you to set the length of the UTR regions. If | |
334 you set '-r 50' or '--range=50', then the program will | |
335 set a fixed length (50 bp) regardless of whether the | |
336 GTF file has genes with annotated UTRs. | |
337 | |
338 Options for novo, SAM and BAM files:: | |
339 | |
340 --align_quality=100, --mapping_quality=100 | |
341 with these options you can set the alignment quality | |
342 (Novoalign) or mapping quality (SAM) threshold. Reads | |
343 with qualities lower than the threshold will be | |
344 ignored. Default = 0 | |
345 --align_score=100 | |
346 with this option you can set the alignment score | |
347 threshold. Reads with alignment scores lower than the | |
348 threshold will be ignored. Default = 0 | |
349 -l 100, --length=100 | |
350 to set read length threshold. Default = 1000 | |
351 -m 100000, --max=100000 | |
352 maximum number of mapped reads that will be analyzed. | |
353 Default = All | |
354 --unique | |
355 with this option reads with multiple alignment | |
356 locations will be removed. Default = Off | |
357 --blocks | |
358 with this option reads with the same start and end | |
359 coordinates on a chromosome will only be counted once. | |
360 Default = Off | |
361 --discarded=FILE | |
362 prints the lines from the alignments file that were | |
363 discarded by the parsers. This file contains reads | |
364 that were unmapped (NM), of poor quality (i.e. QC) or | |
365 paired reads that were mapped to different chromosomal | |
366 locations or were too far apart on the same | |
367 chromosome. Useful for debugging purposes | |
368 -d 1000, --distance=1000 | |
369 this option allows you to set the maximum number of | |
370 base-pairs allowed between two non-overlapping paired | |
371 reads. Default = 1000 | |
372 --mutations=delsonly | |
373 Use this option to only track mutations that are of | |
374 interest. For CRAC data this is usually deletions | |
375 (--mutations=delsonly). For PAR-CLIP data this is | |
376 usually T-C mutations (--mutations=TC). Other options | |
377 are: do not report any mutations: --mutations=nomuts. | |
378 Only report specific base mutations, for example only | |
379 in T's, C's and G's :--mutations=[TCG]. The brackets | |
380 are essential. Other nucleotide combinations are also | |
381 possible | |
382 | |
383 </help> | |
384 </tool> |