comparison pyCRAC/pyPileup.xml @ 0:19b20927172d draft

Uploaded
author swebb
date Tue, 18 Jun 2013 09:11:00 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:19b20927172d
1 <?xml version="1.0" encoding="utf-8"?>
2 <tool id ="pyPileup" name="pyPileup">
3 <requirements> <!-- packages this wrapper depends on -->
4 <requirement type="package">pyCRAC</requirement>
5 </requirements>
6 <command interpreter="python">
7 /usr/local/bin/pyPileup.py
8 -f $ftype.input
9 --file_type $ftype.file_type
10 #if $geneOpt.alignGene == "gene":
11 -g $geneOpt.genes
12 #end if#
13 #if $geneOpt.alignGene == "chr":
14 --chr $geneOpt.chr
15 #end if#
16 #if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.disc.discard == "discard":
17 --discarded $discarded
18 #end if#
19 --gtf=$addGTF.gtf
20 --tab=$addTab.tab
21 #if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.addAlignOpt.alignoptions == "edit":
22 --align_quality=$ftype.addAlignOpt.align_quality
23 --align_score=$ftype.addAlignOpt.align_score
24 --distance=$ftype.addAlignOpt.d
25 --length=$ftype.addAlignOpt.length
26 #if int($ftype.addAlignOpt.max) > 0:
27 --max=$ftype.addAlignOpt.max
28 #end if#
29 $ftype.addAlignOpt.unique
30 $ftype.addAlignOpt.blocks
31 $ftype.addAlignOpt.mutations
32 ## --discarded is not repeated here: it is emitted once above for novo/sam input
33 ## whenever the "disc" select is ON (value "discard"), independently of whether
34 ## the alignment options are edited.
35 #end if#
36 #if $addOpt.options == "edit":
37 --range=$addOpt.range
38 --overlap=$addOpt.overlap
39 $addOpt.iclip
40 $addOpt.ignore
41 -s $addOpt.sequence
42 #if int($addOpt.limit) > 0:
43 --limit=$addOpt.limit
44 #end if#
45 #end if#
46 -o $output
47 </command>
48 <version_command>/usr/local/bin/pyPileup.py --version</version_command> <!-- same script path as in the command block, called with --version -->
49 <inputs>
50
51
52 <conditional name="geneOpt"> <!-- selects between a gene list (-g) and a chromosome co-ordinate file (--chr) -->
53 <param type="select" name="alignGene" label="Do you want to align reads to genes or chromosome co-ordinates?">
54 <option selected="true" value="gene">Genes</option>
55 <option value="chr">Chromosome Co-ordinates</option>
56 </param>
57 <when value="chr">
58 <param name="chr" type="data" format="interval" label="Choose a Chromosome Coordinate File" help="Tab delimited text file containing an identifier, chromosome name, start position, end position and strand ('-' or '+')"/>
59 </when>
60 <when value="gene">
61 <param name="genes" type="data" format="txt" label="Choose a Gene List -g" help="Single column gene ID file"/>
62 </when>
63 </conditional>
64 <conditional name="addGTF"> <!-- GTF annotation source: an entry of the pycrac_gtf data table or a history dataset -->
65 <param name="gtfFile" type="select" label="Choose GTF File from">
66 <option value="default" selected="true">Defaults</option>
67 <option value="other">History</option>
68 </param>
69 <when value="default">
70 <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
71 <options from_data_table="pycrac_gtf"/>
72 </param>
73 </when>
74 <when value="other">
75 <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/> <!-- lowercase format name, as used for the GTF input of the ftype conditional -->
76 </when>
77 </conditional>
78 <conditional name="addTab"> <!-- genomic reference (--tab): an entry of the pycrac_tab data table or a history dataset -->
79 <param type="select" name="tabFile" label="Choose Genomic Reference Sequence from">
80 <option selected="true" value="default">Defaults</option>
81 <option value="other">History</option>
82 </param>
83 <when value="default">
84 <param type="select" name="tab" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence">
85 <options from_data_table="pycrac_tab"/>
86 </param>
87 </when>
88 <when value="other">
89 <param name="tab" type="data" format="tabular" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence"/>
90 </when>
91 </conditional>
92
93
94 <conditional name="ftype">
95 <param name="file_type" type="select" label="Input File Type --file_type"> <!-- passed to --file_type; novo and sam add discard and alignment sub-options, gtf does not -->
96 <option value="novo" selected="true">Novo</option>
97 <option value="sam">Sam/BAM</option>
98 <option value="gtf">GTF</option>
99 </param>
100 <when value="sam"> <!-- SAM/BAM input: optional discarded-reads output plus alignment filters -->
101 <param format="sam,bam" name="input" type="data" label="Input File -f" help="Alignment file of type .sam or .bam" />
102 <conditional name="disc">
103 <param name="discard" type="select" label="Print discarded reads to a separate file">
104 <option value="" selected="true">OFF</option>
105 <option value="discard">ON</option>
106 </param>
107 <when value="discard">
108 </when>
109 <when value="">
110 </when>
111 </conditional>
112 <conditional name="addAlignOpt">
113 <param name="alignoptions" type="select" label="Alignment Options">
114 <option value="default" selected="true">Default</option>
115 <option value="edit">Edit</option>
116 </param>
117 <when value="edit">
118 <param name="mutations" type="select" label="Filter reads by mutations --mutations" help="cross-linking sites are often highlighted by deletions and/or substitutions in the reads. You can use this option to filter reads based on whether they have mutations or not.">
119 <option value="" selected="true">Off</option>
120 <option value="--mutations=delsonly">deletions</option>
121 <option value="--mutations=subsonly">substitutions</option>
122 <option value="--mutations=TC">T->C mutations</option>
123 <option value="--mutations=allmuts">all mutations</option>
124 <option value="--mutations=nomuts">no mutations</option>
125 </param>
126 <param format="integer" name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5" >
127 <validator type="in_range" min="0" message="Please enter a value >= 0"/>
128 </param>
129 <param format="integer" name="align_score" type="integer" label="Align Score --align_score " value="0" size="5" >
130 <validator type="in_range" min="0" message="Please enter a value >= 0"/>
131 </param>
132 <param format="integer" name="max" type="integer" label="Mapped reads to read from input file --max" help="Set to 0 to align all reads." value="0" size="10" >
133 <validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/>
134 </param>
135 <param format="integer" name="d" type="integer" label="Distance --distance " value="1000" size="6" help="Set the maximum number of bp allowed between two non-overlapping paired reads">
136 <validator type="in_range" min="1" message="Please enter a value >= 1"/> <!-- message states the bound that min actually enforces -->
137 </param>
138 <param format="integer" name="length" type="integer" label="Set the maximum length of reads --length" value="1000" size="7" help="Set the read length threshold between 15 and 1000">
139 <validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/>
140 </param>
141 <param name="unique" type="select" label="Remove reads with multiple alignment locations --unique">
142 <option value="" selected="true">OFF</option>
143 <option value="--unique">ON</option>
144 </param>
145 <param name="blocks" type="select" label="Only count reads with same start and end coords once --blocks">
146 <option value="" selected="true">OFF</option>
147 <option value="--blocks">ON</option>
148 </param>
149 </when>
150 <when value="default">
151 </when>
152 </conditional>
153 </when>
154 <when value="novo"> <!-- Novoalign input: optional discarded-reads output plus alignment filters -->
155 <param format="tabular" name="input" type="data" label="Input File -f" help="Alignment file of type .novo" />
156 <conditional name="disc">
157 <param name="discard" type="select" label="Print discarded reads to a separate file">
158 <option value="" selected="true">OFF</option>
159 <option value="discard">ON</option>
160 </param>
161 <when value="discard">
162 </when>
163 <when value="">
164 </when>
165 </conditional>
166 <conditional name="addAlignOpt">
167 <param name="alignoptions" type="select" label="Alignment Options">
168 <option value="default" selected="true">Default</option>
169 <option value="edit">Edit</option>
170 </param>
171 <when value="edit">
172 <param name="mutations" type="select" label="Filter reads by mutations --mutations" help="cross-linking sites are often highlighted by deletions and/or substitutions in the reads. You can use this option to filter reads based on whether they have mutations or not.">
174 <option value="" selected="true">Off</option>
175 <option value="--mutations=delsonly">deletions</option>
176 <option value="--mutations=subsonly">substitutions</option>
177 <option value="--mutations=TC">T->C mutations</option>
178 <option value="--mutations=allmuts">all mutations</option>
179 <option value="--mutations=nomuts">no mutations</option>
180 </param>
181 <param format="integer" name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5" >
182 <validator type="in_range" min="0" message="Please enter a value >= 0"/>
183 </param>
184 <param format="integer" name="align_score" type="integer" label="Align Score --align_score " value="0" size="5" >
185 <validator type="in_range" min="0" message="Please enter a value >= 0"/>
186 </param>
187 <param format="integer" name="max" type="integer" label="Mapped reads to read from input file --max" help="Set to 0 to align all reads." value="0" size="10" >
188 <validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/>
189 </param>
190 <param format="integer" name="d" type="integer" label="Distance --distance " value="1000" size="6" help="Set the maximum number of bp allowed between two non-overlapping paired reads">
191 <validator type="in_range" min="1" message="Please enter a value >= 1"/> <!-- message states the bound that min actually enforces -->
192 </param>
193 <param format="integer" name="length" type="integer" label="Set the maximum length of reads --length" value="1000" size="7" help="Set the read length threshold between 15 and 1000">
194 <validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/>
195 </param>
196 <param name="unique" type="select" label="Remove reads with multiple alignment locations --unique">
197 <option value="" selected="true">OFF</option>
198 <option value="--unique">ON</option>
199 </param>
200 <param name="blocks" type="select" label="Only count reads with same start and end coords once --blocks">
201 <option value="" selected="true">OFF</option>
202 <option value="--blocks">ON</option>
203 </param>
204 </when>
205 <when value="default">
206 </when>
207 </conditional>
208 </when>
209 <when value="gtf"> <!-- GTF input: only the file itself, no discard or alignment sub-options -->
210 <param format="gtf" name="input" type="data" label="Input File -f" help="File of type .gtf" />
211 </when>
212 </conditional>
213
214 <conditional name="addOpt"> <!-- standard options; only passed on the command line when "Edit" is chosen -->
215 <param name="options" type="select" label="Standard Options">
216 <option value="default" selected="true">Default</option>
217 <option value="edit">Edit</option>
218 </param>
219 <when value="edit">
220 <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs (0 to 50000)">
221 <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
222 </param>
223 <param name="ignore" type="select" label="Ignore strand information? --ignorestrand">
224 <option value="" selected="true">No</option>
225 <option value="--ignorestrand">Yes</option>
226 </param>
227 <param format="integer" name="overlap" type="integer" label="Overlap --overlap" value="1" size="5" help="Sets the number of nucleotides a read has to overlap with a gene before it is considered a hit. ">
228 <validator type="in_range" min="1" message="Please enter a positive integer"/>
229 </param>
230 <param name="sequence" type="select" label="Align reads to --sequence">
231 <option value="genomic" selected="true">Genomic Sequence</option>
232 <option value="coding">Coding Sequence</option>
233 </param>
234 <param name="iclip" type="select" label="iCLIP mode --iCLIP">
235 <option value="" selected="true">OFF</option>
236 <option value="--iCLIP">ON</option>
237 </param>
238 <param format="integer" name="limit" type="integer" label="Limit number of reads to count that map to a particular region --limit" value="0" size="15" help="Set to 0 for unlimited reads" >
239 <validator type="in_range" min="0" message="Please enter a positive integer or set to 0 for unlimited reads"/> <!-- 1 is accepted, since min is 0 -->
240 </param>
241 </when>
242 <when value="default">
243 </when>
244 </conditional>
245 <param name="label" type="text" format="txt" size="30" value="pyPileup" label="Enter output file label -o" /> <!-- referenced as ${label.value} in the output dataset labels -->
246 </inputs>
247 <outputs>
248 <data name="output" format="tabular" label="${label.value}.pileup"/> <!-- pileup table, passed to the script via -o -->
249 <data name="discarded" format="txt" label="${label.value}_discarded.txt"> <!-- only produced when the filter below is true -->
250 <filter>(ftype['file_type'] == "novo" or ftype['file_type'] == "sam") and ftype['disc']['discard'] == "discard"</filter>
251 </data>
252 </outputs>
253 <help>
254
255
256 .. class:: infomark
257
258 **pyPileup**
259
260 pyPileup is part of the pyCRAC_ package. It produces pileups containing the number of hits, substitutions and deletions for each nucleotide covered by
261 reads in specific genes or genomic regions.
262
263 .. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
264
265 ------
266
267 **Parameter list**
268
269 File input options::
270
271 -f FILE, --input_file=FILE
272 As input you can use Novoalign native output, SAM,
273 or pyMotif or pyReadCounters GTF files.
274 By default it expects data from the standard
275 input. Make sure to specify the file type of the file
276 you want to have analyzed using the --file_type
277 option!
278 -o OUTPUT_FILE, --output_file=OUTPUT_FILE
279 Use this flag to override the standard output file
280 names. All pileups will be written to one output file.
281 -g FILE, --genes_file=FILE
282 here you need to type in the name of your gene list
283 file (1 column) or the hittable file
284 --chr=FILE
285 if you simply would like to align reads against a
286 genomic sequence you should generate a tab delimited
287 file containing an identifier, chromosome name, start
288 position, end position and strand
289 --gtf=annotation_file.gtf
290 type the path to the gtf annotation file that you want
291 to use
292 --tab=tab_file.tab
293 type the path to the tab file that contains the
294 genomic reference sequence
295 --file_type=FILE_TYPE
296 use this option to specify the file type (i.e. 'novo',
297 'sam', 'gtf'). This will tell the program which
298 parsers to use for processing the files. Default =
299 'novo'
300
301 pyPileup specific options::
302
303 --limit=500
304 with this option you can select how many reads mapped
305 to a particular gene/ORF/region you want to count.
306 Default = All
307 --iCLIP
308 This turns on the iCLIP mode and the pileups will
309 report cross-linking site frequencies in iCLIP data in
310 reference sequences
311
312 Common options::
313
314 -v, --verbose
315 prints all the status messages to a file rather than
316 the standard output
317 --ignorestrand
318 this flag tells the program to ignore strand
319 information and all overlapping reads will be considered
320 sense reads. Useful for analysing ChIP or RIP data
321 --zip=FILE
322 use this option to compress all the output files in a
323 single zip file
324 --overlap=1
325 sets the number of nucleotides a read has to overlap
326 with a gene before it is considered a hit. Default =
327 1 nucleotide
328 -s genomic, --sequence=genomic
329 with this option you can select whether you want the
330 reads aligned to the genomic or the coding sequence.
331 Default = genomic
332 -r 100, --range=100
333 allows you to set the length of the UTR regions. If
334 you set '-r 50' or '--range=50', then the program will
335 set a fixed length (50 bp) regardless of whether the
336 GTF file has genes with annotated UTRs.
337
338 Options for novo, SAM and BAM files::
339
340 --align_quality=100, --mapping_quality=100
341 with these options you can set the alignment quality
342 (Novoalign) or mapping quality (SAM) threshold. Reads
343 with qualities lower than the threshold will be
344 ignored. Default = 0
345 --align_score=100
346 with this option you can set the alignment score
347 threshold. Reads with alignment scores lower than the
348 threshold will be ignored. Default = 0
349 -l 100, --length=100
350 to set read length threshold. Default = 1000
351 -m 100000, --max=100000
352 maximum number of mapped reads that will be analyzed.
353 Default = All
354 --unique
355 with this option reads with multiple alignment
356 locations will be removed. Default = Off
357 --blocks
358 with this option reads with the same start and end
359 coordinates on a chromosome will only be counted once.
360 Default = Off
361 --discarded=FILE
362 prints the lines from the alignments file that were
363 discarded by the parsers. This file contains reads
364 that were unmapped (NM), of poor quality (i.e. QC) or
365 paired reads that were mapped to different chromosomal
366 locations or were too far apart on the same
367 chromosome. Useful for debugging purposes
368 -d 1000, --distance=1000
369 this option allows you to set the maximum number of
370 base-pairs allowed between two non-overlapping paired
371 reads. Default = 1000
372 --mutations=delsonly
373 Use this option to only track mutations that are of
374 interest. For CRAC data this is usually deletions
375 (--mutations=delsonly). For PAR-CLIP data this is
376 usually T-C mutations (--mutations=TC). Other options
377 are: do not report any mutations: --mutations=nomuts.
378 Only report specific base mutations, for example only
379 in T's, C's and G's :--mutations=[TCG]. The brackets
380 are essential. Other nucleotide combinations are also
381 possible
382
383 </help>
384 </tool>