comparison pyCRAC/pyReadAligner.xml @ 0:19b20927172d draft

Uploaded
author swebb
date Tue, 18 Jun 2013 09:11:00 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:19b20927172d
1 <tool id ="pyReadAligner" name="pyReadAligner">
<!-- Declares pyCRAC as a required package for this tool. -->
2 <requirements>
3 <requirement type="package">pyCRAC</requirement>
4 </requirements>
<!--
  Cheetah template that builds the pyReadAligner.py command line from the input parameters.
  - Either the gene-list flag (-g) or the chromosome-coordinate flag is emitted, according to
    geneOpt.alignGene.
  - The discarded-reads output and the alignment filter flags are emitted only for "novo" and "sam"
    input. The "gtf" branch of the ftype conditional defines neither "disc" nor "addAlignOpt"; the
    file_type test comes first, so with short-circuit evaluation those names are not looked up for
    gtf input.
  - unique, blocks, mutations and ignore are select parameters whose option values are the literal
    flag text (or an empty string), so they are substituted without a flag name in front of them.
  - max and limit are passed only when greater than 0; their help text documents 0 as
    "align all reads" / "unlimited reads".
  The version_command element that follows runs the same script with its version flag.
-->
5 <command interpreter="python">
6 /usr/local/bin/pyReadAligner.py
7 -f $ftype.input
8 --file_type $ftype.file_type
9 #if $geneOpt.alignGene == "gene":
10 -g $geneOpt.genes
11 #end if#
12 #if $geneOpt.alignGene == "chr":
13 --chr $geneOpt.chr
14 #end if#
15 #if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.disc.discard == "discard":
16 --discarded $discarded
17 #end if#
18 --gtf=$addGTF.gtf
19 --tab=$addTab.tab
20 #if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.addAlignOpt.alignoptions == "edit":
21 --align_quality=$ftype.addAlignOpt.align_quality
22 --align_score=$ftype.addAlignOpt.align_score
23 --distance=$ftype.addAlignOpt.d
24 --length=$ftype.addAlignOpt.length
25 #if int($ftype.addAlignOpt.max) > 0:
26 --max=$ftype.addAlignOpt.max
27 #end if#
28 $ftype.addAlignOpt.unique
29 $ftype.addAlignOpt.blocks
30 $ftype.addAlignOpt.mutations
31 #end if#
32 #if $addOpt.options == "edit":
33 --range=$addOpt.range
34 --overlap=$addOpt.overlap
35 $addOpt.ignore
36 -s $addOpt.sequence
37 #if int($addOpt.limit) > 0:
38 --limit=$addOpt.limit
39 #end if#
40 #end if#
41 -o $output
42 </command>
43 <version_command>/usr/local/bin/pyReadAligner.py --version</version_command>
44 <inputs>
45
46
<!--
  Chooses what the reads are aligned against: a single-column gene list (passed with -g) or a
  chromosome coordinate file. The help text of the "chr" parameter wraps at a word boundary; a line
  break inside an attribute value is read as a single space, so the sentence displays intact.
-->
47 <conditional name="geneOpt">
48 <param name="alignGene" type="select" label="Do you want to align reads to genes or chromosome co-ordinates?">
49 <option value="gene" selected="true">Genes</option>
50 <option value="chr">Chromosome Co-ordinates</option>
51 </param>
52 <when value="chr">
53 <param format="interval" name="chr" type="data" label="Choose a Chromosome Coordinate File" help="Tab delimited text file
54 containing an identifier, chromosome name, start position, end position and strand ('-' or '+')"/>
55 </when>
56 <when value="gene">
57 <param format="txt" name="genes" type="data" label="Choose a Gene List -g" help="Single column gene ID file"/>
58 </when>
59 </conditional>
<!--
  GTF annotation source: an entry from the pycrac_gtf data table ("Defaults") or a dataset from the
  history ("History"). Both branches expose the value under the same name, "gtf", so the command
  template can reference addGTF.gtf regardless of the choice. The history dataset uses the lowercase
  "gtf" datatype extension, matching the GTF input parameter of the ftype conditional.
-->
60 <conditional name="addGTF">
61 <param name="gtfFile" type="select" label="Choose GTF File from">
62 <option value="default" selected="true">Defaults</option>
63 <option value="other">History</option>
64 </param>
65 <when value="default">
66 <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
67 <options from_data_table="pycrac_gtf"/>
68 </param>
69 </when>
70 <when value="other">
71 <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
72 </when>
73 </conditional>
<!--
  Genomic reference sequence source: an entry from the pycrac_tab data table ("Defaults") or a
  tabular dataset from the history ("History"). Both branches name the value "tab".
-->
74 <conditional name="addTab">
75 <param type="select" name="tabFile" label="Choose Genomic Reference Sequence from">
76 <option selected="true" value="default">Defaults</option>
77 <option value="other">History</option>
78 </param>
79 <when value="default">
80 <param type="select" name="tab" help="Tab file containing genomic reference sequence" label="Genomic Reference Sequence --tab">
81 <options from_data_table="pycrac_tab"/>
82 </param>
83 </when>
84 <when value="other">
85 <param type="data" name="tab" format="tabular" help="Tab file containing genomic reference sequence" label="Genomic Reference Sequence --tab"/>
86 </when>
87 </conditional>
88
89
<!--
  Input file type selector. The "sam" and "novo" branches each define the input dataset, a "disc"
  conditional (switches the separate discarded-reads output on or off) and an "addAlignOpt"
  conditional holding the alignment filters; the two branches carry the same set of options.
  The "gtf" branch defines only the input dataset.
  The validator on the distance parameter "d" enforces a minimum of 1, and its message states
  that same bound.
-->
90 <conditional name="ftype">
91 <param name="file_type" type="select" label="Input File Type --file_type">
92 <option value="sam">Sam/BAM</option>
93 <option value="novo">Novo</option>
94 <option value="gtf">GTF</option>
95 </param>
96 <when value="sam">
97 <param format="sam,bam" name="input" type="data" label="Input File -f" help="Alignment file of type .sam or .bam"/>
98 <conditional name="disc">
99 <param name="discard" type="select" label="Print discarded reads to a separate file">
100 <option value="" selected="true">OFF</option>
101 <option value="discard">ON</option>
102 </param>
103 <when value="discard">
104 </when>
105 <when value="">
106 </when>
107 </conditional>
108 <conditional name="addAlignOpt">
109 <param name="alignoptions" type="select" label="Alignment Options">
110 <option value="default" selected="true">Default</option>
111 <option value="edit">Edit</option>
112 </param>
113 <when value="edit">
114 <param name="mutations" type="select" label="Filter reads by mutations --mutations" help="cross-linking sites are often highlighted by deletions and/or substitutions in the reads. You can use this option to filter reads based on whether they have mutations or not.">
115 <option value="" selected="true">Off</option>
116 <option value="--mutations=delsonly">deletions</option>
117 <option value="--mutations=subsonly">substitutions</option>
118 <option value="--mutations=TC">T->C mutations</option>
119 <option value="--mutations=allmuts">all mutations</option>
120 <option value="--mutations=nomuts">no mutations</option>
121 </param>
122 <param format="integer" name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5" >
123 <validator type="in_range" min="0" message="Please enter a value >= 0"/>
124 </param>
125 <param format="integer" name="align_score" type="integer" label="Align Score --align_score " value="0" size="5" >
126 <validator type="in_range" min="0" message="Please enter a value >= 0"/>
127 </param>
128 <param format="integer" name="max" type="integer" label="Mapped reads to read from input file --max" help="Set to 0 to align all reads." value="0" size="10" >
129 <validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/>
130 </param>
131 <param format="integer" name="d" type="integer" label="Distance --distance " value="1000" size="6" help="Set the maximum number of bp allowed between two non-overlapping paired reads">
132 <validator type="in_range" min="1" message="Please enter a value >= 1"/>
133 </param>
134 <param format="integer" name="length" type="integer" label="Set the maximum length of reads --length" value="1000" size="7" help="Set the read length threshold between 15 and 1000">
135 <validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/>
136 </param>
137 <param name="unique" type="select" label="Remove reads with multiple alignment locations --unique">
138 <option value="" selected="true">OFF</option>
139 <option value="--unique">ON</option>
140 </param>
141 <param name="blocks" type="select" label="Only count reads with same start and end coords once --blocks">
142 <option value="" selected="true">OFF</option>
143 <option value="--blocks">ON</option>
144 </param>
145 </when>
146 <when value="default">
147 </when>
148 </conditional>
149 </when>
150 <when value="novo">
151 <param format="tabular" name="input" type="data" label="Input File -f" help="Alignment file of type .novo" />
152 <conditional name="disc">
153 <param name="discard" type="select" label="Print discarded reads to a separate file">
154 <option value="" selected="true">OFF</option>
155 <option value="discard">ON</option>
156 </param>
157 <when value="discard">
158 </when>
159 <when value="">
160 </when>
161 </conditional>
162 <conditional name="addAlignOpt">
163 <param name="alignoptions" type="select" label="Alignment Options">
164 <option value="default" selected="true">Default</option>
165 <option value="edit">Edit</option>
166 </param>
167 <when value="edit">
168 <param name="mutations" type="select" label="Filter reads by mutations --mutations" help="cross-linking sites are often
169 highlighted by deletions and/or substitutions in the reads. You can use this option to filter reads based on whether they have mutations or not.">
170 <option value="" selected="true">Off</option>
171 <option value="--mutations=delsonly">deletions</option>
172 <option value="--mutations=subsonly">substitutions</option>
173 <option value="--mutations=TC">T->C mutations</option>
174 <option value="--mutations=allmuts">all mutations</option>
175 <option value="--mutations=nomuts">no mutations</option>
176 </param>
177 <param format="integer" name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5" >
178 <validator type="in_range" min="0" message="Please enter a value >= 0"/>
179 </param>
180 <param format="integer" name="align_score" type="integer" label="Align Score --align_score " value="0" size="5" >
181 <validator type="in_range" min="0" message="Please enter a value >= 0"/>
182 </param>
183 <param format="integer" name="max" type="integer" label="Mapped reads to read from input file --max" help="Set to 0 to align all reads." value="0" size="10" >
184 <validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/>
185 </param>
186 <param format="integer" name="d" type="integer" label="Distance --distance " value="1000" size="6" help="Set the maximum number of bp allowed between two non-overlapping paired reads">
187 <validator type="in_range" min="1" message="Please enter a value >= 1"/>
188 </param>
189 <param format="integer" name="length" type="integer" label="Set the maximum length of reads --length" value="1000" size="7" help="Set the read length threshold between 15 and 1000">
190 <validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/>
191 </param>
192 <param name="unique" type="select" label="Remove reads with multiple alignment locations --unique">
193 <option value="" selected="true">OFF</option>
194 <option value="--unique">ON</option>
195 </param>
196 <param name="blocks" type="select" label="Only count reads with same start and end coords once --blocks">
197 <option value="" selected="true">OFF</option>
198 <option value="--blocks">ON</option>
199 </param>
200 </when>
201 <when value="default">
202 </when>
203 </conditional>
204 </when>
205 <when value="gtf">
206 <param format="gtf" name="input" type="data" label="Input File -f" help="File of type .gtf" />
207 </when>
208 </conditional>
209
<!--
  Standard options, shown only when "Edit" is selected. "ignore" is a select whose value is the
  literal ignorestrand flag text (or empty). A limit of 0 means unlimited reads, and the command
  template omits the limit flag in that case. The validator messages and help text state the same
  bounds the in_range validators enforce (range: 0 to 50000; overlap: at least 1; limit: 0 or more).
-->
210 <conditional name="addOpt">
211 <param name="options" type="select" label="Standard Options">
212 <option value="default" selected="true">Default</option>
213 <option value="edit">Edit</option>
214 </param>
215 <when value="edit">
216 <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs (0 to 50000)">
217 <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
218 </param>
219 <param name="ignore" type="select" label="Ignore strand information? --ignorestrand">
220 <option value="" selected="true">No</option>
221 <option value="--ignorestrand">Yes</option>
222 </param>
223 <param format="integer" name="overlap" type="integer" label="Overlap --overlap" value="1" size="5" help="Sets the number of nucleotides a read has to overlap with a gene before it is considered a hit. ">
224 <validator type="in_range" min="1" message="Please enter a positive integer"/>
225 </param>
226 <param name="sequence" type="select" label="Align reads to --sequence">
227 <option value="genomic" selected="true">Genomic Sequence</option>
228 <option value="coding">Coding Sequence</option>
229 </param>
230 <param format="integer" name="limit" type="integer" label="Limit number of reads to count that map to a particular region --limit" value="0" size="15" help="Set to 0 for unlimited reads" >
231 <validator type="in_range" min="0" message="Please enter a positive integer or set to 0 for unlimited reads"/>
232 </param>
233 </when>
234 <when value="default">
235 </when>
236 </conditional>
<!-- Free-text label; the output datasets use its value as the prefix of their display names. -->
237 <param type="text" name="label" value="pyReadAligner" size="30" format="txt" label="Enter output file label -o"/>
238 </inputs>
<!--
  Output datasets. "output" (FASTA) is always produced. "discarded" is created only when the input
  type is novo or sam and the discard switch is ON; the file_type test comes first, so the "disc"
  key is not looked up for gtf input, whose branch does not define it.
-->
239 <outputs>
240 <data name="output" format="fasta" label="${label.value}.aligned.fasta"/>
241 <data name="discarded" format="txt" label="${label.value}_discarded.txt">
242 <filter>ftype['file_type'] in ("novo", "sam") and ftype['disc']['discard'] == "discard"</filter>
243 </data>
244 </outputs>
245 <help>
246
247
248 .. class:: infomark
249
250 **pyReadAligner**
251
252 pyReadAligner is part of the pyCRAC_ package. Generates multiple sequence alignments for reads mapped to individual genes or genomic regions.
253 Produces a fasta output file.
254
255
256 .. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
257
258 ------
259
260 **Parameter list**
261
262 File input options::
263
264 -f FILE, --input_file=FILE
265 As input files you can use Novoalign native output or
266 SAM files. By default it expects data
267 from the standard input. Make sure to specify the file
268 type of the file you want to have analyzed using the
269 --file_type option!
270 -o OUTPUT_FILE, --output_file=OUTPUT_FILE
271 Use this flag to override the standard output file
272 names. All alignments will be written to one output
273 file.
274 -g FILE, --genes_file=FILE
275 here you need to type in the name of your gene list
276 file (1 column) or the hittable file
277 --chr=FILE
278 if you simply would like to align reads against a
279 genomic sequence you should generate a tab delimited
280 file containing an identifier, chromosome name, start
281 position, end position and strand
282 --gtf=annotation_file.gtf
283 type the path to the gtf annotation file that you want
284 to use
285 --tab=tab_file.tab
286 type the path to the tab file that contains the
287 genomic reference sequence
288 --file_type=FILE_TYPE
289 use this option to specify the file type (i.e. 'novo',
290 'sam', 'gtf'). This will tell the program which
291 parsers to use for processing the files. Default =
292 'novo'
293
294 pyReadAligner specific options::
295
296 --limit=500
297 with this option you can select how many reads mapped
298 to a particular gene/ORF/region you want to count.
299 Default = All
300
301 Common options::
302
303 --ignorestrand
304 this flag tells the program to ignore strand
305 information and all overlapping reads will be considered
306 sense reads. Useful for analysing ChIP or RIP data
307 --overlap=1
308 sets the number of nucleotides a read has to overlap
309 with a gene before it is considered a hit. Default =
310 1 nucleotide
311 -s genomic, --sequence=genomic
312 with this option you can select whether you want the
313 reads aligned to the genomic or the coding sequence.
314 Default = genomic
315 -r 100, --range=100
316 allows you to set the length of the UTR regions. If
317 you set '-r 50' or '--range=50', then the program will
318 set a fixed length (50 bp) regardless of whether the
319 GTF file has genes with annotated UTRs.
320
321 Options for novo, SAM and BAM files::
322
323 --align_quality=100, --mapping_quality=100
324 with these options you can set the alignment quality
325 (Novoalign) or mapping quality (SAM) threshold. Reads
326 with qualities lower than the threshold will be
327 ignored. Default = 0
328 --align_score=100
329 with this option you can set the alignment score
330 threshold. Reads with alignment scores lower than the
331 threshold will be ignored. Default = 0
332 -l 100, --length=100
333 to set read length threshold. Default = 1000
334 -m 100000, --max=100000
335 maximum number of mapped reads that will be analyzed.
336 Default = All
337 --unique
338 with this option reads with multiple alignment
339 locations will be removed. Default = Off
340 --blocks
341 with this option reads with the same start and end
342 coordinates on a chromosome will only be counted once.
343 Default = Off
344 --discarded=FILE
345 prints the lines from the alignments file that were
346 discarded by the parsers. This file contains reads
347 that were unmapped (NM), of poor quality (i.e. QC) or
348 paired reads that were mapped to different chromosomal
349 locations or were too far apart on the same
350 chromosome. Useful for debugging purposes
351 -d 1000, --distance=1000
352 this option allows you to set the maximum number of
353 base-pairs allowed between two non-overlapping paired
354 reads. Default = 1000
355 --mutations=delsonly
356 Use this option to only track mutations that are of
357 interest. For CRAC data this is usually deletions
358 (--mutations=delsonly). For PAR-CLIP data this is
359 usually T-C mutations (--mutations=TC). Other options
360 are: do not report any mutations: --mutations=nomuts.
361 Only report specific base mutations, for example only
362 in T's, C's and G's: --mutations=[TCG]. The brackets
363 are essential. Other nucleotide combinations are also
364 possible
365
366
367 </help>
368 </tool>