comparison pyCRAC/pyBinCollector.xml @ 0:19b20927172d draft

Uploaded
author swebb
date Tue, 18 Jun 2013 09:11:00 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:19b20927172d
1 <tool id ="pyBinCollector" name="pyBinCollector">
<!-- Dependency declaration: this tool requires the "pyCRAC" package. -->
2 <requirements>
3 <requirement type="package">pyCRAC</requirement>
4 </requirements>
<!-- Cheetah command template run through the Perl wrapper pyBinCollector.pl.
     Per-gene mode (Options = Edit and "Output all genes" = Yes) passes the six
     sense/anti-sense output datasets; every other mode passes the single "out" dataset. -->
5 <command interpreter="perl">
6 pyBinCollector.pl
7 -f $input
8 --gtf $addGTF.gtf
9 #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":
## The scanned annotation select only exists once the nested "scan" conditional is set to
## "scanning"; while it is still "wait", fall back to "all" (the value the "All" choice sends)
## instead of dereferencing an undefined parameter.
#if $addGTF.annotate.scan.annotations == "scanning":
10 --annotation $addGTF.annotate.scan.annotation
#else:
--annotation all
#end if#
11 #else:
12 --annotation $addGTF.annotate.annotation
13 #end if#
14 #if $addOpt.options == "edit":
15 --options
16 --range $addOpt.range
17 --min_length $addOpt.min_length
18 --max_length $addOpt.max_length
19 --numberofbins $addOpt.numberofbins
20 -s $addOpt.sequence
21 #if $addOpt.limitBins.binselect == "yes":
22 --bins1 $addOpt.limitBins.bs_first
23 --bins2 $addOpt.limitBins.bs_last
24 #end if#
25 $addOpt.ignore
26 $addOpt.oall.outputall
27 #end if#
28 -o "$input.name"
29 #if $addOpt.options == "edit" and $addOpt.oall.outputall == "--outputall":
30 --id $sd.id
31 --sd $sd
32 --ssub $ssub
33 --sdel $sdel
34 --asd $asd
35 --assub $assub
36 --asdel $asdel
37 #else:
38 --out $out
39 --id $out.id
40 #end if#
41 </command>
<!-- Command whose output is recorded as the tool version.
     NOTE(review): this hard-codes an absolute path to the .py script, whereas the command
     section runs the pyBinCollector.pl wrapper; confirm the path exists on the target host. -->
42 <version_command>/usr/local/bin/pyBinCollector.py --version</version_command>
43 <inputs>
<!-- Primary input dataset (gtf format); the command section passes it to the wrapper with -f. -->
44 <param format="gtf" name="input" type="data" label="Input File -f" help="pyReadCounters or pyMotif gtf output files" />
45
<!-- GTF source selection. "default" picks a GTF from the pycrac_gtf data table,
     "other" takes a GTF dataset from the history. Each branch then offers three ways
     of choosing the annotation: all, free text, or a list scanned from a file. -->
46 <conditional name="addGTF">
47 <param name="gtfFile" type="select" label="Choose GTF File from">
48 <option value="default" selected="true">Defaults</option>
49 <option value="other">History</option>
50 </param>
51 <when value="default">
52 <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
53 <options from_data_table="pycrac_gtf"/>
54 </param>
55
56 <conditional name="annotate">
57 <param name="annotations" type="select" label="Select annotation">
58 <option value="all" selected="true">All</option>
59 <option value="manual">Enter in text box</option>
60 <option value="auto">Scan pyGetGTFSources file</option>
61 </param>
62 <when value="all">
<!-- Hidden parameter so that "annotation" resolves to the fixed value "all" in this case. -->
63 <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
64 </when>
65 <when value="manual">
66 <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
67 <validator type="empty_field" message="Please enter a value"/>
68 </param>
69 </when>
70 <when value="auto">
71 <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
<!-- Two-step selection: the annotation list (column index 0 of gtf_annotation) is only
     offered in the "scanning" case. The "wait" case defines no "annotation" parameter,
     so the command section must not read scan.annotation in that state. -->
72 <conditional name="scan">
73 <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
74 <option value="wait" selected="true">Waiting</option>
75 <option value="scanning">Go</option>
76 </param>
77 <when value="wait">
78 </when>
79 <when value="scanning">
80 <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
81 <options from_dataset="gtf_annotation">
82 <column name="name" index="0"/>
83 <column name="value" index="0"/>
84 </options>
85 </param>
86 </when>
87 </conditional>
88 </when>
89 </conditional>
90
91 </when>
92 <when value="other">
93 <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
94 <conditional name="annotate">
95 <param name="annotations" type="select" label="Select annotation">
96 <option value="all" selected="true">All</option>
97 <option value="manual">Enter in text box</option>
98 <option value="auto">Scan selected file</option>
99 </param>
100 <when value="all">
101 <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
102 </when>
103 <when value="manual">
104 <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
105 <validator type="empty_field" message="Please enter a value"/>
106 </param>
107 </when>
108 <when value="auto">
<!-- Builds the list from the unique values found at column index 1 of the chosen GTF
     dataset (presumably the GTF source column; confirm against the GTF in use). -->
109 <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
110 <options from_dataset="gtf">
111 <column name="name" index="1"/>
112 <column name="value" index="1"/>
113 <filter type="unique_value" name="unique" column="1"/>
114 </options>
115 </param>
116 </when>
117 </conditional>
118 </when>
119 </conditional>
120
121
<!-- Optional settings. "default" sends no extra options; "edit" exposes range, gene length
     limits, bin count, sequence type, optional bin selection, strand handling and per-gene
     output. Validator messages state the bounds that the validators actually enforce. -->
122 <conditional name="addOpt">
123 <param name="options" type="select" label="Options">
124 <option value="default" selected="true">Default</option>
125 <option value="edit">Edit</option>
126 </param>
127 <when value="edit">
128 <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs (0-50000)">
129 <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
130 </param>
131 <param format="integer" name="numberofbins" type="integer" label="Set the number of bins --numberofbins" value="20" size="7" help="Set the number of bins you want to divide the genes into">
132 <validator type="in_range" min="20" max="1000" message="Please enter a value between 20 and 1000"/>
133 </param>
134 <param format="integer" name="min_length" type="integer" label="Set the minimum gene length (nt) --min_length" value="50" size="7" help="To filter the data for gene length (nucleotides)" >
135 <validator type="in_range" min="20" message="Please enter a value of at least 20"/>
136 </param>
137 <param format="integer" name="max_length" type="integer" label="Set the maximum gene length (nt) --max_length" help="Default = 100000000" value="100000000" size="10" >
138 <validator type="in_range" min="50" max="100000000" message="Please enter a value between 50 and 100000000"/>
139 </param>
140 <param name="sequence" type="select" label="What sequences do you want to run pyBinCollector on? --sequence">
141 <option value="genomic" selected="true">Genomic Sequence</option>
142 <option value="coding">Coding Sequence</option>
143 <option value="intron">Introns</option>
144 <option value="exon">Exons</option>
145 <option value="CDS">CDS</option>
146 <option value="5UTR">5UTR</option>
147 <option value="3UTR">3UTR</option>
148 </param>
<!-- Optional bin selection; the two bin numbers are only defined in the "yes" case. -->
149 <conditional name="limitBins">
150 <param name="binselect" type="select" label="Select sequences that map to specific bins --binselect">
151 <option value="no" selected="true">No</option>
152 <option value="yes">Yes</option>
153 </param>
154 <when value="yes">
155 <param format="integer" name="bs_first" type="integer" label="Select First Bin" value="1" size="7">
156 <validator type="in_range" min="1" message="Please enter a value greater than 0"/>
157 </param>
158 <param format="integer" name="bs_last" type="integer" label="Select Last Bin" value="2" size="7">
159 <validator type="in_range" min="2" message="Please enter a value greater than 1"/>
160 </param>
161 </when>
162 <when value="no">
163 </when>
164 </conditional>
165 <param name="ignore" type="select" label="Ignore strand information? --ignorestrand">
166 <option value="" selected="true">No</option>
167 <option value="--ignorestrand">Yes</option>
168 </param>
<!-- The selected value is the literal flag text placed on the command line; the outputs
     section also tests it to decide which datasets to create. -->
169 <conditional name="oall">
170 <param name="outputall" type="select" label="Output all genes --outputall" help="output the normalized distribution for each individual gene, rather than making a cumulative coverage plot">
171 <option value="" selected="true">No</option>
172 <option value="--outputall">Yes</option>
173 </param>
174 <when value="--outputall"/>
175 <when value=""/>
176 </conditional>
177 </when>
178 <when value="default">
179 </when>
180 </conditional>
<!-- Free-text label used to name the output datasets (see the label attributes in outputs).
     NOTE(review): the UI label mentions -o, but the command section passes "$input.name"
     to -o rather than this value; confirm that is intended. -->
181 <param name="label" type="text" format="txt" size="30" value="pyBinCollector" label="Enter output file label -o" />
182 </inputs>
183
<!-- Output datasets. "out" is the single gtf result of the cumulative mode; the six txt
     datasets are created only in per-gene mode (Options = Edit and "Output all genes" = Yes).
     The "out" filter tests addOpt['options'] first because the "oall" conditional exists only
     inside the "edit" case; indexing it under "default" would raise a KeyError. -->
184 <outputs>
185 <data format="gtf" name="out" label="${label.value}.gtf">
186 <filter>addOpt['options'] == "default" or addOpt['oall']['outputall'] == ""</filter>
187 </data>
188 <data format="txt" name="sd" label="sense_data_${label.value}.txt">
189 <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
190 </data>
191 <data format="txt" name="ssub" label="sense_subs_${label.value}.txt">
192 <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
193 </data>
194 <data format="txt" name="sdel" label="sense_dels_${label.value}.txt">
195 <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
196 </data>
197 <data format="txt" name="asd" label="anti_sense_data_${label.value}.txt">
198 <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
199 </data>
200 <data format="txt" name="assub" label="anti_sense_subs_${label.value}.txt">
201 <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
202 </data>
203 <data format="txt" name="asdel" label="anti_sense_dels_${label.value}.txt">
204 <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
205 </data>
206 </outputs>
207 <help>
208
209
210 .. class:: infomark
211
212 **pyBinCollector**
213
214 pyBinCollector is part of the pyCRAC_ package. It allows the user to generate genome-wide coverage plots. It normalises gene lengths by dividing genes into a
215 fixed number of bins and then calculates the hit density in each bin. The program also allows the user to input specific bin numbers to extract
216 blocks/clusters present in these bins.
217
218
219 .. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
220
221 ------
222
223 **Parameter list**
224
225 File input options::
226
227 -f FILE, --input_file=FILE
228 Provide the path and name of the pyReadCounters.py or
229 pyMotif.py GTF file. By default the program expects
230 data from the standard input.
231 -o OUTPUT_FILE, --output_file=OUTPUT_FILE
232 To set an output file name. Do not add a file
233 extension. By default, if the --outputall flag is not
234 used, the program writes to the standard output.
235 --gtf=yeast.gtf
236 type the path to the gtf annotation file that you want
237 to use. Default is /usr/local/pyCRAC/db/Saccharomyces_
238 cerevisiae.EF2.59.1.2.gtf
239
240 pyBinCollector.py specific options::
241
242 -a protein_coding, --annotation=protein_coding
243 select which annotation (i.e. protein_coding, ncRNA,
244 sRNA, rRNA, tRNA, snoRNA, all) you would like to focus
245 your search on. Default = all
246 --min_length=20
247 to set a minimum length threshold for genes. Genes
248 shorter than the minimal length will be discarded.
249 Default = 1
250 --max_length=10000
251 to set a maximum length threshold for genes. Genes
252 larger than the maximum length will be discarded.
253 Default = 100000000
254 -n 20, --numberofbins=20
255 select the number of bins you want to generate.
256 Default=20
257 --binselect=2 4
258 allows selection of sequences that were mapped to
259 specific bins. This option expects two numbers, one
260 for each bin, separated by a space. For example:
261 --binselect 20 30.
262 --outputall
263 use this flag to output the normalized distribution
264 for each individual gene, rather than making a
265 cumulative coverage plot. Useful for making box plots
266 or for making heat maps.
267
268 Common options::
269
270 -r 100, --range=100
271 allows you to set the length of the UTR regions. If
272 you set '-r 50' or '--range=50', then the program will
273 set a fixed length (50 bp) regardless of whether the
274 GTF file has genes with annotated UTRs.
275 -s intron, --sequence=intron
276 with this option you can select whether you want to
277 generate bins from the coding or genomic sequence or
278 introns, exon, CDS, or UTR coordinates. Default =
279 genomic
280 --ignorestrand
281 To ignore strand information and all reads overlapping
282 with genomic features will be considered sense reads.
283 Useful for analysing ChIP or RIP data
284
285
286
287
288
289 </help>
290 </tool>