<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper for pyBinCollector (part of the pyCRAC package).

  The command section builds a call to pyBinCollector.pl from the form
  parameters declared under "inputs". Depending on the "outputall" choice the
  job produces either one GTF dataset ("out") or six text datasets
  (sd, ssub, sdel, asd, assub, asdel); the filters under "outputs" select
  which of them are created.
-->
<tool id="pyBinCollector" name="pyBinCollector">
    <requirements>
        <requirement type="package">pyCRAC</requirement>
    </requirements>

    <command interpreter="perl">
        pyBinCollector.pl
        -f $input
        --gtf $addGTF.gtf
        ## The nested "scan" conditional only defines "annotation" once it has
        ## been switched to "scanning". Referencing it while it is still on
        ## "wait" makes the template lookup fail, so the flag is left out then.
        ## NOTE(review): the help text documents "all" as the default
        ## annotation of pyBinCollector.py; confirm the perl wrapper agrees.
        #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":
            #if $addGTF.annotate.scan.annotations == "scanning":
                --annotation "$addGTF.annotate.scan.annotation"
            #end if
        #else:
            ## Quoted: the value may come from a free-text box or a dataset column.
            --annotation "$addGTF.annotate.annotation"
        #end if
        #if $addOpt.options == "edit":
            --options
            --range $addOpt.range
            --min_length $addOpt.min_length
            --max_length $addOpt.max_length
            --numberofbins $addOpt.numberofbins
            -s $addOpt.sequence
            #if $addOpt.limitBins.binselect == "yes":
                --bins1 $addOpt.limitBins.bs_first
                --bins2 $addOpt.limitBins.bs_last
            #end if
            $addOpt.ignore
            $addOpt.oall.outputall
        #end if
        -o "$input.name"
        ## "oall" only exists inside the "edit" case; the first test
        ## short-circuits so it is never looked up in the "default" case.
        #if $addOpt.options == "edit" and $addOpt.oall.outputall == "--outputall":
            --id $sd.id
            --sd $sd
            --ssub $ssub
            --sdel $sdel
            --asd $asd
            --assub $assub
            --asdel $asdel
        #else:
            --out $out
            --id $out.id
        #end if
    </command>

    <version_command>/usr/local/bin/pyBinCollector.py --version</version_command>

    <inputs>
        <param format="gtf" name="input" type="data" label="Input File -f" help="pyReadCounters or pyMotif gtf output files" />

        <!-- GTF annotation file: either a built-in entry from the pycrac_gtf data table or a history dataset. -->
        <conditional name="addGTF">
            <param name="gtfFile" type="select" label="Choose GTF File from">
                <option value="default" selected="true">Defaults</option>
                <option value="other">History</option>
            </param>
            <when value="default">
                <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
                    <options from_data_table="pycrac_gtf"/>
                </param>

                <conditional name="annotate">
                    <param name="annotations" type="select" label="Select annotation">
                        <option value="all" selected="true">All</option>
                        <option value="manual">Enter in text box</option>
                        <option value="auto">Scan pyGetGTFSources file</option>
                    </param>
                    <when value="all">
                        <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
                    </when>
                    <when value="manual">
                        <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
                            <validator type="empty_field" message="Please enter a value"/>
                        </param>
                    </when>
                    <when value="auto">
                        <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
                        <!-- The annotation list is only offered after the user switches this select to "Go". -->
                        <conditional name="scan">
                            <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
                                <option value="wait" selected="true">Waiting</option>
                                <option value="scanning">Go</option>
                            </param>
                            <when value="wait">
                            </when>
                            <when value="scanning">
                                <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
                                    <options from_dataset="gtf_annotation">
                                        <column name="name" index="0"/>
                                        <column name="value" index="0"/>
                                    </options>
                                </param>
                            </when>
                        </conditional>
                    </when>
                </conditional>

            </when>
            <when value="other">
                <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
                <conditional name="annotate">
                    <param name="annotations" type="select" label="Select annotation">
                        <option value="all" selected="true">All</option>
                        <option value="manual">Enter in text box</option>
                        <option value="auto">Scan selected file</option>
                    </param>
                    <when value="all">
                        <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
                    </when>
                    <when value="manual">
                        <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
                            <validator type="empty_field" message="Please enter a value"/>
                        </param>
                    </when>
                    <when value="auto">
                        <!-- Choices are the unique values of column index 1 of the selected GTF dataset. -->
                        <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
                            <options from_dataset="gtf">
                                <column name="name" index="1"/>
                                <column name="value" index="1"/>
                                <filter type="unique_value" name="unique" column="1"/>
                            </options>
                        </param>
                    </when>
                </conditional>
            </when>
        </conditional>


        <!-- Optional tuning parameters; they are only passed on the command line in the "edit" case. -->
        <conditional name="addOpt">
            <param name="options" type="select" label="Options">
                <option value="default" selected="true">Default</option>
                <option value="edit">Edit</option>
            </param>
            <when value="edit">
                <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0>50000">
                    <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
                </param>
                <param format="integer" name="numberofbins" type="integer" label="Set the number of bins --numberofbins" value="20" size="7" help="Set the number of bins you want to divide the genes into">
                    <validator type="in_range" min="20" max="1000" message="Please enter a value between 20 and 1000"/>
                </param>
                <param format="integer" name="min_length" type="integer" label="Set the minimum gene length (nt) --min_length" value="50" size="7" help="To filter the data for gene length (nucleotides)" >
                    <!-- The bound is inclusive, so 20 itself is accepted. -->
                    <validator type="in_range" min="20" message="Please enter a value of at least 20"/>
                </param>
                <param format="integer" name="max_length" type="integer" label="Set the maximum gene length (nt) --max_length" help="Default = 100000000" value="100000000" size="10" >
                    <validator type="in_range" min="50" max="100000000" message="Please enter a value between 50 and 100000000"/>
                </param>
                <param name="sequence" type="select" label="What sequences do you want to run pyBinCollector on? --sequence">
                    <option value="genomic" selected="true">Genomic Sequence</option>
                    <option value="coding">Coding Sequence</option>
                    <option value="intron">Introns</option>
                    <option value="exon">Exons</option>
                    <option value="CDS">CDS</option>
                    <option value="5UTR">5UTR</option>
                    <option value="3UTR">3UTR</option>
                </param>
                <conditional name="limitBins">
                    <param name="binselect" type="select" label="Select sequences that map to specific bins --binselect">
                        <option value="no" selected="true">No</option>
                        <option value="yes">Yes</option>
                    </param>
                    <when value="yes">
                        <param format="integer" name="bs_first" type="integer" label="Select First Bin" value="1" size="7">
                            <validator type="in_range" min="1" message="Please enter a value greater than 0"/>
                        </param>
                        <param format="integer" name="bs_last" type="integer" label="Select Last Bin" value="2" size="7">
                            <!-- Minimum is 2, so the message must say "greater than 1". -->
                            <validator type="in_range" min="2" message="Please enter a value greater than 1"/>
                        </param>
                    </when>
                    <when value="no">
                    </when>
                </conditional>
                <param name="ignore" type="select" label="Ignore strand information? --ignorestrand">
                    <option value="" selected="true">No</option>
                    <option value="--ignorestrand">Yes</option>
                </param>
                <!-- The selected value is inserted into the command line as-is and also drives the output filters. -->
                <conditional name="oall">
                    <param name="outputall" type="select" label="Output all genes --outputall" help="output the normalized distribution for each individual gene, rather than making a cumulative coverage plot">
                        <option value="" selected="true">No</option>
                        <option value="--outputall">Yes</option>
                    </param>
                    <when value="--outputall"/>
                    <when value=""/>
                </conditional>
            </when>
            <when value="default">
            </when>
        </conditional>
        <param name="label" type="text" format="txt" size="30" value="pyBinCollector" label="Enter output file label -o" />
    </inputs>

    <outputs>
        <!--
          Single GTF result. "oall" only exists in the "edit" case of addOpt, so
          the "default" case is tested first; otherwise the lookup raises a
          KeyError whenever the default options are used.
        -->
        <data format="gtf" name="out" label="${label.value}.gtf">
            <filter>addOpt['options'] == "default" or addOpt['oall']['outputall'] == ""</filter>
        </data>
        <!-- Per-gene outputs, created only when the outputall flag was selected. -->
        <data format="txt" name="sd" label="sense_data_${label.value}.txt">
            <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
        </data>
        <data format="txt" name="ssub" label="sense_subs_${label.value}.txt">
            <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
        </data>
        <data format="txt" name="sdel" label="sense_dels_${label.value}.txt">
            <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
        </data>
        <data format="txt" name="asd" label="anti_sense_data_${label.value}.txt">
            <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
        </data>
        <data format="txt" name="assub" label="anti_sense_subs_${label.value}.txt">
            <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
        </data>
        <data format="txt" name="asdel" label="anti_sense_dels_${label.value}.txt">
            <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
        </data>
    </outputs>
    <help>


.. class:: infomark

**pyBinCollector**

pyBinCollector is part of the pyCRAC_ package. It allows the user to generate genome-wide coverage plots. It normalises gene lengths by dividing genes into a
fixed number of bins and then calculates the hit density in each bin. The program also allows the user to input specific bin numbers to extract
blocks/clusters present in these bins.


.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html

------

**Parameter list**

File input options::

    -f FILE, --input_file=FILE
                        Provide the path and name of the pyReadCounters.py or
                        pyMotif.py GTF file. By default the program expects
                        data from the standard input.
    -o OUTPUT_FILE, --output_file=OUTPUT_FILE
                        To set an output file name. Do not add a file
                        extension. By default, if the --outputall flag is not
                        used, the program writes to the standard output.
    --gtf=yeast.gtf
                        type the path to the gtf annotation file that you want
                        to use. Default is /usr/local/pyCRAC/db/Saccharomyces_
                        cerevisiae.EF2.59.1.2.gtf

pyBinCollector.py specific options::

    -a protein_coding, --annotation=protein_coding
                        select which annotation (i.e. protein_coding, ncRNA,
                        sRNA, rRNA, tRNA, snoRNA, all) you would like to focus
                        your search on. Default = all
    --min_length=20
                        to set a minimum length threshold for genes. Genes
                        shorter than the minimal length will be discarded.
                        Default = 1
    --max_length=10000
                        to set a maximum length threshold for genes. Genes
                        larger than the maximum length will be discarded.
                        Default = 100000000
    -n 20, --numberofbins=20
                        select the number of bins you want to generate.
                        Default=20
    --binselect=2 4
                        allows selection of sequences that were mapped to
                        specific bins. This option expects two numbers, one
                        for each bin, separated by a space. For example:
                        --binselect 20 30.
    --outputall
                        use this flag to output the normalized distribution
                        for each individual gene, rather than making a
                        cumulative coverage plot. Useful for making box plots
                        or for making heat maps.

Common options::

    -r 100, --range=100
                        allows you to set the length of the UTR regions. If
                        you set '-r 50' or '--range=50', then the program will
                        set a fixed length (50 bp) regardless of whether the
                        GTF file has genes with annotated UTRs.
    -s intron, --sequence=intron
                        with this option you can select whether you want to
                        generate bins from the coding or genomic sequence or
                        introns,exon,CDS, or UTR coordinates. Default =
                        genomic
    --ignorestrand
                        To ignore strand information and all reads overlapping
                        with genomic features will be considered sense reads.
                        Useful for analysing ChIP or RIP data





    </help>
</tool>