comparison pyCRAC/pyBinCollector.xml @ 0:19b20927172d draft

author swebb
date Tue, 18 Jun 2013 09:11:00 -0400
equal deleted inserted replaced
-1:000000000000 0:19b20927172d
1 <tool id ="pyBinCollector" name="pyBinCollector">
2 <requirements>
3 <requirement type="package">pyCRAC</requirement>
4 </requirements>
5 <command interpreter="perl">
7 -f $input
8 --gtf $addGTF.gtf
9 #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":
10 --annotation $addGTF.annotate.scan.annotation
11 #else:
12 --annotation $addGTF.annotate.annotation
13 #end if#
14 #if $addOpt.options == "edit":
15 --options
16 --range $addOpt.range
17 --min_length $addOpt.min_length
18 --max_length $addOpt.max_length
19 --numberofbins $addOpt.numberofbins
20 -s $addOpt.sequence
21 #if $addOpt.limitBins.binselect == "yes":
22 --bins1 $addOpt.limitBins.bs_first
23 --bins2 $addOpt.limitBins.bs_last
24 #end if#
25 $addOpt.ignore
26 $addOpt.oall.outputall
27 #end if#
28 -o "$"
29 #if $addOpt.options == "edit" and $addOpt.oall.outputall == "--outputall":
30 --id $
31 --sd $sd
32 --ssub $ssub
33 --sdel $sdel
34 --asd $asd
35 --assub $assub
36 --asdel $asdel
37 #else:
38 --out $out
39 --id $
40 #end if#
41 </command>
42 <version_command>/usr/local/bin/ --version</version_command>
43 <inputs>
44 <param format="gtf" name="input" type="data" label="Input File -f" help="pyReadCounters or pyMotif gtf output files" />
46 <conditional name="addGTF">
47 <param name="gtfFile" type="select" label="Choose GTF File from">
48 <option value="default" selected="true">Defaults</option>
49 <option value="other">History</option>
50 </param>
51 <when value="default">
52 <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
53 <options from_data_table="pycrac_gtf"/>
54 </param>
56 <conditional name="annotate">
57 <param name="annotations" type="select" label="Select annotation">
58 <option value="all" selected="true">All</option>
59 <option value="manual">Enter in text box</option>
60 <option value="auto">Scan pyGetGTFSources file</option>
61 </param>
62 <when value="all">
63 <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
64 </when>
65 <when value="manual">
66 <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
67 <validator type="empty_field" message="Please enter a value"/>
68 </param>
69 </when>
70 <when value="auto">
71 <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
72 <conditional name="scan">
73 <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
74 <option value="wait" selected="true">Waiting</option>
75 <option value="scanning">Go</option>
76 </param>
77 <when value="wait">
78 </when>
79 <when value="scanning">
80 <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
81 <options from_dataset="gtf_annotation">
82 <column name="name" index="0"/>
83 <column name="value" index="0"/>
84 </options>
85 </param>
86 </when>
87 </conditional>
88 </when>
89 </conditional>
91 </when>
92 <when value="other">
93 <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
94 <conditional name="annotate">
95 <param name="annotations" type="select" label="Select annotation">
96 <option value="all" selected="true">All</option>
97 <option value="manual">Enter in text box</option>
98 <option value="auto">Scan selected file</option>
99 </param>
100 <when value="all">
101 <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
102 </when>
103 <when value="manual">
104 <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
105 <validator type="empty_field" message="Please enter a value"/>
106 </param>
107 </when>
108 <when value="auto">
109 <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
110 <options from_dataset="gtf">
111 <column name="name" index="1"/>
112 <column name="value" index="1"/>
113 <filter type="unique_value" name="unique" column="1"/>
114 </options>
115 </param>
116 </when>
117 </conditional>
118 </when>
119 </conditional>
122 <conditional name="addOpt">
123 <param name="options" type="select" label="Options">
124 <option value="default" selected="true">Default</option>
125 <option value="edit">Edit</option>
126 </param>
127 <when value="edit">
128 <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0>50000">
129 <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
130 </param>
131 <param format="integer" name="numberofbins" type="integer" label="Set the number of bins --numberofbins" value="20" size="7" help="Set the number of bins you want to divide the genes into">
132 <validator type="in_range" min="20" max="1000" message="Please enter a value between 20 and 1000"/>
133 </param>
134 <param format="integer" name="min_length" type="integer" label="Set the minimum gene length (nt) --min_length" value="50" size="7" help="To filter the data for gene length (nucleotides)" >
135 <validator type="in_range" min="20" message="Please enter a value of 20 or greater"/> <!-- message worded to match the min="20" bound; NOTE(review): assumes the bound is inclusive, confirm against the Galaxy validator docs -->
136 </param>
137 <param format="integer" name="max_length" type="integer" label="Set the maximum gene length (nt) --max_length" help="Default = 100000000" value="100000000" size="10" >
138 <validator type="in_range" min="50" max="100000000" message="Please enter a value between 50 and 100000000"/>
139 </param>
140 <param name="sequence" type="select" label="What sequences do you want to run pyBinCollector on? --sequence">
141 <option value="genomic" selected="true">Genomic Sequence</option>
142 <option value="coding">Coding Sequence</option>
143 <option value="intron">Introns</option>
144 <option value="exon">Exons</option>
145 <option value="CDS">CDS</option>
146 <option value="5UTR">5UTR</option>
147 <option value="3UTR">3UTR</option>
148 </param>
149 <conditional name="limitBins">
150 <param name="binselect" type="select" label="Select sequences that map to specific bins --binselect">
151 <option value="no" selected="true">No</option>
152 <option value="yes">Yes</option>
153 </param>
154 <when value="yes">
155 <param format="integer" name="bs_first" type="integer" label="Select First Bin" value="1" size="7">
156 <validator type="in_range" min="1" message="Please enter a value greater than 0"/>
157 </param>
158 <param format="integer" name="bs_last" type="integer" label="Select Last Bin" value="2" size="7">
159 <validator type="in_range" min="2" message="Please enter a value greater than 1"/> <!-- message now agrees with min="2"; it previously said "greater than 0" -->
160 </param>
161 </when>
162 <when value="no">
163 </when>
164 </conditional>
165 <param name="ignore" type="select" label="Ignore strand information? --ignorestrand">
166 <option value="" selected="true">No</option>
167 <option value="--ignorestrand">Yes</option>
168 </param>
169 <conditional name="oall">
170 <param name="outputall" type="select" label="Output all genes --outputall" help="output the normalized distribution for each individual gene, rather than making a cumulative coverage plot">
171 <option value="" selected="true">No</option>
172 <option value="--outputall">Yes</option>
173 </param>
174 <when value="--outputall"/>
175 <when value=""/>
176 </conditional>
177 </when>
178 <when value="default">
179 </when>
180 </conditional>
181 <param name="label" type="text" format="txt" size="30" value="pyBinCollector" label="Enter output file label -o" />
182 </inputs>
184 <outputs>
185 <data format="gtf" name="out" label="${label.value}.gtf"> <!-- cumulative GTF output: produced whenever per-gene output (the outputall flag) is not requested -->
186 <filter>addOpt['options'] == "default" or addOpt['oall']['outputall'] == ""</filter> <!-- the "oall" conditional exists only in the "edit" branch, so test the branch first instead of relying on a failed lookup -->
187 </data>
188 <data format="txt" name="sd" label="sense_data_${label.value}.txt">
189 <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
190 </data>
191 <data format="txt" name="ssub" label="sense_subs_${label.value}.txt">
192 <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
193 </data>
194 <data format="txt" name="sdel" label="sense_dels_${label.value}.txt">
195 <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
196 </data>
197 <data format="txt" name="asd" label="anti_sense_data_${label.value}.txt">
198 <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
199 </data>
200 <data format="txt" name="assub" label="anti_sense_subs_${label.value}.txt">
201 <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
202 </data>
203 <data format="txt" name="asdel" label="anti_sense_dels_${label.value}.txt">
204 <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
205 </data>
206 </outputs>
207 <help>
210 .. class:: infomark
212 **pyBinCollector**
214 pyBinCollector is part of the pyCRAC_ package. It allows the user to generate genome-wide coverage plots. It normalises gene lengths by dividing genes into a
215 fixed number of bins and then calculates the hit density in each bin. The program also allows the user to input specific bin numbers to extract
216 blocks/clusters present in these bins.
219 .. _pyCRAC:
221 ------
223 **Parameter list**
225 File input options::
227 -f FILE, --input_file=FILE
228 Provide the path and name of the pyReadCounters or pyMotif
229 GTF file. By default the program expects
230 data from the standard input.
231 -o OUTPUT_FILE, --output_file=OUTPUT_FILE
232 To set an output file name. Do not add a file
233 extension. By default, if the --outputall flag is not
234 used, the program writes to the standard output.
235 --gtf=yeast.gtf
236 type the path to the gtf annotation file that you want
237 to use. Default is /usr/local/pyCRAC/db/Saccharomyces_
238 cerevisiae.EF2.59.1.2.gtf
240 specific options::
242 -a protein_coding, --annotation=protein_coding
243 select which annotation (i.e. protein_coding, ncRNA,
244 sRNA, rRNA, tRNA, snoRNA, all) you would like to focus
245 your search on. Default = all
246 --min_length=20
247 to set a minimum length threshold for genes. Genes
248 shorter than the minimal length will be discarded.
249 Default = 1
250 --max_length=10000
251 to set a maximum length threshold for genes. Genes
252 larger than the maximum length will be discarded.
253 Default = 100000000
254 -n 20, --numberofbins=20
255 select the number of bins you want to generate.
256 Default=20
257 --binselect=2 4
258 allows selection of sequences that were mapped to
259 specific bins. This option expects two numbers, one
260 for each bin, separated by a space. For example:
261 --binselect 20 30.
262 --outputall
263 use this flag to output the normalized distribution
264 for each individual gene, rather than making a
265 cumulative coverage plot. Useful for making box plots
266 or for making heat maps.
268 Common options::
270 -r 100, --range=100
271 allows you to set the length of the UTR regions. If
272 you set '-r 50' or '--range=50', then the program will
273 set a fixed length (50 bp) regardless of whether the
274 GTF file has genes with annotated UTRs.
275 -s intron, --sequence=intron
276 with this option you can select whether you want to
277 generate bins from the coding or genomic sequence or
278 introns, exon, CDS, or UTR coordinates. Default =
279 genomic
280 --ignorestrand
281 To ignore strand information and all reads overlapping
282 with genomic features will be considered sense reads.
283 Useful for analysing ChIP or RIP data
289 </help>
290 </tool>