<?xml version="1.0" encoding="UTF-8"?>
<!--
    Galaxy tool wrapper for pyCalculateFDRs.py (part of the pyCRAC package).

    The command line is assembled from:
      * ftype   : the read/cDNA interval dataset and its declared type (gff, gtf or bed)
      * addChr  : a chromosome length file, from the pycrac_chr data table or the history
      * addGTF  : a GTF annotation file, from the pycrac_gtf data table or the history,
                  plus an optional annotation filter (all / typed by hand / picked from a file)
      * addOpt  : optional tuning parameters, only passed when "Edit" is selected
      * label   : used only to build the label of the output dataset

    A single GTF dataset is produced.
-->
<tool id="pyCalculateFDRs" name="pyCalculateFDRs">
    <requirements>
        <requirement type="package">pyCRAC</requirement>
    </requirements>
    <command interpreter="python">
        /usr/local/bin/pyCalculateFDRs.py
        ## File paths and free-text values are single-quoted so that spaces or
        ## shell metacharacters cannot split or alter the command line.
        -f '$ftype.input'
        --file_type $ftype.file_type
        --gtf='$addGTF.gtf'

        #if $addGTF.annotate.annotations != "all"
            #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto"
                ## The scanned annotation parameter only exists once the scan
                ## selector has been switched to "Go". While it is still on
                ## "Waiting" no --annotation is passed (the tool then uses all
                ## annotations) instead of referencing a missing parameter.
                #if $addGTF.annotate.scan.annotations == "scanning"
                    --annotation '$addGTF.annotate.scan.annotation'
                #end if
            #else
                --annotation '$addGTF.annotate.annotation'
            #end if
        #end if
        --chromfile='$addChr.chr'
        #if $addOpt.options == "edit"
            -s $addOpt.sequence
            --min $addOpt.min
            --minfdr $addOpt.minfdr
            --iterations=$addOpt.iterations
            --range $addOpt.range
        #end if
        -o '$output'
    </command>
    <version_command>/usr/local/bin/pyCalculateFDRs.py --version</version_command>
    <inputs>
        <!-- Read/cDNA intervals: the selected type decides which dataset format is offered. -->
        <conditional name="ftype">
            <param name="file_type" type="select"
                   label="Input File Type --file_type"
                   help="Use bed6, gff or gtf input files containing read/cDNA co-ordinates">
                <option value="gff" selected="true">GFF</option>
                <option value="bed">Bed6</option>
                <option value="gtf">GTF</option>
            </param>
            <when value="gff">
                <param format="gff" name="input" type="data"
                       label="Input File --readdatafile"
                       help="GFF format containing read/cDNA co-ordinates" />
            </when>
            <when value="gtf">
                <param format="gtf" name="input" type="data"
                       label="Input File --readdatafile"
                       help="GTF format containing read/cDNA co-ordinates" />
            </when>
            <when value="bed">
                <param format="bed6" name="input" type="data"
                       label="Input File --readdatafile"
                       help="Bed 6 column format containing read/cDNA co-ordinates" />
            </when>
        </conditional>

        <!-- Chromosome length file: either an entry of the pycrac_chr data table or a history dataset. -->
        <conditional name="addChr">
            <param name="chrfile" type="select" label="Choose Chromosome length file from">
                <option value="default" selected="true">Defaults</option>
                <option value="other">History</option>
            </param>
            <when value="default">
                <param name="chr" type="select"
                       label="Chromosome length file -c"
                       help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes">
                    <options from_data_table="pycrac_chr"/>
                </param>
            </when>
            <when value="other">
                <param format="tabular" name="chr" type="data"
                       label="Chromosome length file -c"
                       help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes. Use pyCrac utility pyCalculateChromosomeLengths to create."/>
            </when>
        </conditional>

        <!-- GTF annotation file plus the optional annotation filter. -->
        <conditional name="addGTF">
            <param name="gtfFile" type="select" label="Choose GTF File from">
                <option value="default" selected="true">Defaults</option>
                <option value="other">History</option>
            </param>
            <when value="default">
                <param name="gtf" type="select"
                       label="GTF File --gtf"
                       help="GTF file containing gene ID co-ordinates">
                    <options from_data_table="pycrac_gtf"/>
                </param>
                <conditional name="annotate">
                    <param name="annotations" type="select" label="Select annotation">
                        <option value="all" selected="true">All</option>
                        <option value="manual">Enter in text box</option>
                        <option value="auto">Scan pyGetGTFSources file</option>
                    </param>
                    <when value="all">
                        <param name="annotation" type="hidden" size="10" value="all"/>
                    </when>
                    <when value="manual">
                        <param name="annotation" type="text" size="100" value="protein_coding"
                               label="Select which annotation to focus search on --annotation"
                               help="To find a list of available annotations please use pyGetGTFSources tool">
                            <validator type="empty_field" message="Please enter a value"/>
                        </param>
                    </when>
                    <when value="auto">
                        <param format="tabular" name="gtf_annotation" type="data"
                               label="GTF annotation File (pyGetGTFSources output)"
                               help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
                        <!-- The annotation select is only defined in the "scanning" branch;
                             the command template checks for that before using it. -->
                        <conditional name="scan">
                            <param name="annotations" type="select"
                                   label="Scan this file for annotations"
                                   help="Choose the correct GTF file then choose GO">
                                <option value="wait" selected="true">Waiting</option>
                                <option value="scanning">Go</option>
                            </param>
                            <when value="wait">
                            </when>
                            <when value="scanning">
                                <param name="annotation" type="select" multiple="false"
                                       label="Select which annotation to focus search on --annotation">
                                    <options from_dataset="gtf_annotation">
                                        <column name="name" index="0"/>
                                        <column name="value" index="0"/>
                                    </options>
                                </param>
                            </when>
                        </conditional>
                    </when>
                </conditional>
            </when>
            <when value="other">
                <param format="gtf" name="gtf" type="data"
                       label="GTF File --gtf"
                       help="GTF file containing gene ID co-ordinates"/>
                <conditional name="annotate">
                    <param name="annotations" type="select" label="Select annotation">
                        <option value="all" selected="true">All</option>
                        <option value="manual">Enter in text box</option>
                        <option value="auto">Scan selected file</option>
                    </param>
                    <when value="all">
                        <param name="annotation" type="hidden" size="10" value="all"/>
                    </when>
                    <when value="manual">
                        <param name="annotation" type="text" size="100" value="protein_coding"
                               label="Select which annotation to focus search on --annotation"
                               help="To find a list of available annotations please use pyGetGTFSources tool">
                            <validator type="empty_field" message="Please enter a value"/>
                        </param>
                    </when>
                    <when value="auto">
                        <!-- Options are the unique values of column index 1 of the selected GTF dataset. -->
                        <param name="annotation" type="select" multiple="false"
                               label="Select which annotation to focus search on --annotation">
                            <options from_dataset="gtf">
                                <column name="name" index="1"/>
                                <column name="value" index="1"/>
                                <filter type="unique_value" name="unique" column="1"/>
                            </options>
                        </param>
                    </when>
                </conditional>
            </when>
        </conditional>

        <!-- Tuning parameters; they reach the command line only when "Edit" is selected. -->
        <conditional name="addOpt">
            <param name="options" type="select" label="Standard options">
                <option value="default" selected="true">Default</option>
                <option value="edit">Edit</option>
            </param>
            <when value="edit">
                <param name="sequence" type="select" label="Align reads to --sequence">
                    <option value="genomic" selected="true">Genomic Sequence</option>
                    <option value="coding">Coding Sequence</option>
                </param>
                <param name="min" type="integer" value="1" size="10"
                       label="Minimum read coverage --min "
                       help="Set the minimal read coverage for a region">
                    <validator type="in_range" min="1" message="Please enter a value >= 1"/>
                </param>
                <param name="minfdr" type="float" value="0.05" size="6"
                       label="Minimum FDR threshold --minfdr"
                       help="Set a minimal FDR threshold for filtering interval data">
                    <validator type="in_range" min="0" max="1" message="Please enter a value between 0 and 1"/>
                </param>
                <param name="iterations" type="integer" value="100" size="6"
                       label="Number of iterations --iterations"
                       help="The number of iterations for randomization of read coordinates">
                    <validator type="in_range" min="0" message="Please enter a value >= 0"/>
                </param>
                <param name="range" type="integer" value="0" size="5"
                       label="Range --range"
                       help="Manually set the length of the 5' and 3' UTRs (0-50000)">
                    <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
                </param>
            </when>
            <when value="default">
            </when>
        </conditional>

        <!-- Only used to label the output dataset in the history. -->
        <param name="label" type="text" size="30" value="pyCalculateFDRs" label="Enter output file label -o" />
    </inputs>
    <outputs>
        <data format="gtf" name="output" label="${label.value}.gtf"/>
    </outputs>
    <help>

.. class:: infomark

**pyCalculateFDRs**

By default the FDR value is set to 0.05, meaning that there is a 5% chance that the interval is not significantly enriched.
The tool reports significant intervals in the GTF format and reports overlapping genomic features.
Mutation frequencies are not included but these can be added using the pyCalculateMutationFrequencies tool

**NOTE!** By default it calls each significant interval an "exon" but this has no meaning! It may overlap with an intron.
Use bedtools to extract those intervals that overlap with introns or other features

Example of an output file::

    ##gff-version 2
    # generated by pyCalculateFDRs version 0.0.3, Sat Jun 1 21:16:23 2013
    # pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05
    # chromosome    feature              source    start     end       minimal_coverage    strand    .    attributes
    chrI            protein_coding       exon      140846    140860    5                   -         .    gene_id "YAL005C"; gene_name "SSA1";
    chrI            intergenic_region    exon      223118    223164    4                   -         .    gene_id "INT_0_179"; gene_name "INT_0_179";
    chrI            intergenic_region    exon      71889     71922     3                   +         .    gene_id "INT_0_94"; gene_name "INT_0_94";
    chrII           intergenic_region    exon      296127    296158    3                   -         .    gene_id "INT_0_365"; gene_name "INT_0_365";
    chrII           intergenic_region    exon      680697    680722    4                   -         .    gene_id "INT_0_626"; gene_name "INT_0_626";
    chrII           intergenic_region    exon      680827    680846    4                   -         .    gene_id "INT_0_626"; gene_name "INT_0_626";
    chrII           snRNA                exon      680827    680838    5                   -         .    gene_id "LSR1"; gene_name "LSR1";
    chrII           snRNA                exon      680951    681001    5                   -         .    gene_id "LSR1"; gene_name "LSR1";
    chrII           intergenic_region    exon      577985    577996    3                   -         .    gene_id "INT_0_556"; gene_name "INT_0_556";
    chrII           protein_coding       exon      203838    203887    3                   +         .    gene_id "YBL011W"; gene_name "SCT1";
    chrII           protein_coding       exon      296127    296158    3                   -         .    gene_id "YBR028C"; gene_name "YBR028C";


pyCalculateFDRs is part of the pyCRAC_ package. Takes interval information in GTF, GFF or bed format and calculates False Discovery Rates (FDRs).


.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html

------

**Parameter list**

Options::

    -f read_file, --readdatafile=read_file
                        Name of the bed/gff/gtf file containing the read/cDNA
                        coordinates
    --file_type=FILE_TYPE
                        this tool supports bed6, gtf and gff input files.
                        Please select from 'bed','gtf' or 'gff'. Default=gtf
    -o outfile.gtf, --outfile=outfile.gtf
                        Optional. Provide the name of the output file. Default
                        is 'selected_intervals.gtf'
    -r 100, --range=100
                        allows you to set the length of the UTR regions. If
                        you set '-r 50' or '--range=50', then the program will
                        set a fixed length (50 bp) regardless of whether the
                        GTF file has genes with annotated UTRs.
    -a protein_coding, --annotation=protein_coding
                        select which annotation (i.e. protein_coding, ncRNA,
                        sRNA, rRNA,snoRNA,snRNA, depending on the source of
                        your GTF file) you would like to focus your analysis
                        on. Default = all annotations
    -c yeast.txt, --chromfile=yeast.txt
                        Location of the chromosome info file. This file should
                        have two columns: first column is the names of the
                        chromosomes, second column is length of the
                        chromosomes. Default is yeast
    --gtf=yeast.gtf
                        Name of the annotation file. Default is
                        /usr/local/pyCRAC/db/Saccharomyces_cerevisiae.EF2.59.1.2.gtf
    -m MINFDR, --minfdr=MINFDR
                        To set a minimal FDR threshold for filtering interval
                        data. Default is 0.05
    --min=MIN
                        to set a minimal read coverages for a region. Regions
                        with coverage less than minimum will be ignored
    --iterations=ITERATIONS
                        to set the number of iterations for randomization of
                        read coordinates. Default=100
    </help>
</tool>