comparison pyCRAC/pyCalculateFDRs.xml @ 0:19b20927172d draft

Uploaded
author swebb
date Tue, 18 Jun 2013 09:11:00 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:19b20927172d
1 <tool id ="pyCalculateFDRs" name="pyCalculateFDRs">
2 <requirements> <!-- External software this tool depends on: the pyCRAC package. -->
3 <requirement type="package">pyCRAC</requirement>
4 </requirements>
5 <command interpreter="python">
6 /usr/local/bin/pyCalculateFDRs.py
7 -f $ftype.input
8 --file_type $ftype.file_type
9 --gtf=$addGTF.gtf
10 ## --annotation is left out when "All" is chosen or while the scan selector is still "Waiting" (scan.annotation does not exist then).
11 #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":
12 #if $addGTF.annotate.scan.annotations == "scanning":
13 --annotation '$addGTF.annotate.scan.annotation'
14 #end if#
15 #elif $addGTF.annotate.annotations != "all":
16 --annotation '$addGTF.annotate.annotation'
17 #end if#
18 --chromfile=$addChr.chr
19 #if $addOpt.options == "edit":
20 -s $addOpt.sequence
21 --min $addOpt.min
22 --minfdr $addOpt.minfdr
23 --iterations=$addOpt.iterations
24 --range $addOpt.range
25 #end if#
26 -o $output
27
28 </command>
29 <version_command>/usr/local/bin/pyCalculateFDRs.py --version</version_command>
30 <inputs>
31 <conditional name="ftype"> <!-- Selects the read-file format; each branch declares the "input" dataset with the matching format. -->
32 <param name="file_type" type="select" label="Input File Type --file_type" help="Use bed6, gff or gtf input files containing read/cDNA co-ordinates">
33 <option value="gff" selected="true">GFF</option>
34 <option value="bed">Bed6</option>
35 <option value="gtf">GTF</option>
36 </param>
37 <when value="gff">
38 <param name="input" type="data" format="gff" label="Input File --readdatafile" help="GFF format containing read/cDNA co-ordinates"/>
39 </when>
40 <when value="bed">
41 <param name="input" type="data" format="bed6" label="Input File --readdatafile" help="Bed 6 column format containing read/cDNA co-ordinates"/>
42 </when>
43 <when value="gtf">
44 <param name="input" type="data" format="gtf" label="Input File --readdatafile" help="GTF format containing read/cDNA co-ordinates"/>
45 </when>
46 </conditional>
47
48 <conditional name="addChr"> <!-- Chromosome length file: either an entry of the pycrac_chr data table or a tabular dataset from the history. -->
49 <param name="chrfile" type="select" label="Choose Chromosome length file from">
50 <option value="default" selected="true">Defaults</option>
51 <option value="other">History</option>
52 </param>
53 <when value="default">
54 <param name="chr" type="select" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes">
55 <options from_data_table="pycrac_chr" />
56 </param>
57 </when>
58 <when value="other">
59 <param name="chr" type="data" format="tabular" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes. Use pyCrac utility pyCalculateChromosomeLengths to create." />
60 </when>
61 </conditional>
62
63 <conditional name="addGTF"> <!-- Chooses the GTF annotation file (data table entry or history dataset) and, optionally, one annotation to restrict the analysis to. -->
64 <param name="gtfFile" type="select" label="Choose GTF File from">
65 <option value="default" selected="true">Defaults</option>
66 <option value="other">History</option>
67 </param>
68 <when value="default">
69 <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
70 <options from_data_table="pycrac_gtf"/>
71 </param>
72 <conditional name="annotate">
73 <param name="annotations" type="select" label="Select annotation">
74 <option value="all" selected="true">All</option>
75 <option value="manual">Enter in text box</option>
76 <option value="auto">Scan pyGetGTFSources file</option>
77 </param>
78 <when value="all">
79 <param name="annotation" type="hidden" value="all"/>
80 </when>
81 <when value="manual">
82 <param name="annotation" type="text" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
83 <validator type="empty_field" message="Please enter a value"/>
84 </param>
85 </when>
86 <when value="auto">
87 <param name="gtf_annotation" type="data" format="tabular" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
88 <conditional name="scan"> <!-- The annotation list is read from the chosen file only after "Go" is selected; "Waiting" exposes no annotation parameter. -->
89 <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
90 <option value="wait" selected="true">Waiting</option>
91 <option value="scanning">Go</option>
92 </param>
93 <when value="wait">
94 </when>
95 <when value="scanning">
96 <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
97 <options from_dataset="gtf_annotation">
98 <column name="name" index="0"/>
99 <column name="value" index="0"/>
100 </options>
101 </param>
102 </when>
103 </conditional>
104 </when>
105 </conditional>
106 </when>
107 <when value="other">
108 <param name="gtf" type="data" format="gtf" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
109 <conditional name="annotate">
110 <param name="annotations" type="select" label="Select annotation">
111 <option value="all" selected="true">All</option>
112 <option value="manual">Enter in text box</option>
113 <option value="auto">Scan selected file</option>
114 </param>
115 <when value="all">
116 <param name="annotation" type="hidden" value="all"/>
117 </when>
118 <when value="manual">
119 <param name="annotation" type="text" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
120 <validator type="empty_field" message="Please enter a value"/>
121 </param>
122 </when>
123 <when value="auto">
124 <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
125 <options from_dataset="gtf"> <!-- Offers the unique values found in the second column (index 1) of the selected GTF dataset. -->
126 <column name="name" index="1"/>
127 <column name="value" index="1"/>
128 <filter type="unique_value" name="unique" column="1"/>
129 </options>
130 </param>
131 </when>
132 </conditional>
133 </when>
134 </conditional>
135 <conditional name="addOpt"> <!-- Optional tuning parameters; the command template only passes them when "Edit" is selected. -->
136 <param name="options" type="select" label="Standard options">
137 <option value="default" selected="true">Default</option>
138 <option value="edit">Edit</option>
139 </param>
140 <when value="edit">
141 <param name="sequence" type="select" label="Align reads to --sequence">
142 <option value="genomic" selected="true">Genomic Sequence</option>
143 <option value="coding">Coding Sequence</option>
144 </param>
145 <param name="min" type="integer" label="Minimum read coverage --min " value="1" size="10" help="Set the minimal read coverage for a region">
146 <validator type="in_range" min="1" message="Please enter a value &gt;= 1"/>
147 </param>
148 <param name="minfdr" type="float" label="Minimum FDR threshold --minfdr" value="0.05" size="6" help="Set a minimal FDR threshold for filtering interval data">
149 <validator type="in_range" min="0" max="1" message="Please enter a value between 0 and 1"/>
150 </param>
151 <param name="iterations" type="integer" label="Number of iterations --iterations" value="100" size="6" help="The number of iterations for randomization of read coordinates">
152 <validator type="in_range" min="0" message="Please enter a value &gt;= 0"/>
153 </param>
154 <param name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs (0 to 50000)">
155 <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
156 </param>
157 </when>
158 <when value="default">
159 </when>
160 </conditional>
161 <param name="label" type="text" size="30" value="pyCalculateFDRs" label="Enter output file label -o" /> <!-- Only used to name the output dataset in the history; the command template does not reference it. -->
162 </inputs>
163 <outputs> <!-- One GTF dataset, written via "-o $output"; its history label is the user-supplied label followed by ".gtf". -->
164 <data format="gtf" name="output" label="${label.value}.gtf"/>
165 </outputs>
166 <help>
167
168 .. class:: infomark
169
170 **pyCalculateFDRs**
171
172 By default the FDR threshold is set to 0.05, meaning that about 5% of the reported intervals are expected to be false positives (i.e. not truly enriched).
173 The tool reports significant intervals in the GTF format and reports overlapping genomic features.
174 Mutation frequencies are not included, but these can be added using the pyCalculateMutationFrequencies tool.
175
176 **NOTE!** By default it calls each significant interval an "exon", but this label has no meaning! The interval may just as well overlap with an intron.
177 Use bedtools to extract those intervals that overlap with introns or other features.
178
179 Example of an output file::
180
181 ##gff-version 2
182 # generated by pyCalculateFDRs version 0.0.3, Sat Jun 1 21:16:23 2013
183 # pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05
184 # chromosome feature source start end minimal_coverage strand . attributes
185 chrI protein_coding exon 140846 140860 5 - . gene_id "YAL005C"; gene_name "SSA1";
186 chrI intergenic_region exon 223118 223164 4 - . gene_id "INT_0_179"; gene_name "INT_0_179";
187 chrI intergenic_region exon 71889 71922 3 + . gene_id "INT_0_94"; gene_name "INT_0_94";
188 chrII intergenic_region exon 296127 296158 3 - . gene_id "INT_0_365"; gene_name "INT_0_365";
189 chrII intergenic_region exon 680697 680722 4 - . gene_id "INT_0_626"; gene_name "INT_0_626";
190 chrII intergenic_region exon 680827 680846 4 - . gene_id "INT_0_626"; gene_name "INT_0_626";
191 chrII snRNA exon 680827 680838 5 - . gene_id "LSR1"; gene_name "LSR1";
192 chrII snRNA exon 680951 681001 5 - . gene_id "LSR1"; gene_name "LSR1";
193 chrII intergenic_region exon 577985 577996 3 - . gene_id "INT_0_556"; gene_name "INT_0_556";
194 chrII protein_coding exon 203838 203887 3 + . gene_id "YBL011W"; gene_name "SCT1";
195 chrII protein_coding exon 296127 296158 3 - . gene_id "YBR028C"; gene_name "YBR028C";
196
197
198 pyCalculateFDRs is part of the pyCRAC_ package. It takes interval information in GFF, GTF or bed format and calculates False Discovery Rates (FDRs).
199
200
201 .. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
202
203 ------
204
205 **Parameter list**
206
207 Options::
208
209 -f read_file, --readdatafile=read_file
210 Name of the bed/gff/gtf file containing the read/cDNA
211 coordinates
212 --file_type=FILE_TYPE
213 this tool supports bed6, gtf and gff input files.
214 Please select from 'bed','gtf' or 'gff'. Default=gtf
215 -o outfile.gtf, --outfile=outfile.gtf
216 Optional. Provide the name of the output file. Default
217 is 'selected_intervals.gtf'
218 -r 100, --range=100
219 allows you to set the length of the UTR regions. If
220 you set '-r 50' or '--range=50', then the program will
221 set a fixed length (50 bp) regardless of whether the
222 GTF file has genes with annotated UTRs.
223 -a protein_coding, --annotation=protein_coding
224 select which annotation (i.e. protein_coding, ncRNA,
225 sRNA, rRNA,snoRNA,snRNA, depending on the source of
226 your GTF file) you would like to focus your analysis
227 on. Default = all annotations
228 -c yeast.txt, --chromfile=yeast.txt
229 Location of the chromosome info file. This file should
230 have two columns: first column is the names of the
231 chromosomes, second column is length of the
232 chromosomes. Default is yeast
233 --gtf=yeast.gtf
234 Name of the annotation file. Default is
235 /usr/local/pyCRAC/db/Saccharomyces_cerevisiae.EF2.59.1.2.gtf
236 -m MINFDR, --minfdr=MINFDR
237 To set a minimal FDR threshold for filtering interval
238 data. Default is 0.05
239 --min=MIN
240 to set a minimal read coverage for a region.
241 Regions with coverage less than the minimum will be
242 ignored.
243 --iterations=ITERATIONS
244 to set the number of iterations for randomization of
245 read coordinates. Default=100
246 </help>
247 </tool>