Mercurial > repos > swebb > pycrac
comparison pyCRAC/pyCalculateFDRs.xml @ 0:19b20927172d draft
Uploaded
author | swebb |
---|---|
date | Tue, 18 Jun 2013 09:11:00 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:19b20927172d |
---|---|
1 <tool id ="pyCalculateFDRs" name="pyCalculateFDRs"> | |
2 <requirements> <!-- packages this tool declares as dependencies --> | |
3 <requirement type="package">pyCRAC</requirement> | |
4 </requirements> | |
5 <command interpreter="python"> | |
6 /usr/local/bin/pyCalculateFDRs.py | |
7 -f $ftype.input | |
8 --file_type $ftype.file_type | |
9 --gtf=$addGTF.gtf | |
10 ## --annotation is emitted only when the selected branch defines a value: the "auto" scan of a default GTF has none until "Go" is chosen, and "all" needs no flag. | |
11 #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto": | |
12 #if $addGTF.annotate.scan.annotations == "scanning": | |
13 --annotation "$addGTF.annotate.scan.annotation" | |
14 #end if# | |
15 #elif $addGTF.annotate.annotations != "all": | |
16 --annotation "$addGTF.annotate.annotation" | |
17 #end if# | |
18 --chromfile=$addChr.chr | |
19 #if $addOpt.options == "edit": | |
20 -s $addOpt.sequence | |
21 --min $addOpt.min | |
22 --minfdr $addOpt.minfdr | |
23 --iterations=$addOpt.iterations | |
24 --range $addOpt.range | |
25 #end if# | |
26 -o $output | |
27 | |
28 </command> | |
29 <version_command>/usr/local/bin/pyCalculateFDRs.py --version</version_command> | |
30 <inputs> | |
31 <conditional name="ftype"> <!-- picks the input format; every branch defines an "input" dataset restricted to that datatype --> | |
32 <param type="select" name="file_type" label="Input File Type --file_type" help="Use bed6, gff or gtf input files containing read/cDNA co-ordinates"> | |
33 <option selected="true" value="gff">GFF</option> | |
34 <option value="bed">Bed6</option> | |
35 <option value="gtf">GTF</option> | |
36 </param> | |
37 <when value="gff"> | |
38 <param type="data" name="input" format="gff" label="Input File --readdatafile" help="GFF format containing read/cDNA co-ordinates"/> | |
39 </when> | |
40 <when value="gtf"> | |
41 <param type="data" name="input" format="gtf" label="Input File --readdatafile" help="GTF format containing read/cDNA co-ordinates"/> | |
42 </when> | |
43 <when value="bed"> | |
44 <param type="data" name="input" format="bed6" label="Input File --readdatafile" help="Bed 6 column format containing read/cDNA co-ordinates"/> | |
45 </when> | |
46 </conditional> | |
47 | |
48 <conditional name="addChr"> <!-- chromosome length file: either an entry of the pycrac_chr data table or a tabular history dataset; both branches name it "chr" --> | |
49 <param type="select" name="chrfile" label="Choose Chromosome length file from"> | |
50 <option selected="true" value="default">Defaults</option> | |
51 <option value="other">History</option> | |
52 </param> | |
53 <when value="default"> | |
54 <param type="select" name="chr" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes"> | |
55 <options from_data_table="pycrac_chr"/> | |
56 </param> | |
57 </when> | |
58 <when value="other"> | |
59 <param type="data" name="chr" format="tabular" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes. Use pyCrac utility pyCalculateChromosomeLengths to create."/> | |
60 </when> | |
61 </conditional> | |
62 | |
63 <conditional name="addGTF"> <!-- GTF source (data table entry or history dataset) plus an optional single annotation to restrict the analysis to --> | |
64 <param name="gtfFile" type="select" label="Choose GTF File from"> | |
65 <option value="default" selected="true">Defaults</option> | |
66 <option value="other">History</option> | |
67 </param> | |
68 <when value="default"> | |
69 <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"> | |
70 <options from_data_table="pycrac_gtf"/> | |
71 </param> | |
72 <conditional name="annotate"> | |
73 <param name="annotations" type="select" label="Select annotation"> | |
74 <option value="all" selected="true">All</option> | |
75 <option value="manual">Enter in text box</option> | |
76 <option value="auto">Scan pyGetGTFSources file</option> | |
77 </param> | |
78 <when value="all"> | |
79 <param name="annotation" type="hidden" value="all"/> | |
80 </when> | |
81 <when value="manual"> | |
82 <param name="annotation" type="text" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool"> | |
83 <validator type="empty_field" message="Please enter a value"/> | |
84 </param> | |
85 </when> | |
86 <when value="auto"> | |
87 <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/> | |
88 <conditional name="scan"> <!-- the "annotation" select exists only in the "scanning" branch; "wait" defines no parameters --> | |
89 <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO"> | |
90 <option value="wait" selected="true">Waiting</option> | |
91 <option value="scanning">Go</option> | |
92 </param> | |
93 <when value="wait"> | |
94 </when> | |
95 <when value="scanning"> | |
96 <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation"> | |
97 <options from_dataset="gtf_annotation"> | |
98 <column name="name" index="0"/> | |
99 <column name="value" index="0"/> | |
100 </options> | |
101 </param> | |
102 </when> | |
103 </conditional> | |
104 </when> | |
105 </conditional> | |
106 </when> | |
107 <when value="other"> | |
108 <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/> | |
109 <conditional name="annotate"> | |
110 <param name="annotations" type="select" label="Select annotation"> | |
111 <option value="all" selected="true">All</option> | |
112 <option value="manual">Enter in text box</option> | |
113 <option value="auto">Scan selected file</option> | |
114 </param> | |
115 <when value="all"> | |
116 <param name="annotation" type="hidden" value="all"/> | |
117 </when> | |
118 <when value="manual"> | |
119 <param name="annotation" type="text" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool"> | |
120 <validator type="empty_field" message="Please enter a value"/> | |
121 </param> | |
122 </when> | |
123 <when value="auto"> | |
124 <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation"> | |
125 <options from_dataset="gtf"> | |
126 <column name="name" index="1"/> | |
127 <column name="value" index="1"/> | |
128 <filter type="unique_value" name="unique" column="1"/> | |
129 </options> | |
130 </param> | |
131 </when> | |
132 </conditional> | |
133 </when> | |
134 </conditional> | |
135 <conditional name="addOpt"> <!-- "edit" exposes the tuning parameters; "default" defines none, so the command template passes none of them --> | |
136 <param name="options" type="select" label="Standard options"> | |
137 <option value="default" selected="true">Default</option> | |
138 <option value="edit">Edit</option> | |
139 </param> | |
140 <when value="edit"> | |
141 <param name="sequence" type="select" label="Align reads to --sequence"> | |
142 <option value="genomic" selected="true">Genomic Sequence</option> | |
143 <option value="coding">Coding Sequence</option> | |
144 </param> | |
145 <param name="min" type="integer" label="Minimum read coverage --min " value="1" size="10" help="Set the minimal read coverage for a region"> | |
146 <validator type="in_range" min="1" message="Please enter a value >= 1"/> | |
147 </param> | |
148 <param name="minfdr" type="float" label="Minimum FDR threshold --minfdr" value="0.05" size="6" help="Set a minimal FDR threshold for filtering interval data"> | |
149 <validator type="in_range" min="0" max="1" message="Please enter a value between 0 and 1"/> | |
150 </param> | |
151 <param name="iterations" type="integer" label="Number of iterations --iterations" value="100" size="6" help="The number of iterations for randomization of read coordinates"> | |
152 <validator type="in_range" min="0" message="Please enter a value >= 0"/> | |
153 </param> | |
154 <param name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs (0 to 50000)"> | |
155 <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/> | |
156 </param> | |
157 </when> | |
158 <when value="default"> | |
159 </when> | |
160 </conditional> | |
161 <param name="label" type="text" size="30" value="pyCalculateFDRs" label="Enter output file label -o" /> <!-- free text; the output dataset's label is built from this value --> | |
162 </inputs> | |
163 <outputs> <!-- a single GTF dataset, labelled with the text entered in the "label" input --> | |
164 <data name="output" format="gtf" label="${label.value}.gtf"/> | |
165 </outputs> | |
166 <help> | |
167 | |
168 .. class:: infomark | |
169 | |
170 **pyCalculateFDRs** | |
171 | |
172 By default the FDR value is set to 0.05, meaning that there is a 5% chance that the interval is not significantly enriched. | |
173 The tool reports significant intervals in the GTF format and reports overlapping genomic features. | |
174 Mutation frequencies are not included but these can be added using the pyCalculateMutationFrequencies tool | |
175 | |
176 **NOTE!** By default it calls each significant interval an "exon" but this has no meaning! It may overlap with an intron. | |
177 Use bedtools to extract those intervals that overlap with introns or other features | |
178 | |
179 Example of an output file:: | |
180 | |
181 ##gff-version 2 | |
182 # generated by pyCalculateFDRs version 0.0.3, Sat Jun 1 21:16:23 2013 | |
183 # pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05 | |
184 # chromosome feature source start end minimal_coverage strand . attributes | |
185 chrI protein_coding exon 140846 140860 5 - . gene_id "YAL005C"; gene_name "SSA1"; | |
186 chrI intergenic_region exon 223118 223164 4 - . gene_id "INT_0_179"; gene_name "INT_0_179"; | |
187 chrI intergenic_region exon 71889 71922 3 + . gene_id "INT_0_94"; gene_name "INT_0_94"; | |
188 chrII intergenic_region exon 296127 296158 3 - . gene_id "INT_0_365"; gene_name "INT_0_365"; | |
189 chrII intergenic_region exon 680697 680722 4 - . gene_id "INT_0_626"; gene_name "INT_0_626"; | |
190 chrII intergenic_region exon 680827 680846 4 - . gene_id "INT_0_626"; gene_name "INT_0_626"; | |
191 chrII snRNA exon 680827 680838 5 - . gene_id "LSR1"; gene_name "LSR1"; | |
192 chrII snRNA exon 680951 681001 5 - . gene_id "LSR1"; gene_name "LSR1"; | |
193 chrII intergenic_region exon 577985 577996 3 - . gene_id "INT_0_556"; gene_name "INT_0_556"; | |
194 chrII protein_coding exon 203838 203887 3 + . gene_id "YBL011W"; gene_name "SCT1"; | |
195 chrII protein_coding exon 296127 296158 3 - . gene_id "YBR028C"; gene_name "YBR028C"; | |
196 | |
197 | |
198 pyCalculateFDRs is part of the pyCRAC_ package. Takes interval information in GFF, GTF or bed format and calculates False Discovery Rates (FDRs). | |
199 | |
200 | |
201 .. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html | |
202 | |
203 ------ | |
204 | |
205 **Parameter list** | |
206 | |
207 Options:: | |
208 | |
209 -f read_file, --readdatafile=read_file | |
210 Name of the bed/gff/gtf file containing the read/cDNA | |
211 coordinates | |
212 --file_type=FILE_TYPE | |
213 this tool supports bed6, gtf and gff input files. | |
214 Please select from 'bed','gtf' or 'gff'. Default=gtf | |
215 -o outfile.gtf, --outfile=outfile.gtf | |
216 Optional. Provide the name of the output file. Default | |
217 is 'selected_intervals.gtf' | |
218 -r 100, --range=100 | |
219 allows you to set the length of the UTR regions. If | |
220 you set '-r 50' or '--range=50', then the program will | |
221 set a fixed length (50 bp) regardless of whether the | |
222 GTF file has genes with annotated UTRs. | |
223 -a protein_coding, --annotation=protein_coding | |
224 select which annotation (i.e. protein_coding, ncRNA, | |
225 sRNA, rRNA,snoRNA,snRNA, depending on the source of | |
226 your GTF file) you would like to focus your analysis | |
227 on. Default = all annotations | |
228 -c yeast.txt, --chromfile=yeast.txt | |
229 Location of the chromosome info file. This file should | |
230 have two columns: first column is the names of the | |
231 chromosomes, second column is length of the | |
232 chromosomes. Default is yeast | |
233 --gtf=yeast.gtf | |
234 Name of the annotation file. Default is /usr/local/pyC | |
235 RAC/db/Saccharomyces_cerevisiae.EF2.59.1.2.gtf | |
236 -m MINFDR, --minfdr=MINFDR | |
237 To set a minimal FDR threshold for filtering interval | |
238 data. Default is 0.05 | |
239 --min=MIN | |
240 to set a minimal read coverage for a region. Regions | |
241 with coverage less than the minimum will be ignored or | |
242 have an FDR of zero | |
243 --iterations=ITERATIONS | |
244 to set the number of iterations for randomization of | |
245 read coordinates. Default=100 | |
246 </help> | |
247 </tool> |