Mercurial > repos > swebb > pycrac
comparison pyCRAC/pyClusterReads.xml @ 0:19b20927172d draft
Uploaded
author | swebb |
---|---|
date | Tue, 18 Jun 2013 09:11:00 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:19b20927172d |
---|---|
1 <tool id="pyClusterReads" name="pyClusterReads" force_history_refresh="True"> | |
2 <requirements> | |
3 <requirement type="package">pyCRAC</requirement> | |
4 </requirements> | |
5 <command interpreter="python"> | |
6 /usr/local/bin/pyClusterReads.py | |
7 -f $input | |
8 --gtf=$addGTF.gtf | |
9 #if $addGTF.annotate.annotations != "all": | |
10 #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto": | |
11 --annotation=$addGTF.annotate.scan.annotation | |
12 #else: | |
13 --annotation=$addGTF.annotate.annotation | |
14 #end if# | |
15 #end if# | |
16 -o $output | |
17 #if $addOpt.options == "edit": | |
18 --range=$addOpt.range | |
19 --cic=$addOpt.cic | |
20 --co=$addOpt.co | |
21 --ch=$addOpt.ch | |
22 --cl=$addOpt.cl | |
23 --mutsfreq=$addOpt.mutsfreq | |
24 #end if# | |
25 </command> | |
26 <version_command>/usr/local/bin/pyClusterReads.py --version</version_command> | |
27 <inputs> | |
28 <param format="gtf" name="input" type="data" label="Input Read Data File -f" help="GTF format sorted by position i.e. pyReadCounters output file."/> | |
29 <conditional name="addGTF"> | |
30 <param name="gtfFile" type="select" label="Choose GTF File from"> | |
31 <option value="default" selected="true">Defaults</option> | |
32 <option value="other">History</option> | |
33 </param> | |
34 <when value="default"> | |
35 <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"> | |
36 <options from_data_table="pycrac_gtf"/> | |
37 </param> | |
38 <conditional name="annotate"> | |
39 <param name="annotations" type="select" label="Select annotation"> | |
40 <option value="all" selected="true">All</option> | |
41 <option value="manual">Enter in text box</option> | |
42 <option value="auto">Scan pyGetGTFSources file</option> | |
43 </param> | |
44 <when value="all"> | |
45 <param name="annotation" type="hidden" format="txt" size="10" value="all"/> | |
46 </when> | |
47 <when value="manual"> | |
48 <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool"> | |
49 <validator type="empty_field" message="Please enter a value"/> | |
50 </param> | |
51 </when> | |
52 <when value="auto"> | |
53 <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/> | |
54 <conditional name="scan"> | |
55 <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO"> | |
56 <option value="wait" selected="true">Waiting</option> | |
57 <option value="scanning">Go</option> | |
58 </param> | |
59 <when value="wait"> | |
60 </when> | |
61 <when value="scanning"> | |
62 <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation"> | |
63 <options from_dataset="gtf_annotation"> | |
64 <column name="name" index="0"/> | |
65 <column name="value" index="0"/> | |
66 </options> | |
67 </param> | |
68 </when> | |
69 </conditional> | |
70 </when> | |
71 </conditional> | |
72 | |
73 </when> | |
74 <when value="other"> | |
75 <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/> | |
76 <conditional name="annotate"> | |
77 <param name="annotations" type="select" label="Select annotation"> | |
78 <option value="all" selected="true">All</option> | |
79 <option value="manual">Enter in text box</option> | |
80 <option value="auto">Scan selected file</option> | |
81 </param> | |
82 <when value="all"> | |
83 <param name="annotation" type="hidden" format="txt" size="10" value="all"/> | |
84 </when> | |
85 <when value="manual"> | |
86 <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool"> | |
87 <validator type="empty_field" message="Please enter a value"/> | |
88 </param> | |
89 </when> | |
90 <when value="auto"> | |
91 <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation"> | |
92 <options from_dataset="gtf"> | |
93 <column name="name" index="1"/> | |
94 <column name="value" index="1"/> | |
95 <filter type="unique_value" name="unique" column="1"/> | |
96 </options> | |
97 </param> | |
98 </when> | |
99 </conditional> | |
100 </when> | |
101 </conditional> | |
102 | |
103 <conditional name="addOpt"> | |
104 <param name="options" type="select" label="Standard Options"> | |
105 <option value="default" selected="true">Default</option> | |
106 <option value="edit">Edit</option> | |
107 </param> | |
108 <when value="edit"> | |
109 <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs (0-50000)"> | |
110 <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/> | |
111 </param> | |
112 <param format="integer" name="ch" type="integer" label="Cluster height --ch" value="2" size="10" help="Minimal height of a cluster"> | |
113 <validator type="in_range" min="1" message="Please enter a value >= 1"/> | |
114 </param> | |
115 <param format="integer" name="cl" type="integer" label="Cluster length --cl" value="1" size="10" help="Maximum length of a cluster"> | |
116 <validator type="in_range" min="1" message="Please enter a value >= 1"/> | |
117 </param> | |
118 <param format="integer" name="cic" type="integer" label="cDNAs in clusters --cic" value="2" size="10" > | |
119 <validator type="in_range" min="2" message="Please enter a value >= 2"/> | |
120 </param> | |
121 <param format="integer" name="co" type="integer" label="cDNA-cluster nucleotide overlap --co" value="1" size="10" > | |
122 <validator type="in_range" min="1" message="Please enter a value >= 1"/> | |
123 </param> | |
124 <param format="integer" name="mutsfreq" type="integer" label="Minimum mutation frequency for a cluster position --mutsfreq" value="0" size="3" > | |
125 <validator type="in_range" min="0" max="100" message="Please enter a value between 0 and 100"/> | |
126 </param> | |
127 </when> | |
128 <when value="default"> | |
129 </when> | |
130 </conditional> | |
131 <param name="label" type="text" format="txt" size="30" value="pyClusterReads" label="Enter output file label -o" /> | |
132 </inputs> | |
133 <outputs> | |
134 <data format="gtf" name="output" label="${label.value}_clusters.gtf"/> | |
135 </outputs> | |
136 <help> | |
137 | |
138 .. class:: infomark | |
139 | |
140 **pyClusterReads** | |
141 | |
142 pyClusterReads is part of the pyCRAC_ package. It takes a reads_count_output GTF file from pyReadCounters and generates clusters from the interval coordinates. | |
143 Produces a GTF output file with cluster intervals and overlapping genomic features. | |
144 It also includes mutation frequencies (after the # character) for nucleotides in intervals using chromosomal coordinates. | |
145 The pyClusterReads GTF output file essentially has the same layout as other pyCRAC GTF output files. | |
146 | |
147 **NOTE!** By default it calls each cluster an "exon" but this has no meaning. It may overlap with an intron. | |
148 Use bedtools to extract those intervals that overlap with introns or other features. | |
149 | |
150 The maximum height of the cluster is indicated in column 8. | |
151 The text after the hash character (#) at the end of each line lists the chromosomal coordinates of mutated nucleotides within the cluster interval and their mutation frequencies. | |
152 | |
153 For example:: | |
154 | |
155 # 114099S100.0 | |
156 | |
157 indicates that 100% of the nucleotides in position 114099 were substituted in the cluster. | |
158 | |
159 An example of a pyClusterReads output file:: | |
160 | |
161 ##gff-version 2 | |
162 # generated by pyClusterReads.py version 0.0.1, Fri Jan 18 11:59:42 2013 | |
163 # pyClusterReads.py -f count_output_reads.gtf -o count_output_clusters.gtf -v | |
164 # chromosome feature source start end cDNAs strand height attributes | |
165 chrI cluster exon 112583 112643 6 - 5 gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 112612S75.0; | |
166 chrI cluster exon 113176 113232 3 - 3 gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113184S100.0; | |
167 chrI cluster exon 113334 113386 2 - 2 gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113349S50.0,113379S100.0; | |
168 chrI cluster exon 113534 113564 3 - 3 gene_id "INT_0_119,INT_0_114"; gene_name "INT_0_119,INT_0_114"; # 113554S33.3,113556S33.3,113557S33.3; | |
169 chrI cluster exon 113644 113691 5 - 4 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113649S50.0,113657S33.3,113679S25.0 | |
170 chrI cluster exon 113912 113958 2 - 2 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113932S50.0,113946S50.0; | |
171 chrI cluster exon 113966 114066 5 - 3 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113987S50.0,114033S33.3,114039S33.3; | |
172 chrI cluster exon 114067 114130 3 - 3 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 114099S100.0; | |
173 | |
174 .. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html | |
175 | |
176 ------ | |
177 | |
178 **Parameter list** | |
179 | |
180 | |
181 File input options:: | |
182 | |
183 -f reads.gtf, --input_file=reads.gtf | |
184 provide the path to your GTF read data file. NOTE the | |
185 file has to be correctly sorted! If you used | |
186 pyReadCounters to generate the file you should be | |
187 fine. If you modified it, use the sort command | |
188 described in the manual to sort your file first by | |
189 chromosome, then by strand and then by start position. | |
190 -o clusters.gtf, --output_file=clusters.gtf | |
191 provide a name for an output file. By default it | |
192 writes to the standard output | |
193 --gtf=Yourfavoritegtf.gtf | |
194 type the path to the gtf annotation file that you want | |
195 to use | |
196 | |
197 Common pyCRAC options:: | |
198 | |
199 -r 100, --range=100 | |
200 allows you to set the length of the UTR regions. If | |
201 you set '-r 50' or '--range=50', then the program will | |
202 set a fixed length (50 bp) regardless of whether the | |
203 GTF annotation file has genes with annotated UTRs. | |
204 -a protein_coding, --annotation=protein_coding | |
205 select which annotation (i.e. protein_coding, ncRNA, | |
206 sRNA, rRNA,snoRNA,snRNA, depending on the source of | |
207 your GTF file) you would like to focus your analysis | |
208 on. Default = all annotations | |
209 | |
210 Options for cluster analysis:: | |
211 | |
212 --cic=2, --cdnasinclusters=2 | |
213 sets the minimal number of overlapping cDNAs in each | |
214 cluster. Default = 2 | |
215 --co=5, --clusteroverlap=5 | |
216 sets the number of nucleotides cDNA sequences have to | |
217 overlap to form a cluster. Default = 1 nucleotide | |
218 --ch=5, --clusterheight=5 | |
219 sets the minimal height of the cluster. Default = 2 | |
220 nucleotides | |
221 --cl=100, --clusterlength=100 | |
222 to set the maximum cluster sequence length | |
223 --mutsfreq=10, --mutationfrequency=10 | |
224 sets the minimal mutation frequency for a cluster | |
225 position in the GTF output file. Default = 0%. | |
226 Example: if mutsfreq is set to 10 and a | |
227 cluster position is mutated in less than 10% of the | |
228 reads, then the mutation will not be reported. | |
229 </help> | |
230 </tool> |