<?xml version="1.0" encoding="UTF-8"?>
<!--
    Galaxy tool wrapper for pyClusterReads.py (pyCRAC package).

    Input : a GTF file of read intervals (for example pyReadCounters output)
            plus a GTF annotation file, taken either from the "pycrac_gtf"
            data table or from the history.
    Output: one GTF dataset, labelled "<label>_clusters.gtf".

    The annotation filter is only passed to the script when the user picks
    something other than "All"; the cluster options are only passed when
    "Standard Options" is set to "Edit".
-->
<tool id="pyClusterReads" name="pyClusterReads" force_history_refresh="True">
    <requirements>
        <requirement type="package">pyCRAC</requirement>
    </requirements>
    <!-- The executable must remain the first token of the command. -->
    <command interpreter="python">
        /usr/local/bin/pyClusterReads.py
        -f $input
        --gtf=$addGTF.gtf
        ## "all" means: do not pass --annotation at all.
        #if $addGTF.annotate.annotations != "all":
            #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":
                ## The select list only exists once the scan conditional is set to
                ## "scanning"; in the "wait" state there is no value to reference.
                #if $addGTF.annotate.scan.annotations == "scanning":
                    --annotation=$addGTF.annotate.scan.annotation
                #end if#
            #else:
                --annotation=$addGTF.annotate.annotation
            #end if#
        #end if#
        -o $output
        #if $addOpt.options == "edit":
            --range=$addOpt.range
            --cic=$addOpt.cic
            --co=$addOpt.co
            --ch=$addOpt.ch
            --cl=$addOpt.cl
            --mutsfreq=$addOpt.mutsfreq
        #end if#
    </command>
    <version_command>/usr/local/bin/pyClusterReads.py --version</version_command>
    <inputs>
        <param format="gtf" name="input" type="data" label="Input Read Data File -f" help="GTF format sorted by position i.e. pyReadCounters output file."/>

        <!-- Source of the GTF annotation file: built-in data table or history. -->
        <conditional name="addGTF">
            <param name="gtfFile" type="select" label="Choose GTF File from">
                <option value="default" selected="true">Defaults</option>
                <option value="other">History</option>
            </param>
            <when value="default">
                <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
                    <options from_data_table="pycrac_gtf"/>
                </param>
                <conditional name="annotate">
                    <param name="annotations" type="select" label="Select annotation">
                        <option value="all" selected="true">All</option>
                        <option value="manual">Enter in text box</option>
                        <option value="auto">Scan pyGetGTFSources file</option>
                    </param>
                    <when value="all">
                        <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
                    </when>
                    <when value="manual">
                        <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
                            <validator type="empty_field" message="Please enter a value"/>
                        </param>
                    </when>
                    <when value="auto">
                        <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
                        <!-- Two-step selection: the annotation list is only shown after the user chooses "Go". -->
                        <conditional name="scan">
                            <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
                                <option value="wait" selected="true">Waiting</option>
                                <option value="scanning">Go</option>
                            </param>
                            <when value="wait"/>
                            <when value="scanning">
                                <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
                                    <options from_dataset="gtf_annotation">
                                        <column name="name" index="0"/>
                                        <column name="value" index="0"/>
                                    </options>
                                </param>
                            </when>
                        </conditional>
                    </when>
                </conditional>
            </when>
            <when value="other">
                <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
                <conditional name="annotate">
                    <param name="annotations" type="select" label="Select annotation">
                        <option value="all" selected="true">All</option>
                        <option value="manual">Enter in text box</option>
                        <option value="auto">Scan selected file</option>
                    </param>
                    <when value="all">
                        <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
                    </when>
                    <when value="manual">
                        <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
                            <validator type="empty_field" message="Please enter a value"/>
                        </param>
                    </when>
                    <when value="auto">
                        <!-- Options are the unique values of column index 1 of the selected GTF dataset. -->
                        <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
                            <options from_dataset="gtf">
                                <column name="name" index="1"/>
                                <column name="value" index="1"/>
                                <filter type="unique_value" name="unique" column="1"/>
                            </options>
                        </param>
                    </when>
                </conditional>
            </when>
        </conditional>

        <!-- Optional cluster settings; nothing is passed to the script when "Default" is selected. -->
        <conditional name="addOpt">
            <param name="options" type="select" label="Standard Options">
                <option value="default" selected="true">Default</option>
                <option value="edit">Edit</option>
            </param>
            <when value="edit">
                <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs (0 to 50000)">
                    <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
                </param>
                <param format="integer" name="ch" type="integer" label="Cluster height --ch" value="2" size="10" help="Minimal height of a cluster">
                    <validator type="in_range" min="1" message="Please enter a value >= 1"/>
                </param>
                <param format="integer" name="cl" type="integer" label="Cluster length --cl" value="1" size="10" help="Maximum length of a cluster">
                    <validator type="in_range" min="1" message="Please enter a value >= 1"/>
                </param>
                <param format="integer" name="cic" type="integer" label="cDNAs in clusters --cic" value="2" size="10">
                    <!-- The message states the same bound the validator enforces (min is 2). -->
                    <validator type="in_range" min="2" message="Please enter a value >= 2"/>
                </param>
                <param format="integer" name="co" type="integer" label="cDNA-cluster nucleotide overlap --co" value="1" size="10">
                    <validator type="in_range" min="1" message="Please enter a value >= 1"/>
                </param>
                <param format="integer" name="mutsfreq" type="integer" label="Minimum mutation frequency for a cluster position --mutsfreq" value="0" size="3">
                    <validator type="in_range" min="0" max="100" message="Please enter a value between 0 and 100"/>
                </param>
            </when>
            <when value="default"/>
        </conditional>
        <!-- Only used to build the label of the output dataset; it is not passed to the script. -->
        <param name="label" type="text" format="txt" size="30" value="pyClusterReads" label="Enter output file label -o"/>
    </inputs>
    <outputs>
        <data format="gtf" name="output" label="${label.value}_clusters.gtf"/>
    </outputs>
    <help>

.. class:: infomark

**pyClusterReads**

pyClusterReads is part of the pyCRAC_ package. It takes a reads_count_output GTF file from pyReadCounters and generates clusters from the interval coordinates.
Produces a GTF output file with cluster intervals and overlapping genomic features.
It also includes mutation frequencies (after the # character) for nucleotides in intervals using chromosomal coordinates.
The pyClusterReads GTF output file essentially has the same layout as other pyCRAC GTF output files.

**NOTE!** By default it calls each cluster an "exon" but this has no meaning. It may overlap with an intron.
Use bedtools to extract those intervals that overlap with introns or other features.

The maximum height of the cluster is indicated in column 8.
The hash character at the end of each line (#) shows chromosomal coordinates of mutated nucleotides within the cluster interval and their mutation frequencies.

For example::

    # 114099S100.0

indicates that 100% of the nucleotides in position 114099 were substituted in the cluster.

An example of a pyClusterReads output file::

    ##gff-version 2
    # generated by pyClusterReads.py version 0.0.1, Fri Jan 18 11:59:42 2013
    # pyClusterReads.py -f count_output_reads.gtf -o count_output_clusters.gtf -v
    # chromosome feature source start end cDNAs strand height attributes
    chrI cluster exon 112583 112643 6 - 5 gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 112612S75.0;
    chrI cluster exon 113176 113232 3 - 3 gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113184S100.0;
    chrI cluster exon 113334 113386 2 - 2 gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113349S50.0,113379S100.0;
    chrI cluster exon 113534 113564 3 - 3 gene_id "INT_0_119,INT_0_114"; gene_name "INT_0_119,INT_0_114"; # 113554S33.3,113556S33.3,113557S33.3;
    chrI cluster exon 113644 113691 5 - 4 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113649S50.0,113657S33.3,113679S25.0
    chrI cluster exon 113912 113958 2 - 2 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113932S50.0,113946S50.0;
    chrI cluster exon 113966 114066 5 - 3 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113987S50.0,114033S33.3,114039S33.3;
    chrI cluster exon 114067 114130 3 - 3 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 114099S100.0;

.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html

------

**Parameter list**


File input options::

    -f reads.gtf, --input_file=reads.gtf
                        provide the path to your GTF read data file. NOTE the
                        file has to be correctly sorted! If you used
                        pyReadCounters to generate the file you should be
                        fine. If you modified it, use the sort command
                        described in the manual to sort your file first by
                        chromosome, then by strand and then by start position.
    -o clusters.gtf, --output_file=clusters.gtf
                        provide a name for an output file. By default it
                        writes to the standard output
    --gtf=Yourfavoritegtf.gtf
                        type the path to the gtf annotation file that you want
                        to use

Common pyCRAC options::

    -r 100, --range=100
                        allows you to set the length of the UTR regions. If
                        you set '-r 50' or '--range=50', then the program will
                        set a fixed length (50 bp) regardless of whether the
                        GTF annotation file has genes with annotated UTRs.
    -a protein_coding, --annotation=protein_coding
                        select which annotation (i.e. protein_coding, ncRNA,
                        sRNA, rRNA,snoRNA,snRNA, depending on the source of
                        your GTF file) you would like to focus your analysis
                        on. Default = all annotations

Options for cluster analysis::

    --cic=2, --cdnasinclusters=2
                        sets the minimal number of overlapping cDNAs in each
                        cluster. Default = 2
    --co=5, --clusteroverlap=5
                        sets the number of nucleotides cDNA sequences have to
                        overlap to form a cluster. Default = 1 nucleotide
    --ch=5, --clusterheight=5
                        sets the minimal height of the cluster. Default = 2
                        nucleotides
    --cl=100, --clusterlength=100
                        to set the maximum cluster sequence length
    --mutsfreq=10, --mutationfrequency=10
                        sets the minimal mutations frequency for a cluster
                        position in the GTF output file. Default = 0%.
                        Example: if the mutsfrequency is set at 10 and a
                        cluster position is mutated in less than 10% of the
                        reads, then the mutation will not be reported.
    </help>
</tool>