comparison pyCRAC/pyClusterReads.xml @ 0:19b20927172d draft

Uploaded
author swebb
date Tue, 18 Jun 2013 09:11:00 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:19b20927172d
1 <tool id="pyClusterReads" name="pyClusterReads" force_history_refresh="True">
2 <requirements> <!-- Packages this tool wrapper declares as dependencies. -->
3 <requirement type="package">pyCRAC</requirement> <!-- package-type requirement named pyCRAC -->
4 </requirements>
5 <!-- Builds the pyClusterReads.py call: input (-f), GTF and output (-o) are always passed; the annotation option is added unless "All" is selected; the cluster options are added only when Standard Options is set to "Edit". --> <command interpreter="python">
6 /usr/local/bin/pyClusterReads.py
7 -f $input
8 --gtf=$addGTF.gtf
9 #if $addGTF.annotate.annotations != "all":
10 #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":
11 --annotation=$addGTF.annotate.scan.annotation
12 #else:
13 --annotation=$addGTF.annotate.annotation
14 #end if#
15 #end if#
16 -o $output
17 #if $addOpt.options == "edit":
18 --range=$addOpt.range
19 --cic=$addOpt.cic
20 --co=$addOpt.co
21 --ch=$addOpt.ch
22 --cl=$addOpt.cl
23 --mutsfreq=$addOpt.mutsfreq
24 #end if#
25 </command> <!-- NOTE(review): for Defaults + "Scan pyGetGTFSources file" the template reads $addGTF.annotate.scan.annotation, but the "wait" case of the scan conditional defines no "annotation" parameter; presumably the placeholder cannot be resolved if the selector is left on "Waiting" - confirm and guard if so. -->
26 <version_command>/usr/local/bin/pyClusterReads.py --version</version_command> <!-- Calls the same script with its version flag. -->
27 <inputs>
28 <param format="gtf" name="input" type="data" label="Input Read Data File -f" help="GTF format sorted by position i.e. pyReadCounters output file."/> <!-- Read-data GTF; handed to the script as -f $input. -->
29 <conditional name="addGTF">
30 <param name="gtfFile" type="select" label="Choose GTF File from">
31 <option value="default" selected="true">Defaults</option>
32 <option value="other">History</option>
33 </param>
34 <when value="default"> <!-- GTF is chosen from the pycrac_gtf data table. -->
35 <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
36 <options from_data_table="pycrac_gtf"/>
37 </param>
38 <conditional name="annotate"> <!-- How the annotation filter is supplied: none ("all"), typed text ("manual"), or picked from a scanned file ("auto"). -->
39 <param name="annotations" type="select" label="Select annotation">
40 <option value="all" selected="true">All</option>
41 <option value="manual">Enter in text box</option>
42 <option value="auto">Scan pyGetGTFSources file</option>
43 </param>
44 <when value="all">
45 <param name="annotation" type="hidden" format="txt" size="10" value="all"/> <!-- placeholder value; the command adds no annotation option when "all" is selected -->
46 </when>
47 <when value="manual">
48 <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
49 <validator type="empty_field" message="Please enter a value"/>
50 </param>
51 </when>
52 <when value="auto"> <!-- Two-step selection: pick the pyGetGTFSources output, then switch the selector below to "Go" to list the values of its column index 0. -->
53 <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
54 <conditional name="scan">
55 <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
56 <option value="wait" selected="true">Waiting</option>
57 <option value="scanning">Go</option>
58 </param>
59 <when value="wait"> <!-- empty: no "annotation" parameter is defined in this state -->
60 </when>
61 <when value="scanning">
62 <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
63 <options from_dataset="gtf_annotation">
64 <column name="name" index="0"/>
65 <column name="value" index="0"/>
66 </options>
67 </param>
68 </when>
69 </conditional>
70 </when>
71 </conditional>
72
73 </when>
74 <when value="other"> <!-- GTF is taken from a dataset in the user's history. -->
75 <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/> <!-- format is lower-case to match the "gtf" datatype spelling used by the input and output datasets of this tool -->
76 <conditional name="annotate"> <!-- How the annotation filter is supplied: none ("all"), typed text ("manual"), or picked from the selected GTF ("auto"). -->
77 <param name="annotations" type="select" label="Select annotation">
78 <option value="all" selected="true">All</option>
79 <option value="manual">Enter in text box</option>
80 <option value="auto">Scan selected file</option>
81 </param>
82 <when value="all">
83 <param name="annotation" type="hidden" format="txt" size="10" value="all"/> <!-- placeholder value; the command adds no annotation option when "all" is selected -->
84 </when>
85 <when value="manual">
86 <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
87 <validator type="empty_field" message="Please enter a value"/>
88 </param>
89 </when>
90 <when value="auto"> <!-- Offers the unique values found in column index 1 of the selected GTF dataset. -->
91 <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
92 <options from_dataset="gtf">
93 <column name="name" index="1"/>
94 <column name="value" index="1"/>
95 <filter type="unique_value" name="unique" column="1"/>
96 </options>
97 </param>
98 </when>
99 </conditional>
100 </when>
101 </conditional>
102
103 <conditional name="addOpt"> <!-- Optional cluster settings; the command passes them to the script only when "Edit" is chosen. -->
104 <param name="options" type="select" label="Standard Options">
105 <option value="default" selected="true">Default</option>
106 <option value="edit">Edit</option>
107 </param>
108 <when value="edit">
109 <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs (0-50000)">
110 <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
111 </param>
112 <param format="integer" name="ch" type="integer" label="Cluster height --ch" value="2" size="10" help="Minimal height of a cluster">
113 <validator type="in_range" min="1" message="Please enter a value >= 1"/>
114 </param>
115 <param format="integer" name="cl" type="integer" label="Cluster length --cl" value="1" size="10" help="Maximum length of a cluster">
116 <validator type="in_range" min="1" message="Please enter a value >= 1"/>
117 </param> <!-- NOTE(review): a default of 1 for a maximum cluster length looks unusually small (the help example uses 100); confirm against pyClusterReads.py. -->
118 <param format="integer" name="cic" type="integer" label="cDNAs in clusters --cic" value="2" size="10" help="Minimal number of overlapping cDNAs in each cluster">
119 <validator type="in_range" min="2" message="Please enter a value >= 2"/> <!-- message now matches min="2" -->
120 </param>
121 <param format="integer" name="co" type="integer" label="cDNA-cluster nucleotide overlap --co" value="1" size="10" help="Number of nucleotides cDNA sequences have to overlap to form a cluster">
122 <validator type="in_range" min="1" message="Please enter a value >= 1"/>
123 </param>
124 <param format="integer" name="mutsfreq" type="integer" label="Minimum mutation frequency for a cluster position --mutsfreq" value="0" size="3" help="Mutations below this frequency (in %) are not reported">
125 <validator type="in_range" min="0" max="100" message="Please enter a value between 0 and 100"/>
126 </param>
127 </when>
128 <when value="default"> <!-- empty: the script's own defaults apply -->
129 </when>
130 </conditional>
131 <param name="label" type="text" format="txt" size="30" value="pyClusterReads" label="Enter output file label -o" /> <!-- Only referenced by the output dataset label (${label.value}); the command itself passes -o $output, not this text. -->
132 </inputs>
133 <outputs> <!-- Single GTF result, written by the script through -o $output. -->
134 <data format="gtf" name="output" label="${label.value}_clusters.gtf"/> <!-- dataset label = text of the "label" parameter followed by _clusters.gtf -->
135 </outputs>
136 <help>
137
138 .. class:: infomark
139
140 **pyClusterReads**
141
142 pyClusterReads is part of the pyCRAC_ package. It takes a reads_count_output GTF file from pyReadCounters and generates clusters from the interval coordinates.
143 Produces a GTF output file with cluster intervals and overlapping genomic features.
144 It also includes mutation frequencies (after the # character) for nucleotides in intervals using chromosomal coordinates.
145 The pyClusterReads GTF output file essentially has the same layout as other pyCRAC GTF output files.
146
147 **NOTE!** By default it calls each cluster an "exon" but this has no meaning. It may overlap with an intron.
148 Use bedtools to extract those intervals that overlap with introns or other features.
149
150 The maximum height of the cluster is indicated in column 8.
151 The text after the hash character (#) at the end of each line lists the chromosomal coordinates of mutated nucleotides within the cluster interval and their mutation frequencies.
152
153 For example::
154
155 # 114099S100.0
156
157 indicates that 100% of the nucleotides in position 114099 were substituted in the cluster.
158
159 An example of a pyClusterReads output file::
160
161 ##gff-version 2
162 # generated by pyClusterReads.py version 0.0.1, Fri Jan 18 11:59:42 2013
163 # pyClusterReads.py -f count_output_reads.gtf -o count_output_clusters.gtf -v
164 # chromosome feature source start end cDNAs strand height attributes
165 chrI cluster exon 112583 112643 6 - 5 gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 112612S75.0;
166 chrI cluster exon 113176 113232 3 - 3 gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113184S100.0;
167 chrI cluster exon 113334 113386 2 - 2 gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113349S50.0,113379S100.0;
168 chrI cluster exon 113534 113564 3 - 3 gene_id "INT_0_119,INT_0_114"; gene_name "INT_0_119,INT_0_114"; # 113554S33.3,113556S33.3,113557S33.3;
169 chrI cluster exon 113644 113691 5 - 4 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113649S50.0,113657S33.3,113679S25.0
170 chrI cluster exon 113912 113958 2 - 2 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113932S50.0,113946S50.0;
171 chrI cluster exon 113966 114066 5 - 3 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113987S50.0,114033S33.3,114039S33.3;
172 chrI cluster exon 114067 114130 3 - 3 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 114099S100.0;
173
174 .. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
175
176 ------
177
178 **Parameter list**
179
180
181 File input options::
182
183 -f reads.gtf, --input_file=reads.gtf
184 provide the path to your GTF read data file. NOTE the
185 file has to be correctly sorted! If you used
186 pyReadCounters to generate the file you should be
187 fine. If you modified it, use the sort command
188 described in the manual to sort your file first by
189 chromosome, then by strand and then by start position.
190 -o clusters.gtf, --output_file=clusters.gtf
191 provide a name for an output file. By default it
192 writes to the standard output
193 --gtf=Yourfavoritegtf.gtf
194 type the path to the gtf annotation file that you want
195 to use
196
197 Common pyCRAC options::
198
199 -r 100, --range=100
200 allows you to set the length of the UTR regions. If
201 you set '-r 50' or '--range=50', then the program will
202 set a fixed length (50 bp) regardless of whether the
203 GTF annotation file has genes with annotated UTRs.
204 -a protein_coding, --annotation=protein_coding
205 select which annotation (i.e. protein_coding, ncRNA,
206 sRNA, rRNA,snoRNA,snRNA, depending on the source of
207 your GTF file) you would like to focus your analysis
208 on. Default = all annotations
209
210 Options for cluster analysis::
211
212 --cic=2, --cdnasinclusters=2
213 sets the minimal number of overlapping cDNAs in each
214 cluster. Default = 2
215 --co=5, --clusteroverlap=5
216 sets the number of nucleotides cDNA sequences have to
217 overlap to form a cluster. Default = 1 nucleotide
218 --ch=5, --clusterheight=5
219 sets the minimal height of the cluster. Default = 2
220 nucleotides
221 --cl=100, --clusterlength=100
222 to set the maximum cluster sequence length
223 --mutsfreq=10, --mutationfrequency=10
224 sets the minimal mutation frequency for a cluster
225 position in the GTF output file. Default = 0%.
226 Example: if mutsfreq is set to 10 and a cluster
227 position is mutated in less than 10% of the
228 reads, then the mutation will not be reported.
229 </help>
230 </tool>