| 0 | 1 <tool id="pyClusterReads" name="pyClusterReads" force_history_refresh="True"> | 
|  | 2 	<requirements> | 
|  | 3         	<requirement type="package">pyCRAC</requirement> | 
|  | 4     	</requirements> | 
|  |  	<!-- Command template: runs pyClusterReads.py on the input GTF and writes the clusters to the output dataset. | 
|  |  	     The annotation option is emitted only when a value exists. With the built-in GTF and the "auto" choice, | 
|  |  	     scan.annotation is defined only once the scan selector is "scanning"; while it is still "wait" the option | 
|  |  	     is left out (the help text documents "all annotations" as the script default) instead of referencing an | 
|  |  	     undefined template variable. The addOpt values are appended only when "edit" is selected. --> | 
|  | 5 	<command interpreter="python"> | 
|  | 6 	/usr/local/bin/pyClusterReads.py | 
|  | 7 	-f $input | 
|  | 8 	--gtf=$addGTF.gtf | 
|  | 9 	#if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto": | 
|  | 10 		#if $addGTF.annotate.scan.annotations == "scanning": | 
|  | 11 			--annotation=$addGTF.annotate.scan.annotation | 
|  | 12 		#end if# | 
|  | 13 	#elif $addGTF.annotate.annotations != "all": | 
|  | 14 		--annotation=$addGTF.annotate.annotation | 
|  | 15 	#end if# | 
|  | 16 	-o $output | 
|  | 17 	#if $addOpt.options == "edit": | 
|  | 18 		--range=$addOpt.range | 
|  | 19 		--cic=$addOpt.cic | 
|  | 20 		--co=$addOpt.co | 
|  | 21 		--ch=$addOpt.ch | 
|  | 22 		--cl=$addOpt.cl | 
|  | 23 		--mutsfreq=$addOpt.mutsfreq | 
|  | 24 	#end if# | 
|  | 25 	</command> | 
|  | 26 	<version_command>/usr/local/bin/pyClusterReads.py --version</version_command> | 
|  | 27 	<inputs> | 
|  |  	    <!-- Read data to cluster; handed to the script with the -f flag. --> | 
|  | 28 	    <param format="gtf" name="input" type="data" label="Input Read Data File -f" help="GTF format sorted by position i.e. pyReadCounters output file."/> | 
|  |  	    <!-- Annotation GTF source: an entry of the pycrac_gtf data table ("default") or a history dataset ("other"). | 
|  |  	         Both branches define "gtf" plus a nested "annotate" conditional that selects the annotation value. --> | 
|  | 29 	    <conditional name="addGTF"> | 
|  | 30 	        <param name="gtfFile" type="select" label="Choose GTF File from"> | 
|  | 31 	            <option value="default" selected="true">Defaults</option> | 
|  | 32 	            <option value="other">History</option> | 
|  | 33 	        </param> | 
|  | 34 	        <when value="default"> | 
|  | 35 	            <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"> | 
|  | 36 	                <options from_data_table="pycrac_gtf"/> | 
|  | 37 	            </param> | 
|  | 38 	            <conditional name="annotate"> | 
|  | 39 	                <param name="annotations" type="select" label="Select annotation"> | 
|  | 40 	                    <option value="all" selected="true">All</option> | 
|  | 41 	                    <option value="manual">Enter in text box</option> | 
|  | 42 	                    <option value="auto">Scan pyGetGTFSources file</option> | 
|  | 43 	                </param> | 
|  | 44 	                <when value="all"> | 
|  | 45 	                    <param name="annotation" type="hidden" format="txt" size="10" value="all"/> | 
|  | 46 	                </when> | 
|  | 47 	                <when value="manual"> | 
|  | 48 	                    <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool"> | 
|  | 49 	                        <validator type="empty_field" message="Please enter a value"/> | 
|  | 50 	                    </param> | 
|  | 51 	                </when> | 
|  | 52 	                <when value="auto"> | 
|  | 53 	                    <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/> | 
|  |  	                    <!-- Two-step selection: the annotation select is only defined in the "scanning" case. --> | 
|  | 54 	                    <conditional name="scan"> | 
|  | 55 	                        <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO"> | 
|  | 56 	                            <option value="wait" selected="true">Waiting</option> | 
|  | 57 	                            <option value="scanning">Go</option> | 
|  | 58 	                        </param> | 
|  | 59 	                        <when value="wait"> | 
|  | 60 	                        </when> | 
|  | 61 	                        <when value="scanning"> | 
|  | 62 	                            <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation"> | 
|  | 63 	                                <options from_dataset="gtf_annotation"> | 
|  | 64 	                                    <column name="name" index="0"/> | 
|  | 65 	                                    <column name="value" index="0"/> | 
|  | 66 	                                </options> | 
|  | 67 	                            </param> | 
|  | 68 	                        </when> | 
|  | 69 	                    </conditional> | 
|  | 70 	                </when> | 
|  | 71 	            </conditional> | 
|  | 72 | 
|  | 73 	        </when> | 
|  | 74 	        <when value="other"> | 
|  |  	            <!-- Format name written in lower case, matching the other gtf declarations in this tool. --> | 
|  | 75 	            <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/> | 
|  | 76 	            <conditional name="annotate"> | 
|  | 77 	                <param name="annotations" type="select" label="Select annotation"> | 
|  | 78 	                    <option value="all" selected="true">All</option> | 
|  | 79 	                    <option value="manual">Enter in text box</option> | 
|  | 80 	                    <option value="auto">Scan selected file</option> | 
|  | 81 	                </param> | 
|  | 82 	                <when value="all"> | 
|  | 83 	                    <param name="annotation" type="hidden" format="txt" size="10" value="all"/> | 
|  | 84 	                </when> | 
|  | 85 	                <when value="manual"> | 
|  | 86 	                    <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool"> | 
|  | 87 	                        <validator type="empty_field" message="Please enter a value"/> | 
|  | 88 	                    </param> | 
|  | 89 	                </when> | 
|  | 90 	                <when value="auto"> | 
|  |  	                    <!-- Options are the distinct values found in the second column (index 1) of the chosen GTF dataset. --> | 
|  | 91 	                    <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation"> | 
|  | 92 	                        <options from_dataset="gtf"> | 
|  | 93 	                            <column name="name" index="1"/> | 
|  | 94 	                            <column name="value" index="1"/> | 
|  | 95 	                            <filter type="unique_value" name="unique" column="1"/> | 
|  | 96 	                        </options> | 
|  | 97 	                    </param> | 
|  | 98 	                </when> | 
|  | 99 	            </conditional> | 
|  | 100 	        </when> | 
|  | 101 	    </conditional> | 
|  | 102 | 
|  |  	    <!-- Optional tuning parameters; the command template appends them only when "edit" is selected. --> | 
|  | 103 	    <conditional name="addOpt"> | 
|  | 104 	        <param name="options" type="select" label="Standard Options"> | 
|  | 105 	            <option value="default" selected="true">Default</option> | 
|  | 106 	            <option value="edit">Edit</option> | 
|  | 107 	        </param> | 
|  | 108 	        <when value="edit"> | 
|  | 109 	            <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs (0 to 50000)"> | 
|  | 110 	                <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/> | 
|  | 111 	            </param> | 
|  | 112 	            <param format="integer" name="ch" type="integer" label="Cluster height --ch" value="2" size="10" help="Minimal height of a cluster"> | 
|  | 113 	                <validator type="in_range" min="1" message="Please enter a value >= 1"/> | 
|  | 114 	            </param> | 
|  | 115 	            <param format="integer" name="cl" type="integer" label="Cluster length --cl" value="1" size="10" help="Maximum length of a cluster"> | 
|  | 116 	                <validator type="in_range" min="1" message="Please enter a value >= 1"/> | 
|  | 117 	            </param> | 
|  |  	            <!-- The validator enforces a minimum of 2, so the message states the same bound. --> | 
|  | 118 	            <param format="integer" name="cic" type="integer" label="cDNAs in clusters --cic" value="2" size="10"> | 
|  | 119 	                <validator type="in_range" min="2" message="Please enter a value >= 2"/> | 
|  | 120 	            </param> | 
|  | 121 	            <param format="integer" name="co" type="integer" label="cDNA-cluster nucleotide overlap --co" value="1" size="10"> | 
|  | 122 	                <validator type="in_range" min="1" message="Please enter a value >= 1"/> | 
|  | 123 	            </param> | 
|  | 124 	            <param format="integer" name="mutsfreq" type="integer" label="Minimum mutation frequency for a cluster position --mutsfreq" value="0" size="3"> | 
|  | 125 	                <validator type="in_range" min="0" max="100" message="Please enter a value between 0 and 100"/> | 
|  | 126 	            </param> | 
|  | 127 	        </when> | 
|  | 128 	        <when value="default"> | 
|  | 129 	        </when> | 
|  | 130 	    </conditional> | 
|  |  	    <!-- Free-text prefix used to build the label of the output dataset. --> | 
|  | 131 	    <param name="label" type="text" format="txt" size="30" value="pyClusterReads" label="Enter output file label -o" /> | 
|  | 132 	</inputs> | 
|  |  	<!-- Single GTF result dataset; its history label is built from the "label" text input. --> | 
|  | 133 	<outputs> | 
|  | 134 		<data name="output" format="gtf" label="${label.value}_clusters.gtf"/> | 
|  | 135 	</outputs> | 
|  | 136 	<help> | 
|  | 137 | 
|  | 138 .. class:: infomark | 
|  | 139 | 
|  | 140 **pyClusterReads** | 
|  | 141 | 
|  | 142 pyClusterReads is part of the pyCRAC_ package. It takes a reads_count_output GTF file from pyReadCounters and generates clusters from the interval coordinates. | 
|  | 143 Produces a GTF output file with cluster intervals and overlapping genomic features. | 
|  | 144 It also includes mutation frequencies (after the # character) for nucleotides in intervals using chromosomal coordinates. | 
|  | 145 The pyClusterReads GTF output file essentially has the same layout as other pyCRAC GTF output files. | 
|  | 146 | 
|  | 147 **NOTE!** By default it calls each cluster an "exon" but this has no meaning. It may overlap with an intron. | 
|  | 148 Use bedtools to extract those intervals that overlap with introns or other features. | 
|  | 149 | 
|  | 150 The maximum height of the cluster is indicated in column 8. | 
|  | 151 The hash character at the end of each line (#) shows chromosomal coordinates of mutated nucleotides within the cluster interval and their mutation frequencies. | 
|  | 152 | 
|  | 153 For example:: | 
|  | 154 | 
|  | 155     # 114099S100.0 | 
|  | 156 | 
|  | 157 indicates that 100% of the nucleotides in position 114099 were substituted in the cluster. | 
|  | 158 | 
|  | 159 An example of a pyClusterReads output file:: | 
|  | 160 | 
|  | 161     ##gff-version 2 | 
|  | 162     # generated by pyClusterReads.py version 0.0.1, Fri Jan 18 11:59:42 2013 | 
|  | 163     # pyClusterReads.py -f count_output_reads.gtf -o count_output_clusters.gtf -v | 
|  | 164     # chromosome    feature source  start   end     cDNAs   strand  height  attributes | 
|  | 165     chrI    cluster exon    112583  112643  6       -       5   gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 112612S75.0; | 
|  | 166     chrI    cluster exon    113176  113232  3       -       3   gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113184S100.0; | 
|  | 167     chrI    cluster exon    113334  113386  2       -       2   gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113349S50.0,113379S100.0; | 
|  | 168     chrI    cluster exon    113534  113564  3       -       3   gene_id "INT_0_119,INT_0_114"; gene_name "INT_0_119,INT_0_114"; # 113554S33.3,113556S33.3,113557S33.3; | 
|  | 169     chrI    cluster exon    113644  113691  5       -       4   gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113649S50.0,113657S33.3,113679S25.0 | 
|  | 170     chrI    cluster exon    113912  113958  2       -       2   gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113932S50.0,113946S50.0; | 
|  | 171     chrI    cluster exon    113966  114066  5       -       3   gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113987S50.0,114033S33.3,114039S33.3; | 
|  | 172     chrI    cluster exon    114067  114130  3       -       3   gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 114099S100.0; | 
|  | 173 | 
|  | 174 .. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html | 
|  | 175 | 
|  | 176 ------ | 
|  | 177 | 
|  | 178 **Parameter list** | 
|  | 179 | 
|  | 180 | 
|  | 181 File input options:: | 
|  | 182 | 
|  | 183     -f reads.gtf, --input_file=reads.gtf | 
|  | 184                                   provide the path to your GTF read data file. NOTE the | 
|  | 185 				  file has to be correctly sorted! If you used | 
|  | 186 				  pyReadCounters to generate the file you should be | 
|  | 187 				  fine. If you modified it, use the sort command | 
|  | 188 				  described in the manual to sort your file first by | 
|  | 189 				  chromosome, then by strand and then by start position. | 
|  | 190     -o clusters.gtf, --output_file=clusters.gtf | 
|  | 191                                   provide a name for an output file. By default it | 
|  | 192 				  writes to the standard output | 
|  | 193     --gtf=Yourfavoritegtf.gtf | 
|  | 194                                   type the path to the gtf annotation file that you want | 
|  | 195 				  to use | 
|  | 196 | 
|  | 197 Common pyCRAC options:: | 
|  | 198 | 
|  | 199     -r 100, --range=100 | 
|  | 200                                   allows you to set the length of the UTR regions. If | 
|  | 201 				  you set '-r 50' or '--range=50', then the program will | 
|  | 202 				  set a fixed length (50 bp) regardless of whether the | 
|  | 203 				  GTF annotation file has genes with annotated UTRs. | 
|  | 204     -a protein_coding, --annotation=protein_coding | 
|  | 205                                   select which annotation (i.e. protein_coding, ncRNA, | 
|  | 206 				  sRNA, rRNA,snoRNA,snRNA, depending on the source of | 
|  | 207 				  your GTF file) you would like to focus your analysis | 
|  | 208 				  on. Default = all annotations | 
|  | 209 | 
|  | 210 Options for cluster analysis:: | 
|  | 211 | 
|  | 212     --cic=2, --cdnasinclusters=2 | 
|  | 213                                   sets the minimal number of overlapping cDNAs in each | 
|  | 214 				  cluster. Default = 2 | 
|  | 215     --co=5, --clusteroverlap=5 | 
|  | 216                                   sets the number of nucleotides cDNA sequences have to | 
|  | 217 				  overlap to form a cluster. Default = 1 nucleotide | 
|  | 218     --ch=5, --clusterheight=5 | 
|  | 219                                   sets the minimal height of the cluster. Default = 2 | 
|  | 220 				  nucleotides | 
|  | 221     --cl=100, --clusterlength=100 | 
|  | 222                                   to set the maximum cluster sequence length | 
|  | 223     --mutsfreq=10, --mutationfrequency=10 | 
|  | 224                                   sets the minimal mutations frequency for a cluster | 
|  | 225 				  position in the GTF output file. Default = 0%. | 
|  | 226 				  Example: if the mutsfrequency is set at 10 and a | 
|  | 227 				  cluster position is mutated in less than 10% of the | 
|  | 228 				  reads, then the mutation will not be reported. | 
|  | 229 	</help> | 
|  | 230 </tool> |