diff pyCRAC/pyClusterReads.xml @ 0:19b20927172d draft

Uploaded
author swebb
date Tue, 18 Jun 2013 09:11:00 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyClusterReads.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,230 @@
+<tool id="pyClusterReads" name="pyClusterReads" force_history_refresh="True">
+	<requirements>
+		<requirement type="package">pyCRAC</requirement>
+	</requirements>
+	<command interpreter="python">
+	/usr/local/bin/pyClusterReads.py
+	-f $input
+	--gtf=$addGTF.gtf
+	## Pass an annotation only when one was actually chosen; "all" and an un-run scan ("Waiting") fall back to the script default.
+	## The scan sub-conditional exists only for built-in GTFs, so the first test must catch every other case before it is read.
+	#if $addGTF.annotate.annotations == "manual" or ($addGTF.gtfFile == "other" and $addGTF.annotate.annotations == "auto"):
+		--annotation=$addGTF.annotate.annotation
+	#elif $addGTF.annotate.annotations == "auto" and $addGTF.annotate.scan.annotations == "scanning":
+		--annotation=$addGTF.annotate.scan.annotation
+	#end if#
+	-o $output
+	#if $addOpt.options == "edit":
+		--range=$addOpt.range
+		--cic=$addOpt.cic
+		--co=$addOpt.co
+		--ch=$addOpt.ch
+		--cl=$addOpt.cl
+		--mutsfreq=$addOpt.mutsfreq
+	#end if#
+	</command>
+	<version_command>/usr/local/bin/pyClusterReads.py --version</version_command>
+	<inputs>
+	    <param name="input" type="data" format="gtf" label="Input Read Data File -f" help="GTF format sorted by position i.e. pyReadCounters output file."/>
+	    <conditional name="addGTF">
+            <param name="gtfFile" type="select" label="Choose GTF File from">
+                <option value="default" selected="true">Defaults</option>
+                <option value="other">History</option>
+            </param>
+            <when value="default">
+                <!-- GTF chosen from the pycrac_gtf data table -->
+                <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+                    <options from_data_table="pycrac_gtf"/>
+                </param>
+                <conditional name="annotate">
+                    <param name="annotations" type="select" label="Select annotation">
+                        <option value="all" selected="true">All</option>
+                        <option value="manual">Enter in text box</option>
+                        <option value="auto">Scan pyGetGTFSources file</option>
+                    </param>
+                    <when value="all">
+                        <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+                    </when>
+                    <when value="manual">
+                        <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+                            <validator type="empty_field" message="Please enter a value"/>
+                        </param>
+                    </when>
+                    <when value="auto">
+                        <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
+                        <!-- Two-step scan: the annotation list is read from gtf_annotation only once "Go" is chosen -->
+                        <conditional name="scan">
+                            <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
+                                <option value="wait" selected="true">Waiting</option>
+                                <option value="scanning">Go</option>
+                            </param>
+                            <when value="wait"/>
+                            <when value="scanning">
+                                <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+                                    <options from_dataset="gtf_annotation">
+                                        <column name="name" index="0"/>
+                                        <column name="value" index="0"/>
+                                    </options>
+                                </param>
+                            </when>
+                        </conditional>
+                    </when>
+                </conditional>
+            </when>
+            <when value="other"> <!-- GTF taken from the history; format is spelled lowercase like every other gtf input/output in this tool -->
+                <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+                <conditional name="annotate">
+                    <param name="annotations" type="select" label="Select annotation">
+                        <option value="all" selected="true">All</option>
+                        <option value="manual">Enter in text box</option>
+                        <option value="auto">Scan selected file</option>
+                    </param>
+                    <when value="all">
+                        <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+                    </when>
+                    <when value="manual">
+                        <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+                            <validator type="empty_field" message="Please enter a value"/>
+                        </param>
+                    </when>
+                    <when value="auto">
+                        <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+                            <options from_dataset="gtf">
+                                <column name="name" index="1"/>
+                                <column name="value" index="1"/>
+                                <filter type="unique_value" name="unique" column="1"/>
+                            </options>
+                        </param>
+                    </when>
+                </conditional>
+            </when>
+	  </conditional>
+
+	  <conditional name="addOpt">
+	      <!-- "Edit" exposes the clustering options below; with "Default" none of them are placed on the command line -->
+	      <param name="options" type="select" label="Standard Options">
+	          <option value="default" selected="true">Default</option>
+	          <option value="edit">Edit</option>
+	      </param>
+	      <when value="edit">
+	          <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0>50000">
+	              <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
+	          </param>
+	          <param format="integer" name="ch" type="integer" label="Cluster height --ch" value="2" size="10" help="Minimal height of a cluster">
+	              <validator type="in_range" min="1" message="Please enter a value >= 1"/>
+	          </param>
+	          <param format="integer" name="cl" type="integer" label="Cluster length --cl" value="1" size="10" help="Maximum length of a cluster"> <!-- NOTE(review): a default of 1 for a maximum length looks too small; confirm against the script's own default -->
+	              <validator type="in_range" min="1" message="Please enter a value >= 1"/>
+	          </param>
+	          <param format="integer" name="cic" type="integer" label="cDNAs in clusters --cic" value="2" size="10">
+	              <validator type="in_range" min="2" message="Please enter a value >= 2"/>
+	          </param>
+	          <param format="integer" name="co" type="integer" label="cDNA-cluster nucleotide overlap --co" value="1" size="10">
+	              <validator type="in_range" min="1" message="Please enter a value >= 1"/>
+	          </param>
+	          <param format="integer" name="mutsfreq" type="integer" label="Minimum mutation frequency for a cluster position --mutsfreq" value="0" size="3">
+	              <validator type="in_range" min="0" max="100" message="Please enter a value between 0 and 100"/>
+	          </param>
+	      </when>
+	      <when value="default"/>
+	  </conditional>
+	<param name="label" type="text" format="txt" value="pyClusterReads" size="30" label="Enter output file label -o"/>
+	</inputs>
+	<outputs>
+		<data name="output" format="gtf" label="${label.value}_clusters.gtf"/>
+	</outputs>
+	<help>
+
+.. class:: infomark
+
+**pyClusterReads**
+
+pyClusterReads is part of the pyCRAC_ package. It takes a reads_count_output GTF file from pyReadCounters and generates clusters from the interval coordinates.
+It produces a GTF output file with cluster intervals and overlapping genomic features.
+The output also includes mutation frequencies (after the # character) for nucleotides in intervals, using chromosomal coordinates.
+The pyClusterReads GTF output file essentially has the same layout as other pyCRAC GTF output files.
+
+**NOTE!** By default each cluster is labelled an "exon", but this label has no meaning: the cluster may just as well overlap with an intron.
+Use bedtools to extract those intervals that overlap with introns or other features.
+
+The maximum height of the cluster is indicated in column 8.
+The text after the hash character (#) at the end of each line lists the chromosomal coordinates of mutated nucleotides within the cluster interval and their mutation frequencies.
+
+For example::
+    
+    # 114099S100.0 
+
+indicates that 100% of the nucleotides in position 114099 were substituted in the cluster.
+
+An example of a pyClusterReads output file::
+
+    ##gff-version 2
+    # generated by pyClusterReads.py version 0.0.1, Fri Jan 18 11:59:42 2013
+    # pyClusterReads.py -f count_output_reads.gtf -o count_output_clusters.gtf -v
+    # chromosome    feature source  start   end     cDNAs   strand  height  attributes
+    chrI    cluster exon    112583  112643  6       -       5   gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 112612S75.0;
+    chrI    cluster exon    113176  113232  3       -       3   gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113184S100.0;
+    chrI    cluster exon    113334  113386  2       -       2   gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113349S50.0,113379S100.0;
+    chrI    cluster exon    113534  113564  3       -       3   gene_id "INT_0_119,INT_0_114"; gene_name "INT_0_119,INT_0_114"; # 113554S33.3,113556S33.3,113557S33.3;
+    chrI    cluster exon    113644  113691  5       -       4   gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113649S50.0,113657S33.3,113679S25.0
+    chrI    cluster exon    113912  113958  2       -       2   gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113932S50.0,113946S50.0;
+    chrI    cluster exon    113966  114066  5       -       3   gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113987S50.0,114033S33.3,114039S33.3;
+    chrI    cluster exon    114067  114130  3       -       3   gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 114099S100.0;
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html                                                                          
+
+------
+
+**Parameter list**  
+
+
+File input options::
+
+    -f reads.gtf, --input_file=reads.gtf
+                                  provide the path to your GTF read data file. NOTE the
+				  file has to be correctly sorted! If you used
+				  pyReadCounters to generate the file you should be
+				  fine. If you modified it, use the sort command
+				  described in the manual to sort your file first by
+				  chromosome, then by strand and then by start position.
+    -o clusters.gtf, --output_file=clusters.gtf
+                                  provide a name for an output file. By default it
+				  writes to the standard output
+    --gtf=Yourfavoritegtf.gtf
+                                  type the path to the gtf annotation file that you want
+				  to use
+
+Common pyCRAC options::
+
+    -r 100, --range=100
+                                  allows you to set the length of the UTR regions. If
+				  you set '-r 50' or '--range=50', then the program will
+				  set a fixed length (50 bp) regardless of whether the
+				  GTF annotation file has genes with annotated UTRs.
+    -a protein_coding, --annotation=protein_coding
+                                  select which annotation (i.e. protein_coding, ncRNA,
+				  sRNA, rRNA,snoRNA,snRNA, depending on the source of
+				  your GTF file) you would like to focus your analysis
+				  on. Default = all annotations
+
+Options for cluster analysis::
+
+    --cic=2, --cdnasinclusters=2
+                                  sets the minimal number of overlapping cDNAs in each
+				  cluster. Default = 2
+    --co=5, --clusteroverlap=5
+                                  sets the number of nucleotides cDNA sequences have to
+				  overlap to form a cluster. Default = 1 nucleotide
+    --ch=5, --clusterheight=5
+                                  sets the minimal height of the cluster. Default = 2
+				  nucleotides
+    --cl=100, --clusterlength=100
+                                  to set the maximum cluster sequence length
+    --mutsfreq=10, --mutationfrequency=10
+                                  sets the minimal mutations frequency for a cluster
+				  position in the GTF output file. Default = 0%.
+				  Example: if mutsfreq is set to 10 and a cluster
+				  position is mutated in less than 10% of the reads,
+				  then the mutation will not be reported.
+	</help>
+</tool>
\ No newline at end of file