diff pyCRAC/pyCalculateFDRs.xml @ 0:19b20927172d draft

Uploaded
author swebb
date Tue, 18 Jun 2013 09:11:00 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyCalculateFDRs.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,247 @@
+ <tool id ="pyCalculateFDRs" name="pyCalculateFDRs">
+	<requirements> <!-- the wrapped script ships with the pyCRAC package -->
+		<requirement type="package">pyCRAC</requirement>
+	</requirements>
+	<command interpreter="python">
+	/usr/local/bin/pyCalculateFDRs.py
+	-f $ftype.input
+	--file_type $ftype.file_type
+	--gtf=$addGTF.gtf
+	## Emit --annotation only once a value exists: the scan select is created only after "Go", so it is never read while "Waiting" (all annotations are used then).
+	#if $addGTF.annotate.annotations == "manual":
+		--annotation "$addGTF.annotate.annotation"
+	#elif $addGTF.annotate.annotations == "auto" and $addGTF.gtfFile == "other":
+		--annotation "$addGTF.annotate.annotation"
+	#elif $addGTF.annotate.annotations == "auto" and $addGTF.annotate.scan.annotations == "scanning":
+		--annotation "$addGTF.annotate.scan.annotation"
+	#end if#
+	--chromfile=$addChr.chr
+	## The tuning flags below are passed only when the user switched "Standard options" to "Edit".
+	#if $addOpt.options == "edit"
+		-s $addOpt.sequence
+		--min $addOpt.min
+		--minfdr $addOpt.minfdr
+		--iterations=$addOpt.iterations
+		--range $addOpt.range
+	#end if#
+	-o $output
+	</command>
+	<version_command>/usr/local/bin/pyCalculateFDRs.py --version</version_command>
+	<inputs>
+        <conditional name="ftype"> <!-- the chosen format decides which dataset type the input selector accepts -->
+            <param name="file_type" type="select" label="Input File Type --file_type" help="Use bed6, gff or gtf input files containing read/cDNA co-ordinates">
+                <option value="gff" selected="true">GFF</option>
+                <option value="bed">Bed6</option>
+                <option value="gtf">GTF</option>
+            </param>
+            <when value="gff">
+                <param name="input" type="data" format="gff" label="Input File --readdatafile" help="GFF format containing read/cDNA co-ordinates"/>
+            </when>
+            <when value="gtf">
+                <param name="input" type="data" format="gtf" label="Input File --readdatafile" help="GTF format containing read/cDNA co-ordinates"/>
+            </when>
+            <when value="bed">
+                <param name="input" type="data" format="bed6" label="Input File --readdatafile" help="Bed 6 column format containing read/cDNA co-ordinates"/>
+            </when>
+        </conditional>
+	  
+        <conditional name="addChr"> <!-- chromosome lengths come either from a data table or from a history dataset -->
+            <param name="chrfile" type="select" label="Choose Chromosome length file from">
+                <option value="default" selected="true">Defaults</option>
+                <option value="other">History</option>
+            </param>
+            <when value="default">
+                <param name="chr" type="select" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes">
+                    <options from_data_table="pycrac_chr"/>
+                </param>
+            </when>
+            <when value="other">
+                <param name="chr" type="data" format="tabular" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes. Use pyCrac utility pyCalculateChromosomeLengths to create."/>
+            </when>
+        </conditional>
+
+	    <conditional name="addGTF"> <!-- GTF source (data table or history); each branch carries its own annotation chooser -->
+	        <param name="gtfFile" type="select" label="Choose GTF File from">
+	            <option value="default" selected="true">Defaults</option>
+	            <option value="other">History</option>
+	        </param>
+	        <when value="default">
+	            <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+	                <options from_data_table="pycrac_gtf"/>
+	            </param>
+	            <conditional name="annotate">
+	                <param name="annotations" type="select" label="Select annotation">
+	                    <option value="all" selected="true">All</option>
+	                    <option value="manual">Enter in text box</option>
+	                    <option value="auto">Scan pyGetGTFSources file</option>
+	                </param>
+	                <when value="all">
+	                    <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+	                </when>
+	                <when value="manual">
+	                    <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+	                        <validator type="empty_field" message="Please enter a value"/>
+	                    </param>
+	                </when>
+	                <when value="auto">
+	                    <param name="gtf_annotation" type="data" format="tabular" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
+	                    <conditional name="scan">
+	                        <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
+	                            <option value="wait" selected="true">Waiting</option>
+	                            <option value="scanning">Go</option>
+	                        </param>
+	                        <when value="wait"> <!-- no further inputs are offered until the user switches to "Go" -->
+	                        </when>
+	                        <when value="scanning">
+	                            <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+	                                <options from_dataset="gtf_annotation">
+	                                    <column name="name" index="0"/>
+	                                    <column name="value" index="0"/>
+	                                </options>
+	                            </param>
+	                        </when>
+	                    </conditional>
+	                </when>
+	            </conditional>
+	        </when>
+	        <when value="other">
+	            <param name="gtf" type="data" format="gtf" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+	            <conditional name="annotate">
+	                <param name="annotations" type="select" label="Select annotation">
+	                    <option value="all" selected="true">All</option>
+	                    <option value="manual">Enter in text box</option>
+	                    <option value="auto">Scan selected file</option>
+	                </param>
+	                <when value="all">
+	                    <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+	                </when>
+	                <when value="manual">
+	                    <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+	                        <validator type="empty_field" message="Please enter a value"/>
+	                    </param>
+	                </when>
+	                <when value="auto">
+	                    <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+	                        <options from_dataset="gtf">
+	                            <column name="name" index="1"/>
+	                            <column name="value" index="1"/>
+	                            <filter type="unique_value" name="unique" column="1"/>
+	                        </options>
+	                    </param>
+	                </when>
+	            </conditional>
+	        </when>
+	    </conditional>
+		<conditional name="addOpt"> <!-- "Edit" exposes the tuning parameters; "Default" offers no extra inputs -->
+			<param name="options" type="select" label="Standard options">
+				<option value="default" selected="true">Default</option>
+				<option value="edit">Edit</option>
+			</param>
+			<when value="edit">
+				<param name="sequence" type="select" label="Align reads to --sequence">
+					<option value="genomic" selected="true">Genomic Sequence</option>
+					<option value="coding">Coding Sequence</option>
+				</param>
+				<param name="min" type="integer" value="1" size="10" label="Minimum read coverage --min" help="Set the minimal read coverage for a region">
+					<validator type="in_range" min="1" message="Please enter a value >= 1"/>
+				</param>
+				<param name="minfdr" type="float" value="0.05" size="6" label="Minimum FDR threshold --minfdr" help="Set a minimal FDR threshold for filtering interval data">
+					<validator type="in_range" min="0" max="1" message="Please enter a value between 0 and 1"/>
+				</param>
+				<param name="iterations" type="integer" value="100" size="6" label="Number of iterations --iterations" help="The number of iterations for randomization of read coordinates">
+					<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+				</param>
+				<param name="range" type="integer" value="0" size="5" label="Range --range" help="Manually set the length of the 5' and 3' UTRs (0 to 50000)">
+					<validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
+				</param>
+			</when>
+			<when value="default">
+			</when>
+		</conditional>
+		<param name="label" type="text" value="pyCalculateFDRs" size="30" format="txt" label="Enter output file label -o"/> <!-- text used to name the output dataset -->
+	</inputs>
+	<outputs> <!-- one GTF dataset whose history name is derived from the "label" text input -->
+		<data name="output" format="gtf" label="${label.value}.gtf"/>
+	</outputs>
+	<help>
+
+.. class:: infomark
+
+**pyCalculateFDRs**
+
+By default the FDR value is set to 0.05, meaning that there is a 5% chance that the interval is not significantly enriched.
+The tool reports significant intervals in the GTF format and reports overlapping genomic features.
+Mutation frequencies are not included, but these can be added using the pyCalculateMutationFrequencies tool.
+
+**NOTE!** By default it calls each significant interval an "exon" but this has no meaning! It may overlap with an intron.
+Use bedtools to extract those intervals that overlap with introns or other features.
+
+Example of an output file::
+
+    ##gff-version 2
+    # generated by pyCalculateFDRs version 0.0.3, Sat Jun  1 21:16:23 2013
+    # pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05
+    # chromosome	feature	source	start	end	minimal_coverage	strand	.	attributes
+    chrI	protein_coding	exon	140846	140860	5	-	.	gene_id "YAL005C"; gene_name "SSA1"; 
+    chrI	intergenic_region	exon	223118	223164	4	-	.	gene_id "INT_0_179"; gene_name "INT_0_179"; 
+    chrI	intergenic_region	exon	71889	71922	3	+	.	gene_id "INT_0_94"; gene_name "INT_0_94"; 
+    chrII	intergenic_region	exon	296127	296158	3	-	.	gene_id "INT_0_365"; gene_name "INT_0_365"; 
+    chrII	intergenic_region	exon	680697	680722	4	-	.	gene_id "INT_0_626"; gene_name "INT_0_626"; 
+    chrII	intergenic_region	exon	680827	680846	4	-	.	gene_id "INT_0_626"; gene_name "INT_0_626"; 
+    chrII	snRNA	exon	680827	680838	5	-	.	gene_id "LSR1"; gene_name "LSR1"; 
+    chrII	snRNA	exon	680951	681001	5	-	.	gene_id "LSR1"; gene_name "LSR1"; 
+    chrII	intergenic_region	exon	577985	577996	3	-	.	gene_id "INT_0_556"; gene_name "INT_0_556"; 
+    chrII	protein_coding	exon	203838	203887	3	+	.	gene_id "YBL011W"; gene_name "SCT1"; 
+    chrII	protein_coding	exon	296127	296158	3	-	.	gene_id "YBR028C"; gene_name "YBR028C"; 
+
+ 
+pyCalculateFDRs is part of the pyCRAC_ package. It takes interval information in GFF, GTF or bed format and calculates False Discovery Rates (FDRs).
+
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+Options::
+
+  -f read_file, --readdatafile=read_file
+                        Name of the bed/gff/gtf file containing the read/cDNA
+                        coordinates
+  --file_type=FILE_TYPE
+                        this tool supports bed6, gtf and gff input files.
+                        Please select from 'bed','gtf' or 'gff'. Default=gtf
+  -o outfile.gtf, --outfile=outfile.gtf
+                        Optional. Provide the name of the output file. Default
+                        is 'selected_intervals.gtf'
+  -r 100, --range=100   
+                        allows you to set the length of the UTR regions. If
+                        you set '-r 50' or '--range=50', then the program will
+                        set a fixed length (50 bp) regardless of whether the
+                        GTF file has genes with annotated UTRs.
+  -a protein_coding, --annotation=protein_coding
+                        select which annotation (i.e. protein_coding, ncRNA,
+                        sRNA, rRNA,snoRNA,snRNA, depending on the source of
+                        your GTF file) you would like to focus your analysis
+                        on. Default = all annotations
+  -c yeast.txt, --chromfile=yeast.txt
+                        Location of the chromosome info file. This file should
+                        have two columns: first column is the names of the
+                        chromosomes, second column is length of the
+                        chromosomes. Default is yeast
+  --gtf=yeast.gtf       
+                        Name of the annotation file. Default is /usr/local/pyC
+                        RAC/db/Saccharomyces_cerevisiae.EF2.59.1.2.gtf
+  -m MINFDR, --minfdr=MINFDR
+                        To set a minimal FDR threshold for filtering interval
+                        data. Default is 0.05
+  --min=MIN             
+                        to set a minimal read coverage for a region.
+                        Regions with coverage less than the minimum will be
+                        ignored
+  --iterations=ITERATIONS
+                        to set the number of iterations for randomization of
+                        read coordinates. Default=100
+	</help>
+</tool>