view pyCRAC/pyCalculateFDRs.xml @ 1:7c9574213c0a draft default tip

Uploaded
author swebb
date Thu, 20 Jun 2013 12:13:43 -0400
parents 19b20927172d
children
line wrap: on
line source

 <tool id ="pyCalculateFDRs" name="pyCalculateFDRs">
	<!-- External dependency declared for this wrapper: the pyCRAC package. -->
	<requirements>
		<requirement type="package">pyCRAC</requirement>
	</requirements>
	<!--
	  Command line template (Cheetah) for pyCalculateFDRs.py.
	  Always passed: read file and its type, GTF file, chromosome length file, output file.
	  Optional: an annotation filter (skipped when "all" is selected) and the
	  "edit" group of tuning options. Notes on individual branches are written as
	  Cheetah comments inside the template.
	-->
	<command interpreter="python">
	/usr/local/bin/pyCalculateFDRs.py
	-f $ftype.input
	--file_type $ftype.file_type
	--gtf=$addGTF.gtf
	#if $addGTF.annotate.annotations != "all"
	    #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto"
	        ## The scanned select list only exists once the nested "scan"
	        ## conditional has been switched to "scanning". While it is still
	        ## "wait" there is no value to read, so no annotation filter is
	        ## passed instead of failing on an undefined template variable.
	        #if $addGTF.annotate.scan.annotations == "scanning"
	            --annotation "$addGTF.annotate.scan.annotation"
	        #end if
	    #else
	        ## Manual text entry, or a value picked from a history GTF file.
	        ## Quoted so a value containing spaces stays a single argument.
	        --annotation "$addGTF.annotate.annotation"
	    #end if
	#end if
	--chromfile=$addChr.chr
	#if $addOpt.options == "edit"
	    -s $addOpt.sequence
	    --min $addOpt.min
	    --minfdr $addOpt.minfdr
	    --iterations=$addOpt.iterations
	    --range $addOpt.range
	#end if
	-o $output
	</command>
	<version_command>/usr/local/bin/pyCalculateFDRs.py --version</version_command>
	<inputs>
        <!--
          Read/cDNA interval input. The selected file type decides which
          dataset format the "input" parameter accepts.
        -->
        <conditional name="ftype">
            <param name="file_type"
                   type="select"
                   label="Input File Type --file_type"
                   help="Use bed6, gff or gtf input files containing read/cDNA co-ordinates">
                <option value="gff" selected="true">GFF</option>
                <option value="bed">Bed6</option>
                <option value="gtf">GTF</option>
            </param>
            <when value="gff">
                <param name="input"
                       type="data"
                       format="gff"
                       label="Input File --readdatafile"
                       help="GFF format containing read/cDNA co-ordinates"/>
            </when>
            <when value="gtf">
                <param name="input"
                       type="data"
                       format="gtf"
                       label="Input File --readdatafile"
                       help="GTF format containing read/cDNA co-ordinates"/>
            </when>
            <when value="bed">
                <param name="input"
                       type="data"
                       format="bed6"
                       label="Input File --readdatafile"
                       help="Bed 6 column format containing read/cDNA co-ordinates"/>
            </when>
        </conditional>
	  
        <!--
          Chromosome length file. "default" offers entries from the pycrac_chr
          data table; "other" takes a tabular dataset from the history.
        -->
        <conditional name="addChr">
            <param name="chrfile"
                   type="select"
                   label="Choose Chromosome length file from">
                <option value="default" selected="true">Defaults</option>
                <option value="other">History</option>
            </param>
            <when value="default">
                <param name="chr"
                       type="select"
                       label="Chromosome length file -c"
                       help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes">
                    <options from_data_table="pycrac_chr"/>
                </param>
            </when>
            <when value="other">
                <param name="chr"
                       type="data"
                       format="tabular"
                       label="Chromosome length file -c"
                       help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes. Use pyCrac utility pyCalculateChromosomeLengths to create."/>
            </when>
        </conditional>

	    <!--
	      GTF annotation file and optional annotation filter.
	      "default": GTF comes from the pycrac_gtf data table.
	      "other":   GTF is a dataset from the history.
	      Both branches carry a nested "annotate" conditional with the modes
	      all / manual / auto.
	    -->
	    <conditional name="addGTF">
	      <param name="gtfFile" type="select" label="Choose GTF File from">
	        <option value="default" selected="true">Defaults</option>
	        <option value="other">History</option>
	      </param>

	      <when value="default">
	        <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
	          <options from_data_table="pycrac_gtf"/>
	        </param>
	        <conditional name="annotate">
	          <param name="annotations" type="select" label="Select annotation">
	            <option value="all" selected="true">All</option>
	            <option value="manual">Enter in text box</option>
	            <option value="auto">Scan pyGetGTFSources file</option>
	          </param>
	          <when value="all">
	            <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
	          </when>
	          <when value="manual">
	            <param name="annotation"
	                   type="text"
	                   format="txt"
	                   size="100"
	                   value="protein_coding"
	                   label="Select which annotation to focus search on --annotation"
	                   help="To find a list of available annotations please use pyGetGTFSources tool">
	              <validator type="empty_field" message="Please enter a value"/>
	            </param>
	          </when>
	          <when value="auto">
	            <!-- Annotation names are listed from column 0 of a separate tabular dataset. -->
	            <param name="gtf_annotation"
	                   type="data"
	                   format="tabular"
	                   label="GTF annotation File (pyGetGTFSources output)"
	                   help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
	            <conditional name="scan">
	              <param name="annotations"
	                     type="select"
	                     label="Scan this file for annotations"
	                     help="Choose the correct GTF file then choose GO">
	                <option value="wait" selected="true">Waiting</option>
	                <option value="scanning">Go</option>
	              </param>
	              <!-- No parameters are defined while waiting. -->
	              <when value="wait"/>
	              <when value="scanning">
	                <param name="annotation"
	                       type="select"
	                       multiple="false"
	                       label="Select which annotation to focus search on --annotation">
	                  <options from_dataset="gtf_annotation">
	                    <column name="name" index="0"/>
	                    <column name="value" index="0"/>
	                  </options>
	                </param>
	              </when>
	            </conditional>
	          </when>
	        </conditional>
	      </when>

	      <when value="other">
	        <param name="gtf"
	               type="data"
	               format="gtf"
	               label="GTF File --gtf"
	               help="GTF file containing gene ID co-ordinates"/>
	        <conditional name="annotate">
	          <param name="annotations" type="select" label="Select annotation">
	            <option value="all" selected="true">All</option>
	            <option value="manual">Enter in text box</option>
	            <option value="auto">Scan selected file</option>
	          </param>
	          <when value="all">
	            <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
	          </when>
	          <when value="manual">
	            <param name="annotation"
	                   type="text"
	                   format="txt"
	                   size="100"
	                   value="protein_coding"
	                   label="Select which annotation to focus search on --annotation"
	                   help="To find a list of available annotations please use pyGetGTFSources tool">
	              <validator type="empty_field" message="Please enter a value"/>
	            </param>
	          </when>
	          <when value="auto">
	            <!-- Options are read from column index 1 of the chosen GTF dataset, de-duplicated. -->
	            <param name="annotation"
	                   type="select"
	                   multiple="false"
	                   label="Select which annotation to focus search on --annotation">
	              <options from_dataset="gtf">
	                <column name="name" index="1"/>
	                <column name="value" index="1"/>
	                <filter type="unique_value" name="unique" column="1"/>
	              </options>
	            </param>
	          </when>
	        </conditional>
	      </when>
	    </conditional>
		<!--
		  Tuning options. With "default" nothing extra is exposed; "edit"
		  reveals the sequence type, coverage/FDR thresholds, iteration count
		  and UTR range, each with a range validator where applicable.
		-->
		<conditional name="addOpt">
			<param name="options" type="select" label="Standard options">
				<option value="default" selected="true">Default</option>
				<option value="edit">Edit</option>
			</param>
			<when value="edit">
				<param name="sequence" type="select" label="Align reads to --sequence">
					<option value="genomic" selected="true">Genomic Sequence</option>
					<option value="coding">Coding Sequence</option>
				</param>
				<param name="min"
				       type="integer"
				       format="integer"
				       value="1"
				       size="10"
				       label="Minimum read coverage --min "
				       help="Set the minimal read coverage for a region">
					<validator type="in_range" min="1" message="Please enter a value >= 1"/>
				</param>
				<param name="minfdr"
				       type="float"
				       value="0.05"
				       size="6"
				       label="Minimum FDR threshold --minfdr"
				       help="Set a minimal FDR threshold for filtering interval data">
					<validator type="in_range" min="0" max="1" message="Please enter a value between 0 and 1"/>
				</param>
				<param name="iterations"
				       type="integer"
				       format="integer"
				       value="100"
				       size="6"
				       label="Number of iterations --iterations"
				       help="The number of iterations for randomization of read coordinates">
					<validator type="in_range" min="0" message="Please enter a value >= 0"/>
				</param>
				<param name="range"
				       type="integer"
				       format="integer"
				       value="0"
				       size="5"
				       label="Range --range"
				       help="Manually set the length of the 5' and 3' UTRs 0>50000">
					<validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
				</param>
			</when>
			<!-- Default mode exposes no additional parameters. -->
			<when value="default"/>
		</conditional>
		<!-- Free-text label; the outputs section uses it to name the result dataset. -->
		<param name="label"
		       type="text"
		       format="txt"
		       size="30"
		       value="pyCalculateFDRs"
		       label="Enter output file label -o"/>
	</inputs>
	<!-- Single GTF result dataset, named "<label>.gtf" from the user-supplied label. -->
	<outputs>
		<data name="output"
		      format="gtf"
		      label="${label.value}.gtf"/>
	</outputs>
	<help>

.. class:: infomark

**pyCalculateFDRs**

By default the FDR value is set to 0.05, meaning that there is a 5% chance that the interval is not significantly enriched.
The tool reports significant intervals in the GTF format and reports overlapping genomic features.
Mutation frequencies are not included, but these can be added using the pyCalculateMutationFrequencies tool.

**NOTE!** By default it calls each significant interval an "exon" but this has no meaning! It may overlap with an intron.
Use bedtools to extract those intervals that overlap with introns or other features

Example of an output file::

    ##gff-version 2
    # generated by pyCalculateFDRs version 0.0.3, Sat Jun  1 21:16:23 2013
    # pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05
    # chromosome	feature	source	start	end	minimal_coverage	strand	.	attributes
    chrI	protein_coding	exon	140846	140860	5	-	.	gene_id "YAL005C"; gene_name "SSA1"; 
    chrI	intergenic_region	exon	223118	223164	4	-	.	gene_id "INT_0_179"; gene_name "INT_0_179"; 
    chrI	intergenic_region	exon	71889	71922	3	+	.	gene_id "INT_0_94"; gene_name "INT_0_94"; 
    chrII	intergenic_region	exon	296127	296158	3	-	.	gene_id "INT_0_365"; gene_name "INT_0_365"; 
    chrII	intergenic_region	exon	680697	680722	4	-	.	gene_id "INT_0_626"; gene_name "INT_0_626"; 
    chrII	intergenic_region	exon	680827	680846	4	-	.	gene_id "INT_0_626"; gene_name "INT_0_626"; 
    chrII	snRNA	exon	680827	680838	5	-	.	gene_id "LSR1"; gene_name "LSR1"; 
    chrII	snRNA	exon	680951	681001	5	-	.	gene_id "LSR1"; gene_name "LSR1"; 
    chrII	intergenic_region	exon	577985	577996	3	-	.	gene_id "INT_0_556"; gene_name "INT_0_556"; 
    chrII	protein_coding	exon	203838	203887	3	+	.	gene_id "YBL011W"; gene_name "SCT1"; 
    chrII	protein_coding	exon	296127	296158	3	-	.	gene_id "YBR028C"; gene_name "YBR028C"; 

 
pyCalculateFDRs is part of the pyCRAC_ package. It takes interval information in GTF, GFF or bed format and calculates False Discovery Rates (FDRs).

   
.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
        
------

**Parameter list**

Options::

  -f read_file, --readdatafile=read_file
                        Name of the bed/gff/gtf file containing the read/cDNA
                        coordinates
  --file_type=FILE_TYPE
                        this tool supports bed6, gtf and gff input files.
                        Please select from 'bed','gtf' or 'gff'. Default=gtf
  -o outfile.gtf, --outfile=outfile.gtf
                        Optional. Provide the name of the output file. Default
                        is 'selected_intervals.gtf'
  -r 100, --range=100   
                        allows you to set the length of the UTR regions. If
                        you set '-r 50' or '--range=50', then the program will
                        set a fixed length (50 bp) regardless of whether the
                        GTF file has genes with annotated UTRs.
  -a protein_coding, --annotation=protein_coding
                        select which annotation (i.e. protein_coding, ncRNA,
                        sRNA, rRNA,snoRNA,snRNA, depending on the source of
                        your GTF file) you would like to focus your analysis
                        on. Default = all annotations
  -c yeast.txt, --chromfile=yeast.txt
                        Location of the chromosome info file. This file should
                        have two columns: first column is the names of the
                        chromosomes, second column is length of the
                        chromosomes. Default is yeast
  --gtf=yeast.gtf       
                        Name of the annotation file. Default is /usr/local/pyC
                        RAC/db/Saccharomyces_cerevisiae.EF2.59.1.2.gtf
  -m MINFDR, --minfdr=MINFDR
                        To set a minimal FDR threshold for filtering interval
                        data. Default is 0.05
  --min=MIN             
                        to set a minimal read coverage for a region. Regions
                        with coverage less than the minimum will be ignored
                        or have an FDR of zero
  --iterations=ITERATIONS
                        to set the number of iterations for randomization of
                        read coordinates. Default=100
	</help>
</tool>