Mercurial > repos > swebb > pycrac
diff pyCRAC/pyCalculateFDRs.xml @ 0:19b20927172d draft
Uploaded
author | swebb |
---|---|
date | Tue, 18 Jun 2013 09:11:00 -0400 |
parents | |
children |
line wrap: on
line diff
<!--
  pyCRAC/pyCalculateFDRs.xml, reconstructed from the changeset diff that added
  the file (Tue Jun 18 09:11:00 2013 -0400, 247 lines in the original hunk).

  Galaxy tool definition wrapping /usr/local/bin/pyCalculateFDRs.py.
  The command template passes the read/cDNA interval file, its file type,
  a GTF annotation file and a chromosome length file to the script, plus the
  optional settings from the "Standard options" conditional, and writes the
  result to the single GTF output dataset.
-->
<tool id="pyCalculateFDRs" name="pyCalculateFDRs">
    <requirements>
        <requirement type="package">pyCRAC</requirement>
    </requirements>
    <command interpreter="python">
        /usr/local/bin/pyCalculateFDRs.py
        -f $ftype.input
        --file_type $ftype.file_type
        --gtf=$addGTF.gtf

        ## Pass an annotation only when the user has actually chosen one.
        ## "all" passes nothing. "manual" uses the text box. "auto" uses the
        ## select built from the history GTF (gtfFile == "other") or, for a
        ## default GTF, the nested "scan" select. That nested select exists
        ## only once the scan conditional is switched to "scanning"; while it
        ## is still "wait" there is no value to reference, so nothing is passed.
        #if $addGTF.annotate.annotations == "manual":
            --annotation "$addGTF.annotate.annotation"
        #elif $addGTF.annotate.annotations == "auto":
            #if $addGTF.gtfFile == "other":
                --annotation "$addGTF.annotate.annotation"
            #elif $addGTF.annotate.scan.annotations == "scanning":
                --annotation "$addGTF.annotate.scan.annotation"
            #end if
        #end if
        --chromfile=$addChr.chr
        ## The optional settings are only emitted when "Edit" is selected.
        #if $addOpt.options == "edit":
            -s $addOpt.sequence
            --min $addOpt.min
            --minfdr $addOpt.minfdr
            --iterations=$addOpt.iterations
            --range $addOpt.range
        #end if
        -o $output
    </command>
    <version_command>/usr/local/bin/pyCalculateFDRs.py --version</version_command>
    <inputs>
        <!-- Input interval file; the chosen type decides which datatype is offered. -->
        <conditional name="ftype">
            <param name="file_type" type="select" label="Input File Type --file_type" help="Use bed6, gff or gtf input files containing read/cDNA co-ordinates">
                <option value="gff" selected="true">GFF</option>
                <option value="bed">Bed6</option>
                <option value="gtf">GTF</option>
            </param>
            <when value="gff">
                <param format="gff" name="input" type="data" label="Input File --readdatafile" help="GFF format containing read/cDNA co-ordinates" />
            </when>
            <when value="gtf">
                <param format="gtf" name="input" type="data" label="Input File --readdatafile" help="GTF format containing read/cDNA co-ordinates" />
            </when>
            <when value="bed">
                <param format="bed6" name="input" type="data" label="Input File --readdatafile" help="Bed 6 column format containing read/cDNA co-ordinates" />
            </when>
        </conditional>

        <!-- Chromosome length file: from the pycrac_chr data table or from the history. -->
        <conditional name="addChr">
            <param name="chrfile" type="select" label="Choose Chromosome length file from">
                <option value="default" selected="true">Defaults</option>
                <option value="other">History</option>
            </param>
            <when value="default">
                <param name="chr" type="select" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes">
                    <options from_data_table="pycrac_chr"/>
                </param>
            </when>
            <when value="other">
                <param format="tabular" name="chr" type="data" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes. Use pyCrac utility pyCalculateChromosomeLengths to create."/>
            </when>
        </conditional>

        <!--
          GTF annotation file: from the pycrac_gtf data table or from the history.
          Each branch carries its own "annotate" conditional; the command
          template relies on the parameter names used here.
        -->
        <conditional name="addGTF">
            <param name="gtfFile" type="select" label="Choose GTF File from">
                <option value="default" selected="true">Defaults</option>
                <option value="other">History</option>
            </param>
            <when value="default">
                <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
                    <options from_data_table="pycrac_gtf"/>
                </param>
                <conditional name="annotate">
                    <param name="annotations" type="select" label="Select annotation">
                        <option value="all" selected="true">All</option>
                        <option value="manual">Enter in text box</option>
                        <option value="auto">Scan pyGetGTFSources file</option>
                    </param>
                    <when value="all">
                        <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
                    </when>
                    <when value="manual">
                        <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
                            <validator type="empty_field" message="Please enter a value"/>
                        </param>
                    </when>
                    <when value="auto">
                        <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
                        <!-- The annotation select only exists in the "scanning" branch. -->
                        <conditional name="scan">
                            <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
                                <option value="wait" selected="true">Waiting</option>
                                <option value="scanning">Go</option>
                            </param>
                            <when value="wait">
                            </when>
                            <when value="scanning">
                                <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
                                    <options from_dataset="gtf_annotation">
                                        <column name="name" index="0"/>
                                        <column name="value" index="0"/>
                                    </options>
                                </param>
                            </when>
                        </conditional>
                    </when>
                </conditional>
            </when>
            <when value="other">
                <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
                <conditional name="annotate">
                    <param name="annotations" type="select" label="Select annotation">
                        <option value="all" selected="true">All</option>
                        <option value="manual">Enter in text box</option>
                        <option value="auto">Scan selected file</option>
                    </param>
                    <when value="all">
                        <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
                    </when>
                    <when value="manual">
                        <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
                            <validator type="empty_field" message="Please enter a value"/>
                        </param>
                    </when>
                    <when value="auto">
                        <!-- Options are the unique values of column index 1 of the selected GTF dataset. -->
                        <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
                            <options from_dataset="gtf">
                                <column name="name" index="1"/>
                                <column name="value" index="1"/>
                                <filter type="unique_value" name="unique" column="1"/>
                            </options>
                        </param>
                    </when>
                </conditional>
            </when>
        </conditional>

        <!-- Optional settings; nothing is added to the command line unless "Edit" is chosen. -->
        <conditional name="addOpt">
            <param name="options" type="select" label="Standard options">
                <option value="default" selected="true">Default</option>
                <option value="edit">Edit</option>
            </param>
            <when value="edit">
                <param name="sequence" type="select" label="Align reads to --sequence">
                    <option value="genomic" selected="true">Genomic Sequence</option>
                    <option value="coding">Coding Sequence</option>
                </param>
                <param format="integer" name="min" type="integer" label="Minimum read coverage --min " value="1" size="10" help="Set the minimal read coverage for a region">
                    <validator type="in_range" min="1" message="Please enter a value &gt;= 1"/>
                </param>
                <param name="minfdr" type="float" label="Minimum FDR threshold --minfdr" value="0.05" size="6" help="Set a minimal FDR threshold for filtering interval data">
                    <validator type="in_range" min="0" max="1" message="Please enter a value between 0 and 1"/>
                </param>
                <param format="integer" name="iterations" type="integer" label="Number of iterations --iterations" value="100" size="6" help="The number of iterations for randomization of read coordinates">
                    <validator type="in_range" min="0" message="Please enter a value &gt;= 0"/>
                </param>
                <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0>50000">
                    <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
                </param>
            </when>
            <when value="default">
            </when>
        </conditional>
        <!-- Used only to build the label of the output dataset. -->
        <param name="label" type="text" format="txt" size="30" value="pyCalculateFDRs" label="Enter output file label -o" />
    </inputs>
    <outputs>
        <data format="gtf" name="output" label="${label.value}.gtf"/>
    </outputs>
    <help>

.. class:: infomark

**pyCalculateFDRs**

By default the FDR value is set to 0.05, meaning that there is a 5% chance that the interval is not significantly enriched.
The tool reports significant intervals in the GTF format and reports overlapping genomic features.
Mutation frequencies are not included but these can be added using the pyCalculateMutationFrequencies tool

**NOTE!** By default it calls each significant interval an "exon" but this has no meaning! It may overlap with an intron.
Use bedtools to extract those intervals that overlap with introns or other features

Example of an output file::

    ##gff-version 2
    # generated by pyCalculateFDRs version 0.0.3, Sat Jun 1 21:16:23 2013
    # pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05
    # chromosome feature source start end minimal_coverage strand . attributes
    chrI    protein_coding      exon    140846  140860  5   -   .   gene_id "YAL005C"; gene_name "SSA1";
    chrI    intergenic_region   exon    223118  223164  4   -   .   gene_id "INT_0_179"; gene_name "INT_0_179";
    chrI    intergenic_region   exon    71889   71922   3   +   .   gene_id "INT_0_94"; gene_name "INT_0_94";
    chrII   intergenic_region   exon    296127  296158  3   -   .   gene_id "INT_0_365"; gene_name "INT_0_365";
    chrII   intergenic_region   exon    680697  680722  4   -   .   gene_id "INT_0_626"; gene_name "INT_0_626";
    chrII   intergenic_region   exon    680827  680846  4   -   .   gene_id "INT_0_626"; gene_name "INT_0_626";
    chrII   snRNA               exon    680827  680838  5   -   .   gene_id "LSR1"; gene_name "LSR1";
    chrII   snRNA               exon    680951  681001  5   -   .   gene_id "LSR1"; gene_name "LSR1";
    chrII   intergenic_region   exon    577985  577996  3   -   .   gene_id "INT_0_556"; gene_name "INT_0_556";
    chrII   protein_coding      exon    203838  203887  3   +   .   gene_id "YBL011W"; gene_name "SCT1";
    chrII   protein_coding      exon    296127  296158  3   -   .   gene_id "YBR028C"; gene_name "YBR028C";


pyCalculateFDRs is part of the pyCRAC_ package. Takes interval information in GTF or bed format and calculates False Discovery Rates (FDRs).


.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html

------

**Parameter list**

Options::

    -f read_file, --readdatafile=read_file
                        Name of the bed/gff/gtf file containing the read/cDNA
                        coordinates
    --file_type=FILE_TYPE
                        this tool supports bed6, gtf and gff input files.
                        Please select from 'bed','gtf' or 'gff'. Default=gtf
    -o outfile.gtf, --outfile=outfile.gtf
                        Optional. Provide the name of the output file. Default
                        is 'selected_intervals.gtf'
    -r 100, --range=100
                        allows you to set the length of the UTR regions. If
                        you set '-r 50' or '--range=50', then the program will
                        set a fixed length (50 bp) regardless of whether the
                        GTF file has genes with annotated UTRs.
    -a protein_coding, --annotation=protein_coding
                        select which annotation (i.e. protein_coding, ncRNA,
                        sRNA, rRNA,snoRNA,snRNA, depending on the source of
                        your GTF file) you would like to focus your analysis
                        on. Default = all annotations
    -c yeast.txt, --chromfile=yeast.txt
                        Location of the chromosome info file. This file should
                        have two columns: first column is the names of the
                        chromosomes, second column is length of the
                        chromosomes. Default is yeast
    --gtf=yeast.gtf
                        Name of the annotation file. Default is
                        /usr/local/pyCRAC/db/Saccharomyces_cerevisiae.EF2.59.1.2.gtf
    -m MINFDR, --minfdr=MINFDR
                        To set a minimal FDR threshold for filtering interval
                        data. Default is 0.05
    --min=MIN
                        to set a minimal read coverages for a region. Regions
                        with coverage less than minimum will be ignored
    --iterations=ITERATIONS
                        to set the number of iterations for randomization of
                        read coordinates. Default=100

    </help>
</tool>