Mercurial > repos > swebb > pycrac
view pyCRAC/pyCalculateFDRs.xml @ 1:7c9574213c0a draft default tip
Uploaded
author | swebb |
---|---|
date | Thu, 20 Jun 2013 12:13:43 -0400 |
parents | 19b20927172d |
children |
line wrap: on
line source
<tool id="pyCalculateFDRs" name="pyCalculateFDRs">
    <!--
        Galaxy wrapper for pyCalculateFDRs.py (pyCRAC package).
        Takes read/cDNA interval data (GFF, GTF or BED6), a GTF annotation and a
        chromosome length file, and writes a GTF of significant intervals.
    -->
    <requirements>
        <requirement type="package">pyCRAC</requirement>
    </requirements>
    <command interpreter="python">
        /usr/local/bin/pyCalculateFDRs.py
        -f '$ftype.input'
        --file_type $ftype.file_type
        --gtf='$addGTF.gtf'
        ## "all" passes no --annotation flag, so the script uses its own default (all annotations).
        #if $addGTF.annotate.annotations == "manual":
            --annotation '$addGTF.annotate.annotation'
        #elif $addGTF.annotate.annotations == "auto":
            #if $addGTF.gtfFile == "default":
                ## The annotation select only exists once the scan conditional is set to "scanning";
                ## referencing it while still "wait" would make the template fail.
                #if $addGTF.annotate.scan.annotations == "scanning":
                    --annotation '$addGTF.annotate.scan.annotation'
                #end if
            #else:
                --annotation '$addGTF.annotate.annotation'
            #end if
        #end if
        --chromfile='$addChr.chr'
        #if $addOpt.options == "edit":
            -s $addOpt.sequence
            --min $addOpt.min
            --minfdr $addOpt.minfdr
            --iterations=$addOpt.iterations
            --range $addOpt.range
        #end if
        -o '$output'
    </command>
    <version_command>/usr/local/bin/pyCalculateFDRs.py --version</version_command>
    <inputs>
        <!-- Input interval file: the selected type decides which dataset format is offered. -->
        <conditional name="ftype">
            <param name="file_type" type="select" label="Input File Type --file_type" help="Use bed6, gff or gtf input files containing read/cDNA co-ordinates">
                <option value="gff" selected="true">GFF</option>
                <option value="bed">Bed6</option>
                <option value="gtf">GTF</option>
            </param>
            <when value="gff">
                <param format="gff" name="input" type="data" label="Input File --readdatafile" help="GFF format containing read/cDNA co-ordinates"/>
            </when>
            <when value="gtf">
                <param format="gtf" name="input" type="data" label="Input File --readdatafile" help="GTF format containing read/cDNA co-ordinates"/>
            </when>
            <when value="bed">
                <param format="bed6" name="input" type="data" label="Input File --readdatafile" help="Bed 6 column format containing read/cDNA co-ordinates"/>
            </when>
        </conditional>

        <!-- Chromosome length file: from the pycrac_chr data table or from the history. -->
        <conditional name="addChr">
            <param name="chrfile" type="select" label="Choose Chromosome length file from">
                <option value="default" selected="true">Defaults</option>
                <option value="other">History</option>
            </param>
            <when value="default">
                <param name="chr" type="select" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes">
                    <options from_data_table="pycrac_chr"/>
                </param>
            </when>
            <when value="other">
                <param format="tabular" name="chr" type="data" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes. Use pyCrac utility pyCalculateChromosomeLengths to create."/>
            </when>
        </conditional>

        <!-- GTF annotation file plus the optional restriction to a single annotation/source. -->
        <conditional name="addGTF">
            <param name="gtfFile" type="select" label="Choose GTF File from">
                <option value="default" selected="true">Defaults</option>
                <option value="other">History</option>
            </param>
            <when value="default">
                <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
                    <options from_data_table="pycrac_gtf"/>
                </param>
                <conditional name="annotate">
                    <param name="annotations" type="select" label="Select annotation">
                        <option value="all" selected="true">All</option>
                        <option value="manual">Enter in text box</option>
                        <option value="auto">Scan pyGetGTFSources file</option>
                    </param>
                    <when value="all">
                        <param name="annotation" type="hidden" size="10" value="all"/>
                    </when>
                    <when value="manual">
                        <param name="annotation" type="text" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
                            <validator type="empty_field" message="Please enter a value"/>
                        </param>
                    </when>
                    <when value="auto">
                        <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
                        <!-- Two-step selection: the annotation list is only offered after the user chooses Go. -->
                        <conditional name="scan">
                            <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
                                <option value="wait" selected="true">Waiting</option>
                                <option value="scanning">Go</option>
                            </param>
                            <when value="wait"/>
                            <when value="scanning">
                                <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
                                    <options from_dataset="gtf_annotation">
                                        <column name="name" index="0"/>
                                        <column name="value" index="0"/>
                                    </options>
                                </param>
                            </when>
                        </conditional>
                    </when>
                </conditional>
            </when>
            <when value="other">
                <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
                <conditional name="annotate">
                    <param name="annotations" type="select" label="Select annotation">
                        <option value="all" selected="true">All</option>
                        <option value="manual">Enter in text box</option>
                        <option value="auto">Scan selected file</option>
                    </param>
                    <when value="all">
                        <param name="annotation" type="hidden" size="10" value="all"/>
                    </when>
                    <when value="manual">
                        <param name="annotation" type="text" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
                            <validator type="empty_field" message="Please enter a value"/>
                        </param>
                    </when>
                    <when value="auto">
                        <!-- Options are the unique values of column index 1 of the selected GTF dataset. -->
                        <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
                            <options from_dataset="gtf">
                                <column name="name" index="1"/>
                                <column name="value" index="1"/>
                                <filter type="unique_value" name="unique" column="1"/>
                            </options>
                        </param>
                    </when>
                </conditional>
            </when>
        </conditional>

        <!-- Tuning options: only passed on the command line when "Edit" is chosen. -->
        <conditional name="addOpt">
            <param name="options" type="select" label="Standard options">
                <option value="default" selected="true">Default</option>
                <option value="edit">Edit</option>
            </param>
            <when value="edit">
                <param name="sequence" type="select" label="Align reads to --sequence">
                    <option value="genomic" selected="true">Genomic Sequence</option>
                    <option value="coding">Coding Sequence</option>
                </param>
                <param name="min" type="integer" label="Minimum read coverage --min " value="1" size="10" help="Set the minimal read coverage for a region">
                    <validator type="in_range" min="1" message="Please enter a value &gt;= 1"/>
                </param>
                <param name="minfdr" type="float" label="Minimum FDR threshold --minfdr" value="0.05" size="6" help="Set a minimal FDR threshold for filtering interval data">
                    <validator type="in_range" min="0" max="1" message="Please enter a value between 0 and 1"/>
                </param>
                <param name="iterations" type="integer" label="Number of iterations --iterations" value="100" size="6" help="The number of iterations for randomization of read coordinates">
                    <validator type="in_range" min="0" message="Please enter a value &gt;= 0"/>
                </param>
                <param name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs (0 to 50000)">
                    <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
                </param>
            </when>
            <when value="default"/>
        </conditional>

        <!-- Used only to build the history label of the output dataset. -->
        <param name="label" type="text" size="30" value="pyCalculateFDRs" label="Enter output file label -o"/>
    </inputs>
    <outputs>
        <data format="gtf" name="output" label="${label.value}.gtf"/>
    </outputs>
    <help>
.. class:: infomark

**pyCalculateFDRs**

By default the FDR value is set to 0.05, meaning that there is a 5% chance that the interval is not significantly enriched.
The tool reports significant intervals in the GTF format and reports overlapping genomic features.
Mutation frequencies are not included but these can be added using the pyCalculateMutationFrequencies tool

**NOTE!** By default it calls each significant interval an "exon" but this has no meaning! It may overlap with an intron.
Use bedtools to extract those intervals that overlap with introns or other features

Example of an output file::

    ##gff-version 2
    # generated by pyCalculateFDRs version 0.0.3, Sat Jun  1 21:16:23 2013
    # pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05
    # chromosome    feature             source  start   end     minimal_coverage    strand  .   attributes
    chrI            protein_coding      exon    140846  140860  5                   -       .   gene_id "YAL005C"; gene_name "SSA1";
    chrI            intergenic_region   exon    223118  223164  4                   -       .   gene_id "INT_0_179"; gene_name "INT_0_179";
    chrI            intergenic_region   exon    71889   71922   3                   +       .   gene_id "INT_0_94"; gene_name "INT_0_94";
    chrII           intergenic_region   exon    296127  296158  3                   -       .   gene_id "INT_0_365"; gene_name "INT_0_365";
    chrII           intergenic_region   exon    680697  680722  4                   -       .   gene_id "INT_0_626"; gene_name "INT_0_626";
    chrII           intergenic_region   exon    680827  680846  4                   -       .   gene_id "INT_0_626"; gene_name "INT_0_626";
    chrII           snRNA               exon    680827  680838  5                   -       .   gene_id "LSR1"; gene_name "LSR1";
    chrII           snRNA               exon    680951  681001  5                   -       .   gene_id "LSR1"; gene_name "LSR1";
    chrII           intergenic_region   exon    577985  577996  3                   -       .   gene_id "INT_0_556"; gene_name "INT_0_556";
    chrII           protein_coding      exon    203838  203887  3                   +       .   gene_id "YBL011W"; gene_name "SCT1";
    chrII           protein_coding      exon    296127  296158  3                   -       .   gene_id "YBR028C"; gene_name "YBR028C";

pyCalculateFDRs is part of the pyCRAC_ package. Takes interval information in GTF or bed format and calculates False Discovery Rates (FDRs).

.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html

------

**Parameter list**

Options::

    -f read_file, --readdatafile=read_file
                        Name of the bed/gff/gtf file containing the read/cDNA
                        coordinates
    --file_type=FILE_TYPE
                        this tool supports bed6, gtf and gff input files.
                        Please select from 'bed','gtf' or 'gff'. Default=gtf
    -o outfile.gtf, --outfile=outfile.gtf
                        Optional. Provide the name of the output file. Default
                        is 'selected_intervals.gtf'
    -r 100, --range=100
                        allows you to set the length of the UTR regions. If
                        you set '-r 50' or '--range=50', then the program will
                        set a fixed length (50 bp) regardless of whether the
                        GTF file has genes with annotated UTRs.
    -a protein_coding, --annotation=protein_coding
                        select which annotation (i.e. protein_coding, ncRNA,
                        sRNA, rRNA,snoRNA,snRNA, depending on the source of
                        your GTF file) you would like to focus your analysis
                        on. Default = all annotations
    -c yeast.txt, --chromfile=yeast.txt
                        Location of the chromosome info file. This file should
                        have two columns: first column is the names of the
                        chromosomes, second column is length of the
                        chromosomes. Default is yeast
    --gtf=yeast.gtf     Name of the annotation file. Default is
                        /usr/local/pyCRAC/db/Saccharomyces_cerevisiae.EF2.59.1.2.gtf
    -m MINFDR, --minfdr=MINFDR
                        To set a minimal FDR threshold for filtering interval
                        data. Default is 0.05
    --min=MIN           to set a minimal read coverages for a region. Regions
                        with coverage less than minimum will be ignored or
                        have an FDR of zero
    --iterations=ITERATIONS
                        to set the number of iterations for randomization of
                        read coordinates. Default=100
    </help>
</tool>