Mercurial > repos > swebb > pycrac
diff pyCRAC/pyCalculateMutationFrequencies.xml @ 0:19b20927172d draft
Uploaded
author | swebb |
---|---|
date | Tue, 18 Jun 2013 09:11:00 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyCRAC/pyCalculateMutationFrequencies.xml Tue Jun 18 09:11:00 2013 -0400 @@ -0,0 +1,126 @@ +<tool id ="pyCalculateMutationFrequencies" name="pyCalculateMutationFrequencies"> + <requirements> + <requirement type="package">pyCRAC</requirement> + </requirements> + <command interpreter="python"> + /usr/local/bin/pyCalculateMutationFrequencies.py + -r $readdatafile + -i $intervaldatafile + -c $addChr.chr + -o $output + --mutsfreq $mutsfreq + </command> + <version_command>/usr/local/bin/pyCalculateMutationFrequencies.py --version</version_command> + <inputs> + <param format="gff" name="readdatafile" type="data" label="GFF Reads File --readdatafile" help="GFF file containing read data" /> + <param format="gtf" name="intervaldatafile" type="data" label="GFF Interval File --intervaldatafile" help="GFF file containing interval co-ordinates"/> + <conditional name="addChr"> + <param name="chrfile" type="select" label="Choose Chromosome length file from"> + <option value="default" selected="true">Defaults</option> + <option value="other">History</option> + </param> + <when value="default"> + <param name="chr" type="select" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes. Use the pyCRAC utility pyCalculateChromosomeLengths to create it."> + <options from_data_table="pycrac_chr"/> + </param> + </when> + <when value="other"> + <param format="tabular" name="chr" type="data" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes"/> + </when> + </conditional> + + <param format="integer" name="mutsfreq" type="integer" label="Minimum mutation frequency --mutsfreq " value="0" size="10" help="sets the minimal mutations frequency for an interval that you want to have written to your output file"> + <validator type="in_range" 
min="0" message="Please enter a value >= 0"/> + </param> + <param name="label" type="text" format="txt" size="30" value="pyCalculateMutationFrequencies" label="Enter output file label -o" /> + </inputs> + <outputs> + <data format="gtf" name="output" label="${label.value}.gtf"/> + </outputs> + <help> + +.. class:: infomark + +**pyCalculateMutationFrequencies** + +pyCalculateMutationFrequencies is part of the pyCRAC_ package. Takes an interval file and a pyReadCounters GTF file and calculates (cross-linking induced) mutation frequencies for each interval. +This tool can be used to calculate mutation frequencies for significant intervals (pyCalculateFDRs output file) or over-represented motifs (pyMotif GTF output file). +It expects a pyCRAC GTF count_output_reads.gtf file and a GTF file with the intervals. + +For example:: + + This pyCalculateFDRs GTF output file:: + + ##gff-version 2 + # generated by pyCalculateFDRs version 0.0.3, Sat Jun 1 21:16:23 2013 + # pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05 + # chromosome feature source start end minimal_coverage strand . attributes + chrII protein_coding exon 203838 203887 3 + . gene_id "YBL011W"; gene_name "SCT1"; + chrII intergenic_region exon 407669 407708 3 + . gene_id "INT_0_445"; gene_name "INT_0_445"; + chrII intergenic_region exon 585158 585195 2 + . gene_id "INT_0_562"; gene_name "INT_0_562"; + chrII protein_coding exon 372390 372433 4 - . gene_id "YBR067C"; gene_name "TIP1"; + chrII intergenic_region exon 380754 380815 6 - . gene_id "INT_0_431"; gene_name "INT_0_431"; + chrIII protein_coding exon 138001 138044 5 + . gene_id "YCR012W"; gene_name "PGK1"; + chrIII intergenic_region exon 227997 228036 5 + . gene_id "INT_0_885"; gene_name "INT_0_885"; + chrIII intergenic_region exon 227997 228037 4 + . gene_id "INT_0_887"; gene_name "INT_0_887"; + chrIII tRNA exon 227997 228037 4 + . 
gene_id "tS(CGA)C"; gene_name "SUP61"; + + Will be converted into:: + + ##gff-version 2 + # generated by pyCalculateFDRs version 0.0.3, Sat Jun 1 21:16:23 2013 + # /Library/Frameworks/EPD64.framework/Versions/Current/bin/pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05 + # chromosome feature source start end minimal_coverage strand . attributes + chrII protein_coding exon 203838 203887 3 + . gene_id "YBL011W"; gene_name "SCT1"; # 203882D33.3,203883D33.3,203884D33.3; + chrII intergenic_region exon 407669 407708 3 + . gene_id "INT_0_445"; gene_name "INT_0_445"; # 407680D33.3,407681D33.3; + chrII intergenic_region exon 585158 585195 2 + . gene_id "INT_0_562"; gene_name "INT_0_562"; # 585171D100.0,585172D100.0,585173D100.0; + chrII protein_coding exon 372390 372433 4 - . gene_id "YBR067C"; gene_name "TIP1"; # 372412D50.0,372413D50.0; + chrII intergenic_region exon 380754 380815 6 - . gene_id "INT_0_431"; gene_name "INT_0_431"; # 380786D90.2,380787D90.2; + chrIII protein_coding exon 138001 138044 5 + . gene_id "YCR012W"; gene_name "PGK1"; # 138025D40.0,138026D30.0,138027D40.0; + chrIII intergenic_region exon 227997 228036 5 + . gene_id "INT_0_885"; gene_name "INT_0_885"; # 228006D85.7,228007D100.0; + chrIII intergenic_region exon 227997 228037 4 + . gene_id "INT_0_887"; gene_name "INT_0_887"; # 228006D85.7,228007D100.0; + chrIII tRNA exon 227997 228037 4 + . gene_id "tS(CGA)C"; gene_name "SUP61"; # 228006D85.7,228007D100.0; + + +The text following the hash character (#) at the end of each line lists the chromosomal coordinates of mutated nucleotides within the cluster interval and their mutation frequencies. + +For example:: + + # 228007D100.0 + +indicates that 100% of the nucleotides at position 228007 were deleted in the interval. + +By setting the --mutsfreq flag you can set a limit for the lowest mutation frequency that you want to have reported. 
+This makes it relatively easy to select those significant regions that have nucleotides with high mutation frequencies. + +.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html + +------ + +**Parameter list** + +Options:: + + -i intervals.gtf, --intervaldatafile=intervals.gtf + provide the path to your GTF interval data file. + -r reads.gtf, --readdatafile=reads.gtf + provide the path to your GTF read data file. + -c yeast.txt, --chromfile=yeast.txt + Location of the chromosome info file. This file should + have two columns: first column is the names of the + chromosomes, second column is length of the + chromosomes. Default is yeast + -o intervals_with_muts.gtf, --output_file=intervals_with_muts.gtf + provide a name for an output file. By default it + writes to the standard output + --mutsfreq=10, --mutationfrequency=10 + sets the minimal mutations frequency for an interval + that you want to have written to your output file. + Default = 0%. Example: if the mutsfrequency is set at + 10 and an interval position is mutated in less than + 10% of the reads, then the mutation will not be + reported. + + + </help> + </tool> \ No newline at end of file