comparison pyCRAC/pyCalculateMutationFrequencies.xml @ 0:19b20927172d draft

Uploaded
author swebb
date Tue, 18 Jun 2013 09:11:00 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:19b20927172d
1 <tool id ="pyCalculateMutationFrequencies" name="pyCalculateMutationFrequencies">
2 <requirements>
3 <requirement type="package">pyCRAC</requirement>
4 </requirements>
5 <command interpreter="python">
6 /usr/local/bin/pyCalculateMutationFrequencies.py
7 -r $readdatafile
8 -i $intervaldatafile
9 -c $addChr.chr
10 -o $output
11 --mutsfreq $mutsfreq
12 </command>
13 <version_command>/usr/local/bin/pyCalculateMutationFrequencies.py --version</version_command>
14 <inputs>
15 <param format="gff" name="readdatafile" type="data" label="GFF Reads File --readdatafile" help="GFF file containing read data" />
16 <param format="gtf" name="intervaldatafile" type="data" label="GTF Interval File --intervaldatafile" help="GTF file containing interval coordinates"/>
17 <conditional name="addChr">
18 <param name="chrfile" type="select" label="Choose Chromosome length file from">
19 <option value="default" selected="true">Defaults</option>
20 <option value="other">History</option>
21 </param>
22 <when value="default">
23 <param name="chr" type="select" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes. Use the pyCRAC utility pyCalculateChromosomeLengths to create it.">
24 <options from_data_table="pycrac_chr"/>
25 </param>
26 </when>
27 <when value="other">
28 <param format="tabular" name="chr" type="data" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes"/>
29 </when>
30 </conditional>
31
32 <param format="integer" name="mutsfreq" type="integer" label="Minimum mutation frequency --mutsfreq " value="0" size="10" help="Sets the minimum mutation frequency for an interval that you want to have written to your output file">
33 <validator type="in_range" min="0" message="Please enter a value >= 0"/>
34 </param>
35 <param name="label" type="text" format="txt" size="30" value="pyCalculateMutationFrequencies" label="Enter output file label -o" />
36 </inputs>
37 <outputs>
38 <data format="gtf" name="output" label="${label.value}.gtf"/>
39 </outputs>
40 <help>
41
42 .. class:: infomark
43
44 **pyCalculateMutationFrequencies**
45
46 pyCalculateMutationFrequencies is part of the pyCRAC_ package. Takes an interval file and a pyReadCounters GTF file and calculates (cross-linking induced) mutation frequencies for each interval.
47 This tool can be used to calculate mutation frequencies for significant intervals (pyCalculateFDRs output file) or over-represented motifs (pyMotif GTF output file).
48 It expects a pyCRAC GTF count_output_reads.gtf file and a GTF file with the intervals.
49
50 For example::
51
52 This pyCalculateFDRs GTF output file::
53
54 ##gff-version 2
55 # generated by pyCalculateFDRs version 0.0.3, Sat Jun 1 21:16:23 2013
56 # pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05
57 # chromosome feature source start end minimal_coverage strand . attributes
58 chrII protein_coding exon 203838 203887 3 + . gene_id "YBL011W"; gene_name "SCT1";
59 chrII intergenic_region exon 407669 407708 3 + . gene_id "INT_0_445"; gene_name "INT_0_445";
60 chrII intergenic_region exon 585158 585195 2 + . gene_id "INT_0_562"; gene_name "INT_0_562";
61 chrII protein_coding exon 372390 372433 4 - . gene_id "YBR067C"; gene_name "TIP1";
62 chrII intergenic_region exon 380754 380815 6 - . gene_id "INT_0_431"; gene_name "INT_0_431";
63 chrIII protein_coding exon 138001 138044 5 + . gene_id "YCR012W"; gene_name "PGK1";
64 chrIII intergenic_region exon 227997 228036 5 + . gene_id "INT_0_885"; gene_name "INT_0_885";
65 chrIII intergenic_region exon 227997 228037 4 + . gene_id "INT_0_887"; gene_name "INT_0_887";
66 chrIII tRNA exon 227997 228037 4 + . gene_id "tS(CGA)C"; gene_name "SUP61";
67
68 Will be converted into::
69
70 ##gff-version 2
71 # generated by pyCalculateFDRs version 0.0.3, Sat Jun 1 21:16:23 2013
72 # /Library/Frameworks/EPD64.framework/Versions/Current/bin/pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05
73 # chromosome feature source start end minimal_coverage strand . attributes
74 chrII protein_coding exon 203838 203887 3 + . gene_id "YBL011W"; gene_name "SCT1"; # 203882D33.3,203883D33.3,203884D33.3;
75 chrII intergenic_region exon 407669 407708 3 + . gene_id "INT_0_445"; gene_name "INT_0_445"; # 407680D33.3,407681D33.3;
76 chrII intergenic_region exon 585158 585195 2 + . gene_id "INT_0_562"; gene_name "INT_0_562"; # 585171D100.0,585172D100.0,585173D100.0;
77 chrII protein_coding exon 372390 372433 4 - . gene_id "YBR067C"; gene_name "TIP1"; # 372412D50.0,372413D50.0;
78 chrII intergenic_region exon 380754 380815 6 - . gene_id "INT_0_431"; gene_name "INT_0_431"; # 380786D90.2,380787D90.2;
79 chrIII protein_coding exon 138001 138044 5 + . gene_id "YCR012W"; gene_name "PGK1"; # 138025D40.0,138026D30.0,138027D40.0;
80 chrIII intergenic_region exon 227997 228036 5 + . gene_id "INT_0_885"; gene_name "INT_0_885"; # 228006D85.7,228007D100.0;
81 chrIII intergenic_region exon 227997 228037 4 + . gene_id "INT_0_887"; gene_name "INT_0_887"; # 228006D85.7,228007D100.0;
82 chrIII tRNA exon 227997 228037 4 + . gene_id "tS(CGA)C"; gene_name "SUP61"; # 228006D85.7,228007D100.0;
83
84
85 The text after the hash character (#) at the end of each line lists the chromosomal coordinates of mutated nucleotides within the cluster interval and their mutation frequencies.
86
87 For example::
88
89 # 228007D100.0
90
91 indicates that the nucleotide at position 228007 was deleted in 100% of the reads in the interval.
92
93 By setting the --mutsfreq flag you can set a limit for the lowest mutation frequency that you want to have reported.
94 This makes it relatively easy to select those significant regions that have nucleotides with high mutation frequencies.
95
96 .. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
97
98 ------
99
100 **Parameter list**
101
102 Options::
103
104 -i intervals.gtf, --intervaldatafile=intervals.gtf
105 provide the path to your GTF interval data file.
106 -r reads.gtf, --readdatafile=reads.gtf
107 provide the path to your GTF read data file.
108 -c yeast.txt, --chromfile=yeast.txt
109 Location of the chromosome info file. This file should
110 have two columns: first column is the names of the
111 chromosomes, second column is length of the
112 chromosomes. Default is yeast
113 -o intervals_with_muts.gtf, --output_file=intervals_with_muts.gtf
114 provide a name for an output file. By default it
115 writes to the standard output
116 --mutsfreq=10, --mutationfrequency=10
117 sets the minimum mutation frequency for an interval
118 that you want to have written to your output file.
119 Default = 0%. Example: if the mutsfrequency is set at
120 10 and an interval position is mutated in less than
121 10% of the reads, then the mutation will not be
122 reported.
123
124
125 </help>
126 </tool>