diff pyCRAC/pyCalculateMutationFrequencies.xml @ 0:19b20927172d draft

Uploaded
author swebb
date Tue, 18 Jun 2013 09:11:00 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyCalculateMutationFrequencies.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,126 @@
+<tool id ="pyCalculateMutationFrequencies" name="pyCalculateMutationFrequencies">
+	<requirements> <!-- Declares the pyCRAC package as the dependency of this tool -->
+        	<requirement type="package">pyCRAC</requirement>
+    	</requirements>
+	<command interpreter="python"> 
+	/usr/local/bin/pyCalculateMutationFrequencies.py
+	-r $readdatafile
+	-i $intervaldatafile
+	-c $addChr.chr
+	-o $output
+	--mutsfreq $mutsfreq
+	</command> <!-- Passes the reads file (-r), interval file (-i), chromosome length file (-c, the "chr" param of the addChr conditional), output dataset (-o) and the mutsfreq cutoff to the script -->
+	<version_command>/usr/local/bin/pyCalculateMutationFrequencies.py --version</version_command> <!-- Runs the script with its version flag; presumably used by Galaxy to record the tool version (confirm) -->
+	<inputs> <!-- Read data, interval data, chromosome length file, mutation frequency cutoff and output label -->
+	      <param format="gff" name="readdatafile" type="data" label="GFF Reads File --readdatafile" help="GFF file containing read data" />
+	      <param format="gtf" name="intervaldatafile" type="data" label="GTF Interval File --intervaldatafile" help="GTF file containing interval co-ordinates"/>
+	      <conditional name="addChr"> <!-- Both branches name their param "chr", so the command can always read $addChr.chr -->
+		<param name="chrfile" type="select" label="Choose Chromosome length file from">
+		  <option value="default" selected="true">Defaults</option>
+		  <option value="other">History</option>
+		</param>
+		<when value="default">
+		  <param name="chr" type="select" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes. Use pyCRAC utility pyCalculateChromosomeLengths to create.">
+		    <options from_data_table="pycrac_chr"/>
+		  </param>
+		</when>
+		<when value="other">
+		  <param format="tabular" name="chr" type="data" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes"/>
+		</when>
+	      </conditional>
+	      <!-- "format" is only meaningful on data params, so the integer and text params below do not carry it; the validator rejects negative cutoffs -->
+	      <param name="mutsfreq" type="integer" label="Minimum mutation frequency --mutsfreq" value="0" size="10" help="sets the minimal mutations frequency for an interval that you want to have written to your output file">
+		<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+	      </param>
+	      <param name="label" type="text" size="30" value="pyCalculateMutationFrequencies" label="Enter output file label -o" />
+	</inputs>
+	<outputs> <!-- Single GTF dataset: receives the path passed to the script as -o and is named after the "label" text input -->
+		<data format="gtf" name="output" label="${label.value}.gtf"/>
+	</outputs>
+	<help>
+
+.. class:: infomark
+
+**pyCalculateMutationFrequencies**
+
+pyCalculateMutationFrequencies is part of the pyCRAC_ package. It takes an interval file and a pyReadCounters GTF file and calculates (cross-linking induced) mutation frequencies for each interval.
+This tool can be used to calculate mutation frequencies for significant intervals (pyCalculateFDRs output file) or over-represented motifs (pyMotif GTF output file).
+It expects a pyCRAC GTF count_output_reads.gtf file and a GTF file with the intervals.
+
+For example::
+
+    This pyCalculateFDRs GTF output file::
+
+        ##gff-version 2
+        # generated by pyCalculateFDRs version 0.0.3, Sat Jun  1 21:16:23 2013
+        # pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05
+        # chromosome	feature	source	start	end	minimal_coverage	strand	.	attributes
+        chrII	protein_coding	exon	203838	203887	3	+	.	gene_id "YBL011W"; gene_name "SCT1"; 
+        chrII	intergenic_region	exon	407669	407708	3	+	.	gene_id "INT_0_445"; gene_name "INT_0_445"; 
+        chrII	intergenic_region	exon	585158	585195	2	+	.	gene_id "INT_0_562"; gene_name "INT_0_562"; 
+        chrII	protein_coding	exon	372390	372433	4	-	.	gene_id "YBR067C"; gene_name "TIP1"; 
+        chrII	intergenic_region	exon	380754	380815	6	-	.	gene_id "INT_0_431"; gene_name "INT_0_431"; 
+        chrIII	protein_coding	exon	138001	138044	5	+	.	gene_id "YCR012W"; gene_name "PGK1"; 
+        chrIII	intergenic_region	exon	227997	228036	5	+	.	gene_id "INT_0_885"; gene_name "INT_0_885";
+        chrIII	intergenic_region	exon	227997	228037	4	+	.	gene_id "INT_0_887"; gene_name "INT_0_887";
+        chrIII	tRNA	exon	227997	228037	4	+	.	gene_id "tS(CGA)C"; gene_name "SUP61";
+    
+    Will be converted into::
+        
+        ##gff-version 2
+        # generated by pyCalculateFDRs version 0.0.3, Sat Jun  1 21:16:23 2013
+        # /Library/Frameworks/EPD64.framework/Versions/Current/bin/pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05
+        # chromosome	feature	source	start	end	minimal_coverage	strand	.	attributes
+        chrII	protein_coding	exon	203838	203887	3	+	.	gene_id "YBL011W"; gene_name "SCT1"; # 203882D33.3,203883D33.3,203884D33.3;
+        chrII	intergenic_region	exon	407669	407708	3	+	.	gene_id "INT_0_445"; gene_name "INT_0_445"; # 407680D33.3,407681D33.3;
+        chrII	intergenic_region	exon	585158	585195	2	+	.	gene_id "INT_0_562"; gene_name "INT_0_562"; # 585171D100.0,585172D100.0,585173D100.0;
+        chrII	protein_coding	exon	372390	372433	4	-	.	gene_id "YBR067C"; gene_name "TIP1"; # 372412D50.0,372413D50.0;
+        chrII	intergenic_region	exon	380754	380815	6	-	.	gene_id "INT_0_431"; gene_name "INT_0_431"; # 380786D90.2,380787D90.2;
+        chrIII	protein_coding	exon	138001	138044	5	+	.	gene_id "YCR012W"; gene_name "PGK1"; # 138025D40.0,138026D30.0,138027D40.0;
+        chrIII	intergenic_region	exon	227997	228036	5	+	.	gene_id "INT_0_885"; gene_name "INT_0_885"; # 228006D85.7,228007D100.0;
+        chrIII	intergenic_region	exon	227997	228037	4	+	.	gene_id "INT_0_887"; gene_name "INT_0_887"; # 228006D85.7,228007D100.0;
+        chrIII	tRNA	exon	227997	228037	4	+	.	gene_id "tS(CGA)C"; gene_name "SUP61"; # 228006D85.7,228007D100.0;
+        
+
+The text following the hash character (#) at the end of each line lists the chromosomal coordinates of mutated nucleotides within the cluster interval and their mutation frequencies.
+
+For example::
+    
+    # 228007D100.0
+
+indicates that 100% of the nucleotides in position 228007 were deleted in the interval.
+
+By setting the --mutsfreq flag you can set a limit for the lowest mutation frequency that you want to have reported. 
+This makes it relatively easy to select those significant regions that have nucleotides with high mutation frequencies.
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+Options::
+
+  -i intervals.gtf, --intervaldatafile=intervals.gtf
+                        provide the path to your GTF interval data file.
+  -r reads.gtf, --readdatafile=reads.gtf
+                        provide the path to your GTF read data file.
+  -c yeast.txt, --chromfile=yeast.txt
+                        Location of the chromosome info file. This file should
+                        have two columns: first column is the names of the
+                        chromosomes, second column is length of the
+                        chromosomes. Default is yeast
+  -o intervals_with_muts.gtf, --output_file=intervals_with_muts.gtf
+                        provide a name for an output file. By default it
+                        writes to the standard output
+  --mutsfreq=10, --mutationfrequency=10
+                        sets the minimal mutation frequency for an interval
+                        that you want to have written to your output file.
+                        Default = 0%. Example: if the mutsfrequency is set at
+                        10 and an interval position is mutated in less than
+                        10% of the reads, then the mutation will not be
+                        reported.
+
+
+	</help>
+ </tool>
\ No newline at end of file