view pyCRAC/pyCalculateMutationFrequencies.xml @ 1:7c9574213c0a draft default tip

Uploaded
author swebb
date Thu, 20 Jun 2013 12:13:43 -0400
parents 19b20927172d
children
line wrap: on
line source

<tool id ="pyCalculateMutationFrequencies" name="pyCalculateMutationFrequencies">
	<!-- Dependency declaration: this tool needs the pyCRAC package. -->
	<requirements>
		<requirement type="package">pyCRAC</requirement>
	</requirements>
	<!--
	    Command line template for pyCalculateMutationFrequencies.py.
	    -r / -i : the read and interval datasets chosen in <inputs>
	    -c      : chromosome length file, taken from whichever branch of the
	              "addChr" conditional is active
	    -o      : the output dataset declared in <outputs>
	    File-path substitutions are quoted so that a path containing
	    whitespace is passed to the script as a single argument.
	-->
	<command interpreter="python">
	/usr/local/bin/pyCalculateMutationFrequencies.py
	-r "$readdatafile"
	-i "$intervaldatafile"
	-c "$addChr.chr"
	-o "$output"
	--mutsfreq $mutsfreq
	</command>
	<!-- Invokes the script with its version flag to report the tool version. -->
	<version_command>/usr/local/bin/pyCalculateMutationFrequencies.py --version</version_command>
	<inputs>
		<!-- Read data, handed to the script as -r. -->
		<param format="gff" name="readdatafile" type="data" label="GFF Reads File --readdatafile" help="GFF file containing read data" />
		<!-- Interval co-ordinates, handed to the script as -i. The accepted format is gtf, so the label and help say GTF. -->
		<param format="gtf" name="intervaldatafile" type="data" label="GTF Interval File --intervaldatafile" help="GTF file containing interval co-ordinates"/>
		<!-- Chromosome length file (-c): either an entry of the pycrac_chr data table or a tabular dataset from the history. Both branches name the parameter "chr" so the command can always use $addChr.chr. -->
		<conditional name="addChr">
			<param name="chrfile" type="select" label="Choose Chromosome length file from">
				<option value="default" selected="true">Defaults</option>
				<option value="other">History</option>
			</param>
			<when value="default">
				<param name="chr" type="select" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes. Use the pyCRAC utility pyCalculateChromosomeLengths to create.">
					<options from_data_table="pycrac_chr"/>
				</param>
			</when>
			<when value="other">
				<param format="tabular" name="chr" type="data" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes"/>
			</when>
		</conditional>

		<!-- Minimum mutation frequency (- -mutsfreq); negative values are rejected by the validator. The "format" attribute was dropped because it only applies to data parameters. -->
		<param name="mutsfreq" type="integer" label="Minimum mutation frequency --mutsfreq" value="0" size="10" help="sets the minimal mutations frequency for an interval that you want to have written to your output file">
			<validator type="in_range" min="0" message="Please enter a value &gt;= 0"/>
		</param>
		<!-- Free-text label; it is used only to name the output dataset, not passed to the script. -->
		<param name="label" type="text" size="30" value="pyCalculateMutationFrequencies" label="Enter output file label -o" />
	</inputs>
	<outputs>
		<!-- Single GTF result dataset; its label is the value of the "label" text parameter followed by ".gtf". -->
		<data name="output" format="gtf" label="${label.value}.gtf" />
	</outputs>
	<help>

.. class:: infomark

**pyCalculateMutationFrequencies**

pyCalculateMutationFrequencies is part of the pyCRAC_ package. It takes an interval file and a pyReadCounters GTF file and calculates (cross-linking induced) mutation frequencies for each interval.
This tool can be used to calculate mutation frequencies for significant intervals (pyCalculateFDRs output file) or over-represented motifs (pyMotif GTF output file).
It expects a pyCRAC GTF count_output_reads.gtf file and a GTF file with the intervals.

For example:

    This pyCalculateFDRs GTF output file::

        ##gff-version 2
        # generated by pyCalculateFDRs version 0.0.3, Sat Jun  1 21:16:23 2013
        # pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05
        # chromosome	feature	source	start	end	minimal_coverage	strand	.	attributes
        chrII	protein_coding	exon	203838	203887	3	+	.	gene_id "YBL011W"; gene_name "SCT1"; 
        chrII	intergenic_region	exon	407669	407708	3	+	.	gene_id "INT_0_445"; gene_name "INT_0_445"; 
        chrII	intergenic_region	exon	585158	585195	2	+	.	gene_id "INT_0_562"; gene_name "INT_0_562"; 
        chrII	protein_coding	exon	372390	372433	4	-	.	gene_id "YBR067C"; gene_name "TIP1"; 
        chrII	intergenic_region	exon	380754	380815	6	-	.	gene_id "INT_0_431"; gene_name "INT_0_431"; 
        chrIII	protein_coding	exon	138001	138044	5	+	.	gene_id "YCR012W"; gene_name "PGK1"; 
        chrIII	intergenic_region	exon	227997	228036	5	+	.	gene_id "INT_0_885"; gene_name "INT_0_885";
        chrIII	intergenic_region	exon	227997	228037	4	+	.	gene_id "INT_0_887"; gene_name "INT_0_887";
        chrIII	tRNA	exon	227997	228037	4	+	.	gene_id "tS(CGA)C"; gene_name "SUP61";
    
    Will be converted into::
        
        ##gff-version 2
        # generated by pyCalculateFDRs version 0.0.3, Sat Jun  1 21:16:23 2013
        # /Library/Frameworks/EPD64.framework/Versions/Current/bin/pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05
        # chromosome	feature	source	start	end	minimal_coverage	strand	.	attributes
        chrII	protein_coding	exon	203838	203887	3	+	.	gene_id "YBL011W"; gene_name "SCT1"; # 203882D33.3,203883D33.3,203884D33.3;
        chrII	intergenic_region	exon	407669	407708	3	+	.	gene_id "INT_0_445"; gene_name "INT_0_445"; # 407680D33.3,407681D33.3;
        chrII	intergenic_region	exon	585158	585195	2	+	.	gene_id "INT_0_562"; gene_name "INT_0_562"; # 585171D100.0,585172D100.0,585173D100.0;
        chrII	protein_coding	exon	372390	372433	4	-	.	gene_id "YBR067C"; gene_name "TIP1"; # 372412D50.0,372413D50.0;
        chrII	intergenic_region	exon	380754	380815	6	-	.	gene_id "INT_0_431"; gene_name "INT_0_431"; # 380786D90.2,380787D90.2;
        chrIII	protein_coding	exon	138001	138044	5	+	.	gene_id "YCR012W"; gene_name "PGK1"; # 138025D40.0,138026D30.0,138027D40.0;
        chrIII	intergenic_region	exon	227997	228036	5	+	.	gene_id "INT_0_885"; gene_name "INT_0_885"; # 228006D85.7,228007D100.0;
        chrIII	intergenic_region	exon	227997	228037	4	+	.	gene_id "INT_0_887"; gene_name "INT_0_887"; # 228006D85.7,228007D100.0;
        chrIII	tRNA	exon	227997	228037	4	+	.	gene_id "tS(CGA)C"; gene_name "SUP61"; # 228006D85.7,228007D100.0;
        

The text following the hash character (#) at the end of each line lists the chromosomal coordinates of mutated nucleotides within the cluster interval and their mutation frequencies.

For example::
    
    # 228007D100.0

indicates that 100% of the nucleotides in position 228007 were deleted in the interval.

By setting the --mutsfreq flag you can set a limit for the lowest mutation frequency that you want to have reported. 
This makes it relatively easy to select those significant regions that have nucleotides with high mutation frequencies.

.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
        
------

**Parameter list**

Options::

  -i intervals.gtf, --intervaldatafile=intervals.gtf
                        provide the path to your GTF interval data file.
  -r reads.gtf, --readdatafile=reads.gtf
                        provide the path to your GTF read data file.
  -c yeast.txt, --chromfile=yeast.txt
                        Location of the chromosome info file. This file should
                        have two columns: first column is the names of the
                        chromosomes, second column is length of the
                        chromosomes. Default is yeast
  -o intervals_with_muts.gtf, --output_file=intervals_with_muts.gtf
                        provide a name for an output file. By default it
                        writes to the standard output
  --mutsfreq=10, --mutationfrequency=10
                        sets the minimal mutations frequency for an interval
                        that you want to have written to our output file.
                        Default = 0%. Example: if the mutsfrequency is set at
                        10 and an interval position is mutated in less than
                        10% of the reads, then the mutation will not be
                        reported.


	</help>
 </tool>