| 
6
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 from optparse import OptionParser
 | 
| 
 | 
     4 from commons.core.parsing.FastaParser import FastaParser
 | 
| 
 | 
     5 from commons.core.writer.Gff3Writer import Gff3Writer
 | 
| 
 | 
     6 from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
 | 
| 
 | 
     7 from SMART.Java.Python.misc.Progress import Progress
 | 
| 
 | 
     8 from commons.core.utils.RepetOptionParser import RepetOptionParser
 | 
| 
 | 
     9 from Gnome_tools.CountGCPercentBySlidingWindow import CountGCPercentBySlidingWindow
 | 
| 
 | 
    10 
 | 
| 
 | 
    11 
 | 
| 
 | 
    12 class CountReadGCPercent(object):
 | 
| 
 | 
    13     
 | 
| 
 | 
    14     def __init__(self):
 | 
| 
 | 
    15         self.referenceReader = None
 | 
| 
 | 
    16         self.gffReader = None
 | 
| 
 | 
    17         self.outputWriter = None
 | 
| 
 | 
    18         self.verbose = 0
 | 
| 
 | 
    19         
 | 
| 
 | 
    20     def setInputReferenceFile(self, fileName):
 | 
| 
 | 
    21         self.referenceReader = fileName
 | 
| 
 | 
    22  
 | 
| 
 | 
    23     def setInputGffFile(self, fileName):
 | 
| 
 | 
    24         self.gffReader = TranscriptContainer(fileName, 'gff3', self.verbose)
 | 
| 
 | 
    25         
 | 
| 
 | 
    26     def setOutputFileName(self, fileName):
 | 
| 
 | 
    27         self.outputWriter = Gff3Writer(fileName, self.verbose)
 | 
| 
 | 
    28 
 | 
| 
 | 
    29     def readGffAnnotation(self):
 | 
| 
 | 
    30         self.coveredRegions = {}
 | 
| 
 | 
    31         progress = Progress(self.gffReader.getNbTranscripts(), "Reading gff3 annotation file", self.verbose)
 | 
| 
 | 
    32         for transcript in self.gffReader.getIterator():
 | 
| 
 | 
    33             chromosome = transcript.getChromosome()
 | 
| 
 | 
    34             if chromosome not in self.coveredRegions:
 | 
| 
 | 
    35                 self.coveredRegions[chromosome] = {}
 | 
| 
 | 
    36             for exon in transcript.getExons():
 | 
| 
 | 
    37                 for position in range(exon.getStart(), exon.getEnd()+1):
 | 
| 
 | 
    38                     self.coveredRegions[chromosome][position] = 1
 | 
| 
 | 
    39             progress.inc()
 | 
| 
 | 
    40         progress.done()
 | 
| 
 | 
    41         
 | 
| 
 | 
    42     def write(self):
 | 
| 
 | 
    43         iParser = FastaParser(self.referenceReader)
 | 
| 
 | 
    44         iParser.setTags()
 | 
| 
 | 
    45         iGetGCPercentBySW = CountGCPercentBySlidingWindow()
 | 
| 
 | 
    46         progress = Progress(self.gffReader.getNbTranscripts(), "Writing output file", self.verbose)
 | 
| 
 | 
    47         for transcript in self.gffReader.getIterator():
 | 
| 
 | 
    48             chromosome = transcript.getChromosome()
 | 
| 
 | 
    49             GCpercent = 0
 | 
| 
 | 
    50             nPercent = 0
 | 
| 
 | 
    51             for exon in transcript.getExons():
 | 
| 
 | 
    52                     for sequenceName in iParser.getTags().keys():
 | 
| 
 | 
    53                         if sequenceName != chromosome:
 | 
| 
 | 
    54                             continue
 | 
| 
 | 
    55                         else:
 | 
| 
 | 
    56                             subSequence = iParser.getSubSequence(sequenceName, exon.getStart() , exon.getEnd(), 1)
 | 
| 
 | 
    57                             GCpercent, nPercent = iGetGCPercentBySW.getGCPercentAccordingToNAndNPercent(subSequence)
 | 
| 
 | 
    58                             print "GCpercent = %f, nPercent = %f" % (GCpercent, nPercent)
 | 
| 
 | 
    59             transcript.setTagValue("GCpercent", GCpercent)
 | 
| 
 | 
    60             transcript.setTagValue("NPercent", nPercent)
 | 
| 
 | 
    61             self.outputWriter.addTranscript(transcript)
 | 
| 
 | 
    62             progress.inc()
 | 
| 
 | 
    63         progress.done()
 | 
| 
 | 
    64  
 | 
| 
 | 
    65     def run(self):
 | 
| 
 | 
    66         self.readGffAnnotation()
 | 
| 
 | 
    67         if self.outputWriter != None:
 | 
| 
 | 
    68             self.write()
 | 
| 
 | 
    69             
 | 
| 
 | 
    70 if __name__ == "__main__":
 | 
| 
 | 
    71         description = "Count GC percent for each read against a genome."
 | 
| 
 | 
    72         usage = "CountReadGCPercent.py -i <fasta file> -j <gff3 file> -o <output gff3 file> -v <verbose> -h]"
 | 
| 
 | 
    73         examples = "\nExample: \n"
 | 
| 
 | 
    74         examples += "\t$ python CountReadGCPercent.py -i file.fasta -j annotation.gff -o output.gff3"
 | 
| 
 | 
    75         examples += "\n\n"
 | 
| 
 | 
    76         parser = RepetOptionParser(description = description, usage = usage, version = "v1.0", epilog = examples)
 | 
| 
 | 
    77         parser.add_option( '-i', '--inputGenome', dest='fastaFile', help='fasta file [compulsory]', default= None )
 | 
| 
 | 
    78         parser.add_option( '-j', '--inputAnnotation', dest='gffFile', help='gff3 file [compulsory]', default= None)
 | 
| 
 | 
    79         parser.add_option( '-o', '--output', dest='outputFile', help='output gff3 file [compulsory]', default= None )
 | 
| 
 | 
    80         parser.add_option( '-v', '--verbose', dest='verbose', help='verbosity level (default=0/1)',type="int", default= 0 )
 | 
| 
 | 
    81         (options, args) = parser.parse_args()
 | 
| 
 | 
    82     
 | 
| 
 | 
    83         readGCPercent = CountReadGCPercent()
 | 
| 
 | 
    84         readGCPercent.setInputReferenceFile(options.fastaFile)
 | 
| 
 | 
    85         readGCPercent.setInputGffFile(options.gffFile)
 | 
| 
 | 
    86         readGCPercent.setOutputFileName(options.outputFile)
 | 
| 
 | 
    87         readGCPercent.run()
 | 
| 
 | 
    88          |