Mercurial > repos > yufei-luo > s_mart
comparison smart_toolShed/SMART/Java/Python/CountReadGCPercent.py @ 0:e0f8dcca02ed
Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
| author | yufei-luo |
|---|---|
| date | Thu, 17 Jan 2013 10:52:14 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e0f8dcca02ed |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 from optparse import OptionParser | |
| 4 from commons.core.parsing.FastaParser import FastaParser | |
| 5 from commons.core.writer.Gff3Writer import Gff3Writer | |
| 6 from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer | |
| 7 from SMART.Java.Python.misc.Progress import Progress | |
| 8 from commons.core.utils.RepetOptionParser import RepetOptionParser | |
| 9 from Gnome_tools.CountGCPercentBySlidingWindow import CountGCPercentBySlidingWindow | |
| 10 | |
| 11 | |
| 12 class CountReadGCPercent(object): | |
| 13 | |
| 14 def __init__(self): | |
| 15 self.referenceReader = None | |
| 16 self.gffReader = None | |
| 17 self.outputWriter = None | |
| 18 self.verbose = 0 | |
| 19 | |
| 20 def setInputReferenceFile(self, fileName): | |
| 21 self.referenceReader = fileName | |
| 22 | |
| 23 def setInputGffFile(self, fileName): | |
| 24 self.gffReader = TranscriptContainer(fileName, 'gff3', self.verbose) | |
| 25 | |
| 26 def setOutputFileName(self, fileName): | |
| 27 self.outputWriter = Gff3Writer(fileName, self.verbose) | |
| 28 | |
| 29 def readGffAnnotation(self): | |
| 30 self.coveredRegions = {} | |
| 31 progress = Progress(self.gffReader.getNbTranscripts(), "Reading gff3 annotation file", self.verbose) | |
| 32 for transcript in self.gffReader.getIterator(): | |
| 33 chromosome = transcript.getChromosome() | |
| 34 if chromosome not in self.coveredRegions: | |
| 35 self.coveredRegions[chromosome] = {} | |
| 36 for exon in transcript.getExons(): | |
| 37 for position in range(exon.getStart(), exon.getEnd()+1): | |
| 38 self.coveredRegions[chromosome][position] = 1 | |
| 39 progress.inc() | |
| 40 progress.done() | |
| 41 | |
| 42 def write(self): | |
| 43 iParser = FastaParser(self.referenceReader) | |
| 44 iParser.setTags() | |
| 45 iGetGCPercentBySW = CountGCPercentBySlidingWindow() | |
| 46 progress = Progress(self.gffReader.getNbTranscripts(), "Writing output file", self.verbose) | |
| 47 for transcript in self.gffReader.getIterator(): | |
| 48 chromosome = transcript.getChromosome() | |
| 49 GCpercent = 0 | |
| 50 nPercent = 0 | |
| 51 for exon in transcript.getExons(): | |
| 52 for sequenceName in iParser.getTags().keys(): | |
| 53 if sequenceName != chromosome: | |
| 54 continue | |
| 55 else: | |
| 56 subSequence = iParser.getSubSequence(sequenceName, exon.getStart() , exon.getEnd(), 1) | |
| 57 GCpercent, nPercent = iGetGCPercentBySW.getGCPercentAccordingToNAndNPercent(subSequence) | |
| 58 print "GCpercent = %f, nPercent = %f" % (GCpercent, nPercent) | |
| 59 transcript.setTagValue("GCpercent", GCpercent) | |
| 60 transcript.setTagValue("NPercent", nPercent) | |
| 61 self.outputWriter.addTranscript(transcript) | |
| 62 progress.inc() | |
| 63 progress.done() | |
| 64 | |
| 65 def run(self): | |
| 66 self.readGffAnnotation() | |
| 67 if self.outputWriter != None: | |
| 68 self.write() | |
| 69 | |
| 70 if __name__ == "__main__": | |
| 71 description = "Count GC percent for each read against a genome." | |
| 72 usage = "CountReadGCPercent.py -i <fasta file> -j <gff3 file> -o <output gff3 file> -v <verbose> -h]" | |
| 73 examples = "\nExample: \n" | |
| 74 examples += "\t$ python CountReadGCPercent.py -i file.fasta -j annotation.gff -o output.gff3" | |
| 75 examples += "\n\n" | |
| 76 parser = RepetOptionParser(description = description, usage = usage, version = "v1.0", epilog = examples) | |
| 77 parser.add_option( '-i', '--inputGenome', dest='fastaFile', help='fasta file [compulsory]', default= None ) | |
| 78 parser.add_option( '-j', '--inputAnnotation', dest='gffFile', help='gff3 file [compulsory]', default= None) | |
| 79 parser.add_option( '-o', '--output', dest='outputFile', help='output gff3 file [compulsory]', default= None ) | |
| 80 parser.add_option( '-v', '--verbose', dest='verbose', help='verbosity level (default=0/1)',type="int", default= 0 ) | |
| 81 (options, args) = parser.parse_args() | |
| 82 | |
| 83 readGCPercent = CountReadGCPercent() | |
| 84 readGCPercent.setInputReferenceFile(options.fastaFile) | |
| 85 readGCPercent.setInputGffFile(options.gffFile) | |
| 86 readGCPercent.setOutputFileName(options.outputFile) | |
| 87 readGCPercent.run() | |
| 88 |
