annotate SMART/Java/Python/CountReadGCPercent.py @ 69:1473ab954708 draft

Corrected bug in "CollapsedReads" XML file.
author m-zytnicki
date Wed, 18 Nov 2015 10:59:02 -0500
parents 769e306b7933
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1 #!/usr/bin/env python
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
2
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
3 from optparse import OptionParser
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
4 from commons.core.parsing.FastaParser import FastaParser
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
5 from commons.core.writer.Gff3Writer import Gff3Writer
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
6 from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
7 from SMART.Java.Python.misc.Progress import Progress
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
8 from commons.core.utils.RepetOptionParser import RepetOptionParser
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
9 from Gnome_tools.CountGCPercentBySlidingWindow import CountGCPercentBySlidingWindow
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
10
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
11
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
class CountReadGCPercent(object):
    """Tag every transcript of a GFF3 file with GC and N percentages.

    The percentages are computed on sub-sequences of a reference FASTA file
    and written, together with the transcripts, to a GFF3 output file.

    Typical use: call the three setters, then run().
    """

    def __init__(self):
        # Name of the reference FASTA file; handed to FastaParser in write().
        self.referenceReader = None
        # TranscriptContainer over the input GFF3 file (see setInputGffFile).
        self.gffReader = None
        # Gff3Writer for the output file (see setOutputFileName).
        self.outputWriter = None
        # Verbosity forwarded to the reader, the writer and Progress.
        # It is read when the reader/writer are created, so set it first.
        self.verbose = 0
        # chromosome -> {position: 1}; filled by readGffAnnotation().
        self.coveredRegions = {}

    def setInputReferenceFile(self, fileName):
        """Store the name of the reference FASTA file."""
        self.referenceReader = fileName

    def setInputGffFile(self, fileName):
        """Open fileName as a 'gff3' TranscriptContainer."""
        self.gffReader = TranscriptContainer(fileName, 'gff3', self.verbose)

    def setOutputFileName(self, fileName):
        """Create the Gff3Writer that write() sends transcripts to."""
        self.outputWriter = Gff3Writer(fileName, self.verbose)

    def readGffAnnotation(self):
        """Record every position covered by an exon, per chromosome.

        Rebuilds self.coveredRegions from scratch so that
        self.coveredRegions[chromosome][position] == 1 for each position
        between an exon's start and end, both included.
        """
        # NOTE(review): nothing visible in this class reads coveredRegions
        # afterwards, and it stores one entry per covered base -- confirm it
        # is still needed by external callers.
        self.coveredRegions = {}
        progress = Progress(self.gffReader.getNbTranscripts(), "Reading gff3 annotation file", self.verbose)
        for transcript in self.gffReader.getIterator():
            chromosome = transcript.getChromosome()
            if chromosome not in self.coveredRegions:
                self.coveredRegions[chromosome] = {}
            for exon in transcript.getExons():
                for position in range(exon.getStart(), exon.getEnd() + 1):
                    self.coveredRegions[chromosome][position] = 1
            progress.inc()
        progress.done()

    def write(self):
        """Compute the GC/N percentages and write the tagged transcripts.

        For each exon whose chromosome is a sequence of the reference file,
        the exon sub-sequence is extracted and its percentages are computed
        and printed. The transcript then receives the "GCpercent" and
        "NPercent" tags and is added to the output writer. Both tags stay
        at 0 when the chromosome is not found in the reference.
        """
        iParser = FastaParser(self.referenceReader)
        iParser.setTags()
        iGetGCPercentBySW = CountGCPercentBySlidingWindow()
        progress = Progress(self.gffReader.getNbTranscripts(), "Writing output file", self.verbose)
        for transcript in self.gffReader.getIterator():
            chromosome = transcript.getChromosome()
            GCpercent = 0
            nPercent = 0
            # NOTE(review): the values are overwritten for each exon, so a
            # multi-exon transcript is tagged with the figures of its last
            # exon only -- confirm this is intended.
            for exon in transcript.getExons():
                for sequenceName in iParser.getTags().keys():
                    if sequenceName != chromosome:
                        continue
                    # NOTE(review): the meaning of the trailing 1 is not
                    # visible here -- presumably a strand/direction flag.
                    subSequence = iParser.getSubSequence(sequenceName, exon.getStart(), exon.getEnd(), 1)
                    GCpercent, nPercent = iGetGCPercentBySW.getGCPercentAccordingToNAndNPercent(subSequence)
                    # Single-argument form: same output on Python 2 and 3.
                    print("GCpercent = %f, nPercent = %f" % (GCpercent, nPercent))
            transcript.setTagValue("GCpercent", GCpercent)
            transcript.setTagValue("NPercent", nPercent)
            self.outputWriter.addTranscript(transcript)
            progress.inc()
        progress.done()

    def run(self):
        """Read the annotation, then write the output if a writer was set."""
        self.readGffAnnotation()
        if self.outputWriter is not None:
            self.write()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
69
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: parse the options, configure a
    # CountReadGCPercent instance and run it.
    description = "Count GC percent for each read against a genome."
    usage = "CountReadGCPercent.py -i <fasta file> -j <gff3 file> -o <output gff3 file> [-v <verbose>] [-h]"
    examples = "\nExample: \n"
    examples += "\t$ python CountReadGCPercent.py -i file.fasta -j annotation.gff -o output.gff3"
    examples += "\n\n"
    parser = RepetOptionParser(description = description, usage = usage, version = "v1.0", epilog = examples)
    parser.add_option( '-i', '--inputGenome', dest='fastaFile', help='fasta file [compulsory]', default= None )
    parser.add_option( '-j', '--inputAnnotation', dest='gffFile', help='gff3 file [compulsory]', default= None)
    parser.add_option( '-o', '--output', dest='outputFile', help='output gff3 file [compulsory]', default= None )
    parser.add_option( '-v', '--verbose', dest='verbose', help='verbosity level (default=0/1)',type="int", default= 0 )
    (options, args) = parser.parse_args()

    # The three file options are documented as compulsory: stop with a usage
    # message instead of failing later on a None file name.
    # NOTE(review): assumes RepetOptionParser exposes optparse's error() --
    # confirm it derives from optparse.OptionParser.
    compulsoryOptions = (
        ('-i/--inputGenome', options.fastaFile),
        ('-j/--inputAnnotation', options.gffFile),
        ('-o/--output', options.outputFile),
    )
    for optionName, optionValue in compulsoryOptions:
        if optionValue is None:
            parser.error("missing compulsory option %s" % optionName)

    readGCPercent = CountReadGCPercent()
    # Must come before the setters below: the GFF3 reader and writer pick up
    # the verbosity at construction time.
    readGCPercent.verbose = options.verbose
    readGCPercent.setInputReferenceFile(options.fastaFile)
    readGCPercent.setInputGffFile(options.gffFile)
    readGCPercent.setOutputFileName(options.outputFile)
    readGCPercent.run()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
88