Mercurial > repos > yufei-luo > s_mart
comparison SMART/Java/Python/plotCoverage.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | 769e306b7933 |
children |
comparison
equal
deleted
inserted
replaced
17:b0e8584489e6 | 18:94ab73e8a190 |
---|---|
26 # same conditions as regards security. | 26 # same conditions as regards security. |
27 # | 27 # |
28 # The fact that you are presently reading this means that you have had | 28 # The fact that you are presently reading this means that you have had |
29 # knowledge of the CeCILL license and that you accept its terms. | 29 # knowledge of the CeCILL license and that you accept its terms. |
30 # | 30 # |
31 import os, subprocess, glob, random | 31 import os, os.path, subprocess, glob, random |
32 from optparse import OptionParser | 32 from optparse import OptionParser |
33 from SMART.Java.Python.structure.Interval import Interval | 33 from SMART.Java.Python.structure.Interval import Interval |
34 from SMART.Java.Python.structure.Transcript import Transcript | 34 from SMART.Java.Python.structure.Transcript import Transcript |
35 from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer | 35 from commons.core.parsing.ParserChooser import ParserChooser |
36 from SMART.Java.Python.misc.RPlotter import RPlotter | 36 from SMART.Java.Python.misc.RPlotter import RPlotter |
37 from SMART.Java.Python.misc.Progress import Progress | 37 from SMART.Java.Python.misc.Progress import Progress |
38 from commons.core.parsing.FastaParser import FastaParser | 38 from commons.core.parsing.FastaParser import FastaParser |
39 | 39 |
40 strands = [-1, 1] | 40 strands = [-1, 1] |
41 colors = {-1: "blue", 1: "red", 0: "black"} | 41 colors = {-1: "blue", 1: "red", 0: "black"} |
42 colorLine = "black" | 42 colorLine = "black" |
43 | 43 |
44 def parseTargetField(field): | 44 def parseTargetField(field): |
45 strand = "+" | 45 strand = "+" |
46 splittedFieldSpace = field.split() | 46 splittedFieldSpace = field.split() |
47 splittedFieldPlus = field.split("+", 4) | 47 splittedFieldPlus = field.split("+", 4) |
48 if len(splittedFieldSpace) == 3: | 48 if len(splittedFieldSpace) == 3: |
49 id, start, end = splittedFieldSpace | 49 id, start, end = splittedFieldSpace |
50 elif len(splittedFieldSpace) == 4: | 50 elif len(splittedFieldSpace) == 4: |
51 id, start, end, strand = splittedFieldSpace | 51 id, start, end, strand = splittedFieldSpace |
52 elif len(splittedFieldPlus) == 3: | 52 elif len(splittedFieldPlus) == 3: |
53 id, start, end = splittedFieldPlus | 53 id, start, end = splittedFieldPlus |
54 elif len(splittedFieldPlus) == 4: | 54 elif len(splittedFieldPlus) == 4: |
55 id, start, end, strand = splittedFieldPlus | 55 id, start, end, strand = splittedFieldPlus |
56 else: | 56 else: |
57 raise Exception("Cannot parse Target field '%s'." % (field)) | 57 raise Exception("Cannot parse Target field '%s'." % (field)) |
58 return (id, int(start), int(end), strand) | 58 return (id, int(start), int(end), strand) |
59 | 59 |
60 | 60 |
61 class SimpleTranscript(object): | 61 class SimpleTranscript(object): |
62 def __init__(self, transcript1, transcript2, color = None): | 62 def __init__(self, transcript1, transcript2, color = None): |
63 self.start = max(0, transcript1.getStart() - transcript2.getStart()) | 63 self.start = max(0, transcript1.getStart() - transcript2.getStart()) |
64 self.end = min(transcript2.getEnd() - transcript2.getStart(), transcript1.getEnd() - transcript2.getStart()) | 64 self.end = min(transcript2.getEnd() - transcript2.getStart(), transcript1.getEnd() - transcript2.getStart()) |
65 self.strand = transcript1.getDirection() * transcript2.getDirection() | 65 self.strand = transcript1.getDirection() * transcript2.getDirection() |
66 self.exons = [] | 66 self.exons = [] |
67 for exon in transcript1.getExons(): | 67 for exon in transcript1.getExons(): |
68 if exon.getEnd() >= transcript2.getStart() and exon.getStart() <= transcript2.getEnd(): | 68 if exon.getEnd() >= transcript2.getStart() and exon.getStart() <= transcript2.getEnd(): |
69 start = max(0, exon.getStart() - transcript2.getStart()) | 69 start = max(0, exon.getStart() - transcript2.getStart()) |
70 end = min(transcript2.getEnd() - transcript2.getStart(), exon.getEnd() - transcript2.getStart()) | 70 end = min(transcript2.getEnd() - transcript2.getStart(), exon.getEnd() - transcript2.getStart()) |
71 self.addExon(start, end, self.strand, color) | 71 self.addExon(start, end, self.strand, color) |
72 | 72 |
73 def addExon(self, start, end, strand, color): | 73 def addExon(self, start, end, strand, color): |
74 exon = SimpleExon(start, end, strand, color) | 74 exon = SimpleExon(start, end, strand, color) |
75 self.exons.append(exon) | 75 self.exons.append(exon) |
76 | 76 |
77 def getRScript(self, yOffset, height): | 77 def getRScript(self, yOffset, height): |
78 rString = "" | 78 rString = "" |
79 previousEnd = None | 79 previousEnd = None |
80 for exon in sorted(self.exons, key=lambda exon: exon.start): | 80 for exon in sorted(self.exons, key=lambda exon: exon.start): |
81 if previousEnd != None: | 81 if previousEnd != None: |
82 rString += "segments(%.1f, %.1f, %.1f, %.1f, col = \"%s\")\n" % (previousEnd, yOffset + height / 4.0, exon.start, yOffset + height / 4.0, colorLine) | 82 rString += "segments(%.1f, %.1f, %.1f, %.1f, col = \"%s\")\n" % (previousEnd, yOffset + height / 4.0, exon.start, yOffset + height / 4.0, colorLine) |
83 rString += exon.getRScript(yOffset, height) | 83 rString += exon.getRScript(yOffset, height) |
84 previousEnd = exon.end | 84 previousEnd = exon.end |
85 return rString | 85 return rString |
86 | 86 |
87 | 87 |
88 class SimpleExon(object): | 88 class SimpleExon(object): |
89 def __init__(self, start, end, strand, color = None): | 89 def __init__(self, start, end, strand, color = None): |
90 self.start = start | 90 self.start = start |
91 self.end = end | 91 self.end = end |
92 self.strand = strand | 92 self.strand = strand |
93 self.color = color | 93 self.color = color |
94 | 94 |
95 def getRScript(self, yOffset, height): | 95 def getRScript(self, yOffset, height): |
96 color = self.color if self.color != None else colors[self.strand] | 96 color = self.color if self.color != None else colors[self.strand] |
97 return "rect(%.1f, %.1f, %.1f, %.1f, col=\"%s\", border = \"%s\")\n" % (self.start, yOffset, self.end, yOffset + height / 2.0, color, colorLine) | 97 return "rect(%.1f, %.1f, %.1f, %.1f, col=\"%s\", border = \"%s\")\n" % (self.start, yOffset, self.end, yOffset + height / 2.0, color, colorLine) |
98 | 98 |
99 | 99 |
100 class Plotter(object): | 100 class Plotter(object): |
101 | 101 |
102 def __init__(self, seed, index, verbosity): | 102 def __init__(self, seed, index, verbosity): |
103 self.seed = seed | 103 self.seed = seed |
104 self.index = index | 104 self.index = index |
105 self.verbosity = verbosity | 105 self.verbosity = verbosity |
106 self.maxCoverage = 0 | 106 self.maxCoverage = 0 |
107 self.maxOverlap = 0 | 107 self.maxOverlap = 0 |
108 self.log = "" | 108 self.log = "" |
109 self.merge = False | 109 self.merge = False |
110 self.width = 1500 | 110 self.width = 1500 |
111 self.heigth = 1000 | 111 self.heigth = 1000 |
112 self.xLabel = "" | 112 self.xLabel = "" |
113 self.yLabel = "" | 113 self.yLabel = "" |
114 self.title = None | 114 self.title = None |
115 self.absPath = os.getcwd() | 115 self.absPath = os.getcwd() |
116 self.coverageDataFileName = "tmpFile_%d_%s.dat" % (seed, index) | 116 self.coverageDataFileName = "tmpFile_%d_%s.dat" % (seed, index) |
117 self.coverageScript = "" | 117 self.coverageScript = "" |
118 self.overlapScript = "" | 118 self.overlapScript = "" |
119 self.outputFileName = None | 119 self.outputFileName = None |
120 | 120 |
121 def setOutputFileName(self, fileName): | 121 def setOutputFileName(self, fileName): |
122 self.outputFileName = fileName | 122 self.outputFileName = fileName |
123 | 123 |
124 def setTranscript(self, transcript): | 124 def setTranscript(self, transcript): |
125 self.transcript = transcript | 125 self.transcript = transcript |
126 self.name = transcript.getName() | 126 self.name = transcript.getName() |
127 self.size = transcript.getEnd() - transcript.getStart() + 1 | 127 self.size = transcript.getEnd() - transcript.getStart() + 1 |
128 if self.title == None: | 128 if self.title == None: |
129 self.title = self.name | 129 self.title = self.name |
130 else: | 130 else: |
131 self.title += " " + self.name | 131 self.title += " " + self.name |
132 | 132 |
133 def setTitle(self, title): | 133 def setTitle(self, title): |
134 self.title = title + " " + self.name | 134 self.title = title + " " + self.name |
135 | 135 |
136 def setPlotSize(self, width, height): | 136 def setPlotSize(self, width, height): |
137 self.width = width | 137 self.width = width |
138 self.height = height | 138 self.height = height |
139 | 139 |
140 def setLabels(self, xLabel, yLabel): | 140 def setLabels(self, xLabel, yLabel): |
141 self.xLabel = xLabel | 141 self.xLabel = xLabel |
142 self.yLabel = yLabel | 142 self.yLabel = yLabel |
143 | 143 |
144 def setMerge(self, merge): | 144 def setMerge(self, merge): |
145 self.merge = merge | 145 self.merge = merge |
146 | 146 |
147 def setCoverageData(self, coverage): | 147 def setCoverageData(self, coverage): |
148 outputCoveragePerStrand = dict([strand, 0] for strand in strands) | 148 outputCoveragePerStrand = dict([strand, 0] for strand in strands) |
149 outputCoverage = 0 | 149 outputCoverage = 0 |
150 dataFile = open(os.path.abspath(self.coverageDataFileName), "w") | 150 dataFile = open(os.path.abspath(self.coverageDataFileName), "w") |
151 for position in range(self.size+1): | 151 for position in range(self.size+1): |
152 sumValue = 0 | 152 sumValue = 0 |
153 found = False | 153 found = False |
154 dataFile.write("%d\t" % (position)) | 154 dataFile.write("%d\t" % (position)) |
155 for strand in strands: | 155 for strand in strands: |
156 value = coverage[strand].get(position, 0) | 156 value = coverage[strand].get(position, 0) |
157 sumValue += value | 157 sumValue += value |
158 dataFile.write("%d\t" % (value)) | 158 dataFile.write("%d\t" % (value)) |
159 if value > 0: | 159 if value > 0: |
160 found = True | 160 found = True |
161 outputCoveragePerStrand[strand] += 1 | 161 outputCoveragePerStrand[strand] += 1 |
162 self.maxCoverage = max(self.maxCoverage, sumValue) | 162 self.maxCoverage = max(self.maxCoverage, sumValue) |
163 dataFile.write("%d\n" % (sumValue)) | 163 dataFile.write("%d\n" % (sumValue)) |
164 if found: | 164 if found: |
165 outputCoverage += 1 | 165 outputCoverage += 1 |
166 dataFile.close() | 166 dataFile.close() |
167 self.log += "%s (%d nt):\n - both strands: %d (%.0f%%)\n - (+) strand: %d (%.0f%%)\n - (-) strand: %d (%.0f%%)\n" % (self.name, self.size, outputCoverage, float(outputCoverage) / self.size * 100, outputCoveragePerStrand[1], float(outputCoveragePerStrand[1]) / self.size * 100, outputCoveragePerStrand[-1], float(outputCoveragePerStrand[-1]) / self.size * 100) | 167 self.log += "%s (%d nt):\n - both strands: %d (%.0f%%)\n - (+) strand: %d (%.0f%%)\n - (-) strand: %d (%.0f%%)\n" % (self.name, self.size, outputCoverage, float(outputCoverage) / self.size * 100, outputCoveragePerStrand[1], float(outputCoveragePerStrand[1]) / self.size * 100, outputCoveragePerStrand[-1], float(outputCoveragePerStrand[-1]) / self.size * 100) |
168 self.coverageScript += "data = scan(\"%s\", list(pos = -666, minus = -666, plus = -666, sumValue = -666), sep=\"\t\")\n" % (os.path.abspath(self.coverageDataFileName)) | 168 self.coverageScript += "data = scan(\"%s\", list(pos = -666, minus = -666, plus = -666, sumValue = -666), sep=\"\t\")\n" % (os.path.abspath(self.coverageDataFileName)) |
169 self.coverageScript += "lines(x = data$pos, y = data$minus, col = \"%s\")\n" % (colors[-1]) | 169 self.coverageScript += "lines(x = data$pos, y = data$minus, col = \"%s\")\n" % (colors[-1]) |
170 self.coverageScript += "lines(x = data$pos, y = data$plus, col = \"%s\")\n" % (colors[1]) | 170 self.coverageScript += "lines(x = data$pos, y = data$plus, col = \"%s\")\n" % (colors[1]) |
171 self.coverageScript += "lines(x = data$pos, y = data$sumValue, col = \"%s\")\n" % (colors[0]) | 171 self.coverageScript += "lines(x = data$pos, y = data$sumValue, col = \"%s\")\n" % (colors[0]) |
172 | 172 |
173 def setOverlapData(self, overlap): | 173 def setOverlapData(self, overlap): |
174 height = 1 | 174 height = 1 |
175 self.maxOverlap = (len(overlap) + 1) * height | 175 self.maxOverlap = (len(overlap) + 1) * height |
176 thisElement = SimpleTranscript(self.transcript, self.transcript, "black") | 176 thisElement = SimpleTranscript(self.transcript, self.transcript, "black") |
177 self.overlapScript += thisElement.getRScript(0, height) | 177 self.overlapScript += thisElement.getRScript(0, height) |
178 for cpt, transcript in enumerate(sorted(overlap, cmp=lambda c1, c2: c1.start - c2.start if c1.start != c2.start else c1.end - c2.end)): | 178 for cpt, transcript in enumerate(sorted(overlap, cmp=lambda c1, c2: c1.start - c2.start if c1.start != c2.start else c1.end - c2.end)): |
179 self.overlapScript += transcript.getRScript((cpt + 1) * height, height) | 179 self.overlapScript += transcript.getRScript((cpt + 1) * height, height) |
180 | 180 |
181 def getFirstLine(self, suffix = None): | 181 def getFirstLine(self, suffix = None): |
182 return "png(file = \"%s_%s%s.png\", width = %d, height = %d, bg = \"white\")\n" % (self.outputFileName, self.name, "" if suffix == None or self.merge else "_%s" % (suffix), self.width, self.height) | 182 return "png(file = \"%s_%s%s.png\", width = %d, height = %d, bg = \"white\")\n" % (self.outputFileName, self.name, "" if suffix == None or self.merge else "_%s" % (suffix), self.width, self.height) |
183 | 183 |
184 def getLastLine(self): | 184 def getLastLine(self): |
185 return "dev.off()\n" | 185 return "dev.off()\n" |
186 | 186 |
187 def startR(self, fileName, script): | 187 def startR(self, fileName, script): |
188 scriptFile = open(fileName, "w") | 188 scriptFile = open(fileName, "w") |
189 scriptFile.write(script) | 189 scriptFile.write(script) |
190 scriptFile.close() | 190 scriptFile.close() |
191 command = "R CMD BATCH %s" % (fileName) | 191 command = "R CMD BATCH %s" % (fileName) |
192 status = subprocess.call(command, shell=True) | 192 status = subprocess.call(command, shell=True) |
193 if status != 0: | 193 if status != 0: |
194 raise Exception("Problem with the execution of script file %s, status is: %s" % (fileName, status)) | 194 raise Exception("Problem with the execution of script file %s, status is: %s" % (fileName, status)) |
195 | 195 |
196 def plot(self): | 196 def plot(self): |
197 print "outputfileName is written in :", self.outputFileName | 197 if self.merge: |
198 if self.merge: | 198 fileName = "%s_%d_%s.R" % (self.outputFileName, self.seed, self.index) |
199 fileName = "%s_%d_%s.R" % (self.outputFileName, self.seed, self.index) | 199 plotLine = "plot(x = NA, y = NA, xlab=\"%s\", ylab=\"%s\", panel.first = grid(lwd = 1.0), xlim = c(0, %d), ylim = c(0, %d), cex.axis = 2, cex.lab = 2, cex.main=2, main = \"%s\")\n" % (self.xLabel, self.yLabel, self.size, max(self.maxCoverage, self.maxOverlap), self.title) |
200 plotLine = "plot(x = NA, y = NA, xlab=\"%s\", ylab=\"%s\", panel.first = grid(lwd = 1.0), xlim = c(0, %d), ylim = c(0, %d), cex.axis = 2, cex.lab = 2, cex.main=2, main = \"%s\")\n" % (self.xLabel, self.yLabel, self.size, max(self.maxCoverage, self.maxOverlap), self.title) | 200 script = self.getFirstLine() + plotLine + self.overlapScript + self.coverageScript + self.getLastLine() |
201 script = self.getFirstLine() + plotLine + self.overlapScript + self.coverageScript + self.getLastLine() | 201 self.startR(fileName, script) |
202 self.startR(fileName, script) | 202 else: |
203 else: | 203 fileName = "%s_%d_%s_overlap.R" % (self.outputFileName, self.seed, self.index) |
204 fileName = "%s_%d_%s_overlap.R" % (self.outputFileName, self.seed, self.index) | 204 plotLine = "plot(x = NA, y = NA, xlab=\"%s\", ylab=\"%s\", panel.first = grid(lwd = 1.0), xlim = c(0, %d), ylim = c(0, %d), cex.axis = 2, cex.lab = 2, cex.main=2, main = \"%s\")\n" % (self.xLabel, self.yLabel, self.size, self.maxOverlap, self.title) |
205 print "overlap file is written in :", fileName | 205 script = self.getFirstLine("overlap") + plotLine + self.overlapScript + self.getLastLine() |
206 plotLine = "plot(x = NA, y = NA, xlab=\"%s\", ylab=\"%s\", panel.first = grid(lwd = 1.0), xlim = c(0, %d), ylim = c(0, %d), cex.axis = 2, cex.lab = 2, cex.main=2, main = \"%s\")\n" % (self.xLabel, self.yLabel, self.size, self.maxOverlap, self.title) | 206 self.startR(fileName, script) |
207 script = self.getFirstLine("overlap") + plotLine + self.overlapScript + self.getLastLine() | 207 fileName = "%s_%d_%s_coverage.R" % (self.outputFileName, self.seed, self.index) |
208 self.startR(fileName, script) | 208 plotLine = "plot(x = NA, y = NA, xlab=\"%s\", ylab=\"%s\", panel.first = grid(lwd = 1.0), xlim = c(0, %d), ylim = c(0, %d), cex.axis = 2, cex.lab = 2, cex.main=2, main = \"%s\")\n" % (self.xLabel, self.yLabel, self.size, self.maxCoverage, self.title) |
209 fileName = "%s_%d_%s_coverage.R" % (self.outputFileName, self.seed, self.index) | 209 script = self.getFirstLine("coverage") + plotLine + self.coverageScript + self.getLastLine() |
210 plotLine = "plot(x = NA, y = NA, xlab=\"%s\", ylab=\"%s\", panel.first = grid(lwd = 1.0), xlim = c(0, %d), ylim = c(0, %d), cex.axis = 2, cex.lab = 2, cex.main=2, main = \"%s\")\n" % (self.xLabel, self.yLabel, self.size, self.maxCoverage, self.title) | 210 self.startR(fileName, script) |
211 script = self.getFirstLine("coverage") + plotLine + self.coverageScript + self.getLastLine() | |
212 self.startR(fileName, script) | |
213 | 211 |
214 | 212 |
215 class PlotParser(object): | 213 class PlotParser(object): |
216 | 214 |
217 def __init__(self, verbosity): | 215 def __init__(self, verbosity): |
218 self.verbosity = verbosity | 216 self.verbosity = verbosity |
219 self.parsers = [None, None] | 217 self.parsers = [None, None] |
220 self.sequenceParser = None | 218 self.sequenceParser = None |
221 self.seed = random.randint(0, 10000) | 219 self.seed = random.randint(0, 10000) |
222 self.title = "" | 220 self.title = "" |
223 self.merge = False | 221 self.merge = False |
224 | 222 |
225 def __del__(self): | 223 def __del__(self): |
226 for fileName in glob.glob("tmpFile_%d*.dat" % (self.seed)): | 224 for fileName in glob.glob("tmpFile_%d*.dat" % (self.seed)): |
227 os.remove(fileName) | 225 os.remove(fileName) |
228 for fileName in glob.glob("%s*.R" % (os.path.abspath(self.outputFileName))): | 226 for fileName in glob.glob("%s*.R" % (os.path.abspath(self.outputFileName))): |
229 os.remove(fileName) | 227 os.remove(fileName) |
230 for fileName in glob.glob("%s*.Rout" % (os.path.abspath(self.outputFileName))): | 228 for fileName in glob.glob("%s*.Rout" % (os.path.abspath(self.outputFileName))): |
231 os.remove(fileName) | 229 os.remove(fileName) |
232 | 230 |
233 def addInput(self, inputNb, fileName, fileFormat): | 231 def addInput(self, inputNb, fileName, fileFormat): |
234 if fileName == None: | 232 if fileName == None: |
235 return | 233 return |
236 self.parsers[inputNb] = TranscriptContainer(fileName, fileFormat, self.verbosity) | 234 chooser = ParserChooser(self.verbosity) |
237 if inputNb == 0: | 235 chooser.findFormat(fileFormat) |
238 self.parsers[1] = self.parsers[0] | 236 self.parsers[inputNb] = chooser.getParser(fileName) |
239 | 237 if inputNb == 0: |
240 def addSequence(self, fileName): | 238 self.parsers[1] = self.parsers[0] |
241 if fileName == None: | 239 |
242 return | 240 def addSequence(self, fileName): |
243 self.sequenceParser = FastaParser(fileName, self.verbosity) | 241 if fileName == None: |
244 | 242 return |
245 def setOutput(self, fileName): | 243 self.sequenceParser = FastaParser(fileName, self.verbosity) |
246 self.outputFileName = fileName | 244 |
247 | 245 def setOutput(self, fileName): |
248 def setPlotSize(self, width, height): | 246 self.outputFileName = fileName |
249 self.width = width | 247 |
250 self.height = height | 248 def setPlotSize(self, width, height): |
251 | 249 self.width = width |
252 def setLabels(self, xLabel, yLabel): | 250 self.height = height |
253 self.xLabel = xLabel | 251 |
254 self.yLabel = yLabel | 252 def setLabels(self, xLabel, yLabel): |
255 | 253 self.xLabel = xLabel |
256 def setTitle(self, title): | 254 self.yLabel = yLabel |
257 self.title = title | 255 |
258 | 256 def setTitle(self, title): |
259 def setMerge(self, merge): | 257 self.title = title |
260 self.merge = merge | 258 |
261 | 259 def setMerge(self, merge): |
262 def initializeDataFromSequences(self): | 260 self.merge = merge |
263 self.sizes = {} | 261 |
264 self.coverage = {} | 262 def initializeDataFromSequences(self): |
265 self.overlap = {} | 263 self.sizes = {} |
266 for region in self.sequenceParser.getRegions(): | 264 self.coverage = {} |
267 self.sizes[region] = self.sequenceParser.getSizeOfRegion(region) | 265 self.overlap = {} |
268 self.coverage[region] = {} | 266 for region in self.sequenceParser.getRegions(): |
269 self.overlap[region] = [] | 267 self.sizes[region] = self.sequenceParser.getSizeOfRegion(region) |
270 for strand in strands: | 268 self.coverage[region] = {} |
271 self.coverage[region][strand] = {} | 269 self.overlap[region] = [] |
272 self.coverage[region][strand][1] = 0 | 270 for strand in strands: |
273 self.coverage[region][strand][self.sizes[region]] = 0 | 271 self.coverage[region][strand] = {} |
274 | 272 self.coverage[region][strand][1] = 0 |
275 | 273 self.coverage[region][strand][self.sizes[region]] = 0 |
276 def initializeDataFromTranscripts(self): | 274 |
277 self.coverage = dict([i, None] for i in range(self.parsers[1].getNbTranscripts())) | 275 def initializeDataFromTranscripts(self): |
278 self.overlap = dict([i, None] for i in range(self.parsers[1].getNbTranscripts())) | 276 self.coverage = dict([i, None] for i in range(self.parsers[1].getNbTranscripts())) |
279 self.sizes = dict([i, 0] for i in range(self.parsers[1].getNbTranscripts())) | 277 self.overlap = dict([i, None] for i in range(self.parsers[1].getNbTranscripts())) |
280 self.parsers[0].findData() | 278 self.sizes = dict([i, 0] for i in range(self.parsers[1].getNbTranscripts())) |
281 progress = Progress(self.parsers[1].getNbTranscripts(), "Reading regions", self.verbosity) | 279 progress = Progress(self.parsers[1].getNbTranscripts(), "Reading regions", self.verbosity) |
282 for cpt, transcript in enumerate(self.parsers[1].getIterator()): | 280 for cpt, transcript in enumerate(self.parsers[1].getIterator()): |
283 self.coverage[cpt] = {} | 281 self.coverage[cpt] = {} |
284 self.overlap[cpt] = [] | 282 self.overlap[cpt] = [] |
285 for strand in strands: | 283 for strand in strands: |
286 self.coverage[cpt][strand] = {} | 284 self.coverage[cpt][strand] = {} |
287 self.coverage[cpt][strand][0] = 0 | 285 self.coverage[cpt][strand][0] = 0 |
288 self.coverage[cpt][strand][transcript.getEnd() - transcript.getStart()] = 0 | 286 self.coverage[cpt][strand][transcript.getEnd() - transcript.getStart()] = 0 |
289 for exon in transcript.getExons(): | 287 for exon in transcript.getExons(): |
290 self.sizes[cpt] += exon.getSize() | 288 self.sizes[cpt] += exon.getSize() |
291 progress.inc() | 289 progress.inc() |
292 progress.done() | 290 progress.done() |
293 | 291 |
294 def initialize(self): | 292 def initialize(self): |
295 if self.sequenceParser == None: | 293 if self.sequenceParser == None: |
296 self.initializeDataFromTranscripts() | 294 self.initializeDataFromTranscripts() |
297 else: | 295 else: |
298 self.initializeDataFromSequences() | 296 self.initializeDataFromSequences() |
299 | 297 |
300 def computeCoverage(self, transcript1, transcript2, id): | 298 def computeCoverage(self, transcript1, transcript2, id): |
301 strand = transcript1.getDirection() * transcript2.getDirection() | 299 strand = transcript1.getDirection() * transcript2.getDirection() |
302 for exon1 in transcript1.getExons(): | 300 for exon1 in transcript1.getExons(): |
303 for exon2 in transcript2.getExons(): | 301 for exon2 in transcript2.getExons(): |
304 if exon1.overlapWith(exon2): | 302 if exon1.overlapWith(exon2): |
305 for position in range(max(exon1.getStart(), exon2.getStart()), min(exon1.getEnd(), exon2.getEnd()) + 1): | 303 for position in range(max(exon1.getStart(), exon2.getStart()), min(exon1.getEnd(), exon2.getEnd()) + 1): |
306 relativePosition = position - transcript2.getStart() + 1 | 304 relativePosition = position - transcript2.getStart() + 1 |
307 self.coverage[id][strand][relativePosition] = self.coverage[id][strand].get(relativePosition, 0) + 1 | 305 self.coverage[id][strand][relativePosition] = self.coverage[id][strand].get(relativePosition, 0) + 1 |
308 | 306 |
309 def computeOverlap(self, transcript1, transcript2, id): | 307 def computeOverlap(self, transcript1, transcript2, id): |
310 simpleTranscript = SimpleTranscript(transcript1, transcript2) | 308 simpleTranscript = SimpleTranscript(transcript1, transcript2) |
311 self.overlap[id].append(simpleTranscript) | 309 self.overlap[id].append(simpleTranscript) |
312 | 310 |
313 def compute2TranscriptFiles(self): | 311 def compute2TranscriptFiles(self): |
314 progress = Progress(self.parsers[1].getNbTranscripts(), "Comparing regions", self.verbosity) | 312 progress = Progress(self.parsers[1].getNbTranscripts(), "Comparing regions", self.verbosity) |
315 for cpt2, transcript2 in enumerate(self.parsers[1].getIterator()): | 313 for cpt2, transcript2 in enumerate(self.parsers[1].getIterator()): |
316 for transcript1 in self.parsers[0].getIterator(): | 314 for transcript1 in self.parsers[0].getIterator(): |
317 if transcript1.overlapWithExon(transcript2): | 315 if transcript1.overlapWithExon(transcript2): |
318 self.computeCoverage(transcript1, transcript2, cpt2) | 316 self.computeCoverage(transcript1, transcript2, cpt2) |
319 self.computeOverlap(transcript1, transcript2, cpt2) | 317 self.computeOverlap(transcript1, transcript2, cpt2) |
320 progress.inc() | 318 progress.inc() |
321 progress.done() | 319 progress.done() |
322 | 320 |
323 def extractReferenceQuery(self, inputTranscript): | 321 def extractReferenceQueryMapping(self, mapping): |
324 if "Target" not in inputTranscript.getTagNames(): | 322 queryTranscript = mapping.getTranscript() |
325 raise Exception("Cannot extract Target field in line '%s'." % (inputTranscript)) | 323 referenceTranscript = Transcript() |
326 id, start, end, strand = parseTargetField(inputTranscript.getTagValue("Target")) | 324 referenceTranscript.setChromosome(queryTranscript.getChromosome()) |
327 if id not in self.sizes: | 325 referenceTranscript.setName(queryTranscript.getChromosome()) |
328 raise Exception("Target id '%s' of transcript '%s' does not correspond to anything in FASTA file." % (id, inputTranscript)) | 326 referenceTranscript.setDirection("+") |
329 referenceTranscript = Transcript() | 327 referenceTranscript.setEnd(self.sizes[queryTranscript.getChromosome()]) |
330 referenceTranscript.setChromosome(id) | 328 referenceTranscript.setStart(1) |
331 referenceTranscript.setName(id) | 329 return (referenceTranscript, queryTranscript) |
332 referenceTranscript.setDirection("+") | 330 |
333 referenceTranscript.setEnd(self.sizes[id]) | 331 def extractReferenceQuery(self, inputTranscript): |
334 referenceTranscript.setStart(1) | 332 if "Target" not in inputTranscript.getTagNames(): |
335 queryTranscript = Transcript() | 333 raise Exception("Cannot extract Target field in line '%s'." % (inputTranscript)) |
336 queryTranscript.setChromosome(id) | 334 id, start, end, strand = parseTargetField(inputTranscript.getTagValue("Target")) |
337 queryTranscript.setName(id) | 335 if id not in self.sizes: |
338 queryTranscript.setStart(start) | 336 raise Exception("Target id '%s' of transcript '%s' does not correspond to anything in FASTA file." % (id, inputTranscript)) |
339 queryTranscript.setEnd(end) | 337 referenceTranscript = Transcript() |
340 queryTranscript.setDirection(strand) | 338 referenceTranscript.setChromosome(id) |
341 if inputTranscript.getNbExons() > 1: | 339 referenceTranscript.setName(id) |
342 factor = float(end - start) / (inputTranscript.getEnd() - inputTranscript.getStart()) | 340 referenceTranscript.setDirection("+") |
343 for exon in inputTranscript.getExons(): | 341 referenceTranscript.setEnd(self.sizes[id]) |
344 newExon = Interval() | 342 referenceTranscript.setStart(1) |
345 newExon.setChromosome(id) | 343 queryTranscript = Transcript() |
346 newExon.setDirection(strand) | 344 queryTranscript.setChromosome(id) |
347 if "Target" in inputTranscript.getTagNames(): | 345 queryTranscript.setName(id) |
348 id, start, end, strand = parseTargetField(exon.getTagValue("Target")) | 346 queryTranscript.setStart(start) |
349 newExon.setStart(start) | 347 queryTranscript.setEnd(end) |
350 newExon.setEnd(end) | 348 queryTranscript.setDirection(strand) |
351 else: | 349 if inputTranscript.getNbExons() > 1: |
352 newExon.setStart(int(round((exon.getStart() - inputTranscript.getStart()) * factor)) + start) | 350 factor = float(end - start) / (inputTranscript.getEnd() - inputTranscript.getStart()) |
353 newExon.setEnd( int(round((exon.getEnd() - inputTranscript.getStart()) * factor)) + start) | 351 for exon in inputTranscript.getExons(): |
354 queryTranscript.addExon(newExon) | 352 newExon = Interval() |
355 return (referenceTranscript, queryTranscript) | 353 newExon.setChromosome(id) |
356 | 354 newExon.setDirection(strand) |
357 def compute1TranscriptFiles(self): | 355 if "Target" in inputTranscript.getTagNames(): |
358 progress = Progress(self.parsers[1].getNbTranscripts(), "Comparing regions", self.verbosity) | 356 id, start, end, strand = parseTargetField(exon.getTagValue("Target")) |
359 for transcript in self.parsers[1].getIterator(): | 357 newExon.setStart(start) |
360 referenceTranscript, queryTranscript = self.extractReferenceQuery(transcript) | 358 newExon.setEnd(end) |
361 self.computeCoverage(queryTranscript, referenceTranscript, referenceTranscript.getName()) | 359 else: |
362 self.computeOverlap(queryTranscript, referenceTranscript, referenceTranscript.getName()) | 360 newExon.setStart(int(round((exon.getStart() - inputTranscript.getStart()) * factor)) + start) |
363 progress.inc() | 361 newExon.setEnd( int(round((exon.getEnd() - inputTranscript.getStart()) * factor)) + start) |
364 progress.done() | 362 queryTranscript.addExon(newExon) |
365 | 363 return (referenceTranscript, queryTranscript) |
366 def compute(self): | 364 |
367 if self.sequenceParser == None: | 365 def compute1TranscriptFiles(self): |
368 self.compute2TranscriptFiles() | 366 progress = Progress(self.parsers[1].getNbItems(), "Comparing regions", self.verbosity) |
369 else: | 367 for transcript in self.parsers[1].getIterator(): |
370 self.compute1TranscriptFiles() | 368 if transcript.__class__.__name__ == "Mapping": |
371 | 369 referenceTranscript, queryTranscript = self.extractReferenceQueryMapping(transcript) |
372 def plotTranscript(self, index, transcript): | 370 else: |
373 plotter = Plotter(self.seed, index, self.verbosity) | 371 referenceTranscript, queryTranscript = self.extractReferenceQuery(transcript) |
374 plotter.setOutputFileName(self.outputFileName) | 372 self.computeCoverage(queryTranscript, referenceTranscript, referenceTranscript.getName()) |
375 plotter.setTranscript(transcript) | 373 self.computeOverlap(queryTranscript, referenceTranscript, referenceTranscript.getName()) |
376 plotter.setTitle(self.title) | 374 progress.inc() |
377 plotter.setLabels(self.xLabel, self.yLabel) | 375 progress.done() |
378 plotter.setPlotSize(self.width, self.height) | 376 |
379 plotter.setCoverageData(self.coverage[index]) | 377 def compute(self): |
380 plotter.setOverlapData(self.overlap[index]) | 378 if self.sequenceParser == None: |
381 plotter.setMerge(self.merge) | 379 self.compute2TranscriptFiles() |
382 plotter.plot() | 380 else: |
383 output = plotter.log | 381 self.compute1TranscriptFiles() |
384 return output | 382 |
385 | 383 def plotTranscript(self, index, transcript): |
386 def plot1TranscriptFile(self): | 384 plotter = Plotter(self.seed, index, self.verbosity) |
387 self.outputCoverage = {} | 385 plotter.setOutputFileName(self.outputFileName) |
388 self.outputCoveragePerStrand = {} | 386 plotter.setTranscript(transcript) |
389 output = "" | 387 plotter.setTitle(self.title) |
390 progress = Progress(len(self.sequenceParser.getRegions()), "Plotting regions", self.verbosity) | 388 plotter.setLabels(self.xLabel, self.yLabel) |
391 for cpt2, region in enumerate(self.sequenceParser.getRegions()): | 389 plotter.setPlotSize(self.width, self.height) |
392 transcript = Transcript() | 390 plotter.setCoverageData(self.coverage[index]) |
393 transcript.setName(region) | 391 plotter.setOverlapData(self.overlap[index]) |
394 transcript.setDirection("+") | 392 plotter.setMerge(self.merge) |
395 transcript.setEnd(self.sizes[region]) | 393 plotter.plot() |
396 transcript.setStart(1) | 394 output = plotter.log |
397 output += self.plotTranscript(region, transcript) | 395 return output |
398 progress.inc() | 396 |
399 progress.done() | 397 def plot1TranscriptFile(self): |
400 if self.verbosity > 0: | 398 self.outputCoverage = {} |
401 print output | 399 self.outputCoveragePerStrand = {} |
402 | 400 output = "" |
403 def plot2TranscriptFiles(self): | 401 progress = Progress(len(self.sequenceParser.getRegions()), "Plotting regions", self.verbosity) |
404 self.outputCoverage = [0] * self.parsers[1].getNbTranscripts() | 402 for cpt2, region in enumerate(self.sequenceParser.getRegions()): |
405 self.outputCoveragePerStrand = [None] * self.parsers[1].getNbTranscripts() | 403 transcript = Transcript() |
406 for cpt in range(self.parsers[1].getNbTranscripts()): | 404 transcript.setName(region) |
407 self.outputCoveragePerStrand[cpt] = dict([strand, 0] for strand in strands) | 405 transcript.setDirection("+") |
408 progress = Progress(self.parsers[1].getNbTranscripts(), "Plotting regions", self.verbosity) | 406 transcript.setEnd(self.sizes[region]) |
409 output = "" | 407 transcript.setStart(1) |
410 for cpt2, transcript2 in enumerate(self.parsers[1].getIterator()): | 408 output += self.plotTranscript(region, transcript) |
411 output += self.plotTranscript(cpt2, transcript2) | 409 progress.inc() |
412 progress.inc() | 410 progress.done() |
413 progress.done() | 411 if self.verbosity > 0: |
414 if self.verbosity > 0: | 412 print output |
415 print output | 413 |
416 | 414 def plot2TranscriptFiles(self): |
417 def plot(self): | 415 self.outputCoverage = [0] * self.parsers[1].getNbTranscripts() |
418 if self.sequenceParser == None: | 416 self.outputCoveragePerStrand = [None] * self.parsers[1].getNbTranscripts() |
419 self.plot2TranscriptFiles() | 417 for cpt in range(self.parsers[1].getNbTranscripts()): |
420 else: | 418 self.outputCoveragePerStrand[cpt] = dict([strand, 0] for strand in strands) |
421 self.plot1TranscriptFile() | 419 progress = Progress(self.parsers[1].getNbTranscripts(), "Plotting regions", self.verbosity) |
422 | 420 output = "" |
423 def start(self): | 421 for cpt2, transcript2 in enumerate(self.parsers[1].getIterator()): |
424 self.initialize() | 422 output += self.plotTranscript(cpt2, transcript2) |
425 self.compute() | 423 progress.inc() |
426 self.plot() | 424 progress.done() |
425 if self.verbosity > 0: | |
426 print output | |
427 | |
428 def plot(self): | |
429 if self.sequenceParser == None: | |
430 self.plot2TranscriptFiles() | |
431 else: | |
432 self.plot1TranscriptFile() | |
433 | |
434 def start(self): | |
435 self.initialize() | |
436 self.compute() | |
437 self.plot() | |
427 | 438 |
428 | 439 |
429 if __name__ == "__main__": | 440 if __name__ == "__main__": |
430 | 441 |
431 # parse command line | 442 # parse command line |
432 description = "Plot Coverage v1.0.1: Plot the coverage of the first data with respect to the second one. [Category: Visualization]" | 443 description = "Plot Coverage v1.0.1: Plot the coverage of the first data with respect to the second one. [Category: Visualization]" |
433 | 444 |
434 parser = OptionParser(description = description) | 445 parser = OptionParser(description = description) |
435 parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]") | 446 parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="input file 1 [compulsory] [format: file in transcript or mapping format given by -f]") |
436 parser.add_option("-f", "--inputFormat1", dest="inputFormat1", action="store", type="string", help="format of input file 1 [compulsory] [format: transcript file format]") | 447 parser.add_option("-f", "--inputFormat1", dest="inputFormat1", action="store", type="string", help="format of input file 1 [compulsory] [format: transcript or mapping file format]") |
437 parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]") | 448 parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]") |
438 parser.add_option("-g", "--inputFormat2", dest="inputFormat2", action="store", type="string", help="format of input file 2 [compulsory] [format: transcript file format]") | 449 parser.add_option("-g", "--inputFormat2", dest="inputFormat2", action="store", type="string", help="format of input file 2 [compulsory] [format: transcript file format]") |
439 parser.add_option("-q", "--sequence", dest="inputSequence", action="store", default=None, type="string", help="input sequence file [format: file in FASTA format] [default: None]") | 450 parser.add_option("-q", "--sequence", dest="inputSequence", action="store", default=None, type="string", help="input sequence file [format: file in FASTA format] [default: None]") |
440 parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in PNG format]") | 451 parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in PNG format]") |
441 parser.add_option("-w", "--width", dest="width", action="store", default=1500, type="int", help="width of the plots (in px) [format: int] [default: 1500]") | 452 parser.add_option("-w", "--width", dest="width", action="store", default=1500, type="int", help="width of the plots (in px) [format: int] [default: 1500]") |
442 parser.add_option("-e", "--height", dest="height", action="store", default=1000, type="int", help="height of the plots (in px) [format: int] [default: 1000]") | 453 parser.add_option("-e", "--height", dest="height", action="store", default=1000, type="int", help="height of the plots (in px) [format: int] [default: 1000]") |
443 parser.add_option("-t", "--title", dest="title", action="store", default="", type="string", help="title of the plots [format: string]") | 454 parser.add_option("-t", "--title", dest="title", action="store", default="", type="string", help="title of the plots [format: string]") |
444 parser.add_option("-x", "--xlab", dest="xLabel", action="store", default="", type="string", help="label on the x-axis [format: string]") | 455 parser.add_option("-x", "--xlab", dest="xLabel", action="store", default="", type="string", help="label on the x-axis [format: string]") |
445 parser.add_option("-y", "--ylab", dest="yLabel", action="store", default="", type="string", help="label on the y-axis [format: string]") | 456 parser.add_option("-y", "--ylab", dest="yLabel", action="store", default="", type="string", help="label on the y-axis [format: string]") |
446 parser.add_option("-p", "--plusColor", dest="plusColor", action="store", default="red", type="string", help="color for the elements on the plus strand [format: string] [default: red]") | 457 parser.add_option("-p", "--plusColor", dest="plusColor", action="store", default="red", type="string", help="color for the elements on the plus strand [format: string] [default: red]") |
447 parser.add_option("-m", "--minusColor", dest="minusColor", action="store", default="blue", type="string", help="color for the elements on the minus strand [format: string] [default: blue]") | 458 parser.add_option("-m", "--minusColor", dest="minusColor", action="store", default="blue", type="string", help="color for the elements on the minus strand [format: string] [default: blue]") |
448 parser.add_option("-s", "--sumColor", dest="sumColor", action="store", default="black", type="string", help="color for 2 strands coverage line [format: string] [default: black]") | 459 parser.add_option("-s", "--sumColor", dest="sumColor", action="store", default="black", type="string", help="color for 2 strands coverage line [format: string] [default: black]") |
449 parser.add_option("-l", "--lineColor", dest="lineColor", action="store", default="black", type="string", help="color for the lines [format: string] [default: black]") | 460 parser.add_option("-l", "--lineColor", dest="lineColor", action="store", default="black", type="string", help="color for the lines [format: string] [default: black]") |
450 parser.add_option("-1", "--merge", dest="merge", action="store_true", default=False, help="merge the 2 plots in 1 [format: boolean] [default: false]") | 461 parser.add_option("-1", "--merge", dest="merge", action="store_true", default=False, help="merge the 2 plots in 1 [format: boolean] [default: false]") |
451 parser.add_option("-D", "--directory", dest="working_Dir", action="store", default=os.getcwd(), type="string", help="the directory to store the results [format: directory]") | 462 parser.add_option("-D", "--directory", dest="working_Dir", action="store", default=os.getcwd(), type="string", help="the directory to store the results [format: directory]") |
452 parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]") | 463 parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]") |
453 (options, args) = parser.parse_args() | 464 (options, args) = parser.parse_args() |
454 | 465 |
455 colors[1] = options.plusColor | 466 colors[1] = options.plusColor |
456 colors[-1] = options.minusColor | 467 colors[-1] = options.minusColor |
457 colors[0] = options.sumColor | 468 colors[0] = options.sumColor |
458 colorLine = options.lineColor | 469 colorLine = options.lineColor |
459 | 470 |
460 pp = PlotParser(options.verbosity) | 471 pp = PlotParser(options.verbosity) |
461 pp.addInput(0, options.inputFileName1, options.inputFormat1) | 472 pp.addInput(0, options.inputFileName1, options.inputFormat1) |
462 pp.addInput(1, options.inputFileName2, options.inputFormat2) | 473 pp.addInput(1, options.inputFileName2, options.inputFormat2) |
463 pp.addSequence(options.inputSequence) | 474 pp.addSequence(options.inputSequence) |
464 if options.working_Dir[-1] != '/': | 475 pp.setOutput(options.outputFileName if os.path.isabs(options.outputFileName) else os.path.join(options.working_Dir, options.outputFileName)) |
465 path = options.working_Dir + '/' | 476 pp.setPlotSize(options.width, options.height) |
466 pp.setOutput(path + options.outputFileName) | 477 pp.setLabels(options.xLabel, options.yLabel) |
467 pp.setPlotSize(options.width, options.height) | 478 pp.setTitle(options.title) |
468 pp.setLabels(options.xLabel, options.yLabel) | 479 pp.setMerge(options.merge) |
469 pp.setTitle(options.title) | 480 pp.start() |
470 pp.setMerge(options.merge) | 481 |
471 pp.start() | |
472 | |
473 |