annotate SMART/Java/Python/plotCoverage.py @ 68:85e80c21b1f7 draft

Uploaded
author m-zytnicki
date Mon, 16 Nov 2015 12:00:32 -0500
parents 169d364ddd91
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
1 #! /usr/bin/env python
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
2 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
3 # Copyright INRA-URGI 2009-2010
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
4 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
5 # This software is governed by the CeCILL license under French law and
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
6 # abiding by the rules of distribution of free software. You can use,
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
7 # modify and/ or redistribute the software under the terms of the CeCILL
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
8 # license as circulated by CEA, CNRS and INRIA at the following URL
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
9 # "http://www.cecill.info".
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
10 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
11 # As a counterpart to the access to the source code and rights to copy,
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
12 # modify and redistribute granted by the license, users are provided only
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
13 # with a limited warranty and the software's author, the holder of the
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
14 # economic rights, and the successive licensors have only limited
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
15 # liability.
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
16 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
17 # In this respect, the user's attention is drawn to the risks associated
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
18 # with loading, using, modifying and/or developing or reproducing the
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
19 # software by the user in light of its specific status of free software,
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
20 # that may mean that it is complicated to manipulate, and that also
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
21 # therefore means that it is reserved for developers and experienced
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
22 # professionals having in-depth computer knowledge. Users are therefore
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
23 # encouraged to load and test the software's suitability as regards their
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
24 # requirements in conditions enabling the security of their systems and/or
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
25 # data to be ensured and, more generally, to use and operate it in the
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
26 # same conditions as regards security.
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
27 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
28 # The fact that you are presently reading this means that you have had
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
29 # knowledge of the CeCILL license and that you accept its terms.
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
30 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
31 import os, os.path, subprocess, glob, random
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
32 from optparse import OptionParser
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
33 from SMART.Java.Python.structure.Interval import Interval
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
34 from SMART.Java.Python.structure.Transcript import Transcript
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
35 from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
36 from SMART.Java.Python.misc.RPlotter import RPlotter
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
37 from SMART.Java.Python.misc.Progress import Progress
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
38 from commons.core.parsing.FastaParser import FastaParser
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
39
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
# Strand codes for which coverage is recorded and plotted.
strands = [-1, 1]

# R colour of each curve/box: one per strand code, plus key 0 which is used
# for the summed (both strands) coverage curve.
colors = {
    -1: "blue",
    1: "red",
    0: "black",
}

# R colour of box borders and of the segments joining two consecutive exons.
colorLine = "black"
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
43
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def parseTargetField(field):
    """Split the value of a "Target" tag into its components.

    The accepted layouts are, in order of priority:

    * "name start end" or "name start end strand" (whitespace separated);
    * "name+start+end" or "name+start+end+strand" ("+" separated).

    In the "+"-separated layout a forward strand is written "name+start+end++":
    the strand character is then indistinguishable from the separator and the
    split ends with two empty pieces.  This case is recognised explicitly.

    Returns:
        A tuple (name, start, end, strand) where start and end are integers
        and strand is the string read from the field ("+" when absent).

    Raises:
        Exception: if the field matches none of the layouts above.
        ValueError: if start or end is not an integer.
    """
    strand = "+"
    spaceParts = field.split()
    plusParts = field.split("+", 4)
    if len(spaceParts) == 3:
        name, start, end = spaceParts
    elif len(spaceParts) == 4:
        name, start, end, strand = spaceParts
    elif len(plusParts) == 3:
        name, start, end = plusParts
    elif len(plusParts) == 4:
        name, start, end, strand = plusParts
    elif len(plusParts) == 5 and plusParts[3] == "" and plusParts[4] == "":
        # "name+start+end++": the trailing "+" is the strand, keep the default.
        name, start, end = plusParts[:3]
    else:
        raise Exception("Cannot parse Target field '%s'." % (field))
    return (name, int(start), int(end), strand)
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
59
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
60
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
class SimpleTranscript(object):
    """Portion of a transcript lying on a reference transcript.

    All coordinates are expressed relative to the start of the reference
    transcript (its first position is 0) and are clipped to the reference.
    """

    def __init__(self, transcript1, transcript2, color = None):
        """Project transcript1 onto transcript2.

        Args:
            transcript1: the transcript to draw; must provide getStart(),
                getEnd(), getDirection() and getExons().
            transcript2: the reference transcript; must provide getStart(),
                getEnd() and getDirection().
            color: R colour forced on every exon, or None to let each exon
                pick its colour from its strand.
        """
        referenceStart = transcript2.getStart()
        referenceEnd = transcript2.getEnd()
        referenceLength = referenceEnd - referenceStart
        self.start = max(0, transcript1.getStart() - referenceStart)
        self.end = min(referenceLength, transcript1.getEnd() - referenceStart)
        # Product of the two directions: relative orientation of the pair.
        self.strand = transcript1.getDirection() * transcript2.getDirection()
        self.exons = []
        for exon in transcript1.getExons():
            # Exons lying entirely outside the reference are not drawn.
            if exon.getEnd() < referenceStart or exon.getStart() > referenceEnd:
                continue
            exonStart = max(0, exon.getStart() - referenceStart)
            exonEnd = min(referenceLength, exon.getEnd() - referenceStart)
            self.addExon(exonStart, exonEnd, self.strand, color)

    def addExon(self, start, end, strand, color):
        """Append an exon given in reference-relative coordinates."""
        exon = SimpleExon(start, end, strand, color)
        self.exons.append(exon)

    def getRScript(self, yOffset, height):
        """Return the R commands drawing this transcript.

        Exons are drawn in increasing start order, each one by its own
        getRScript(); two consecutive exons are joined by a horizontal segment
        placed at yOffset + height / 4.

        Returns:
            The R commands as a string, empty when there is no exon.
        """
        commands = []
        previousEnd = None
        yLine = yOffset + height / 4.0
        for exon in sorted(self.exons, key=lambda exon: exon.start):
            if previousEnd is not None:
                commands.append("segments(%.1f, %.1f, %.1f, %.1f, col = \"%s\")\n" % (previousEnd, yLine, exon.start, yLine, colorLine))
            commands.append(exon.getRScript(yOffset, height))
            previousEnd = exon.end
        return "".join(commands)
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
86
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
87
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
class SimpleExon(object):
    """A box to draw, in coordinates relative to the reference transcript."""

    def __init__(self, start, end, strand, color = None):
        """Store the geometry of the box.

        Args:
            start: left x coordinate of the box.
            end: right x coordinate of the box.
            strand: key into the module-level colors table, used when no
                colour is forced.
            color: R colour of the box, or None to derive it from strand.
        """
        self.start = start
        self.end = end
        self.strand = strand
        self.color = color

    def getRScript(self, yOffset, height):
        """Return the R rect() command drawing this exon.

        The box spans from yOffset to yOffset + height / 2 and is outlined
        with the module-level colorLine colour.
        """
        color = colors[self.strand] if self.color is None else self.color
        return "rect(%.1f, %.1f, %.1f, %.1f, col=\"%s\", border = \"%s\")\n" % (self.start, yOffset, self.end, yOffset + height / 2.0, color, colorLine)
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
98
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
99
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
class Plotter(object):
    """Write and run the R scripts plotting coverage/overlap of one transcript.

    Typical use: setTranscript(), then setCoverageData() and setOverlapData()
    to accumulate R commands, then plot() to write the script(s) and run R.
    """

    def __init__(self, seed, index, verbosity):
        """Create a plotter.

        Args:
            seed: integer shared by all the temporary files of a run.
            index: identifier of this plot, used in temporary file names.
            verbosity: stored as-is; not used by the methods of this class.
        """
        self.seed = seed
        self.index = index
        self.verbosity = verbosity
        self.maxCoverage = 0
        self.maxOverlap = 0
        self.log = ""
        self.merge = False
        self.width = 1500
        # Default plot height, read by getFirstLine() when setPlotSize() has
        # not been called.
        self.height = 1000
        self.xLabel = ""
        self.yLabel = ""
        self.title = None
        self.absPath = os.getcwd()
        self.coverageDataFileName = "tmpFile_%d_%s.dat" % (seed, index)
        self.coverageScript = ""
        self.overlapScript = ""
        self.outputFileName = None

    def setOutputFileName(self, fileName):
        """Set the prefix of the PNG and R script file names."""
        self.outputFileName = fileName

    def setTranscript(self, transcript):
        """Set the reference transcript and append its name to the title.

        The transcript must provide getName(), getStart() and getEnd().
        """
        self.transcript = transcript
        self.name = transcript.getName()
        self.size = transcript.getEnd() - transcript.getStart() + 1
        if self.title is None:
            self.title = self.name
        else:
            self.title += " " + self.name

    def setTitle(self, title):
        """Set the plot title to the given text followed by the transcript name.

        NOTE(review): reads self.name, which is only assigned by
        setTranscript(); calling setTitle() first raises AttributeError.
        """
        self.title = title + " " + self.name

    def setPlotSize(self, width, height):
        """Set the size of the PNG, as passed to the R png() call."""
        self.width = width
        self.height = height

    def setLabels(self, xLabel, yLabel):
        """Set the labels of the x and y axes."""
        self.xLabel = xLabel
        self.yLabel = yLabel

    def setMerge(self, merge):
        """Choose between one merged plot (True) and two separate plots."""
        self.merge = merge

    def setCoverageData(self, coverage):
        """Dump the coverage to a data file and prepare the R commands reading it.

        Args:
            coverage: mapping strand -> {position: count}; positions missing
                from the inner dictionary count as 0.

        Side effects: writes self.coverageDataFileName (one line per position
        from 0 to self.size: position, one value per strand, sum), updates
        self.maxCoverage, and appends to self.log and self.coverageScript.
        """
        outputCoveragePerStrand = dict((strand, 0) for strand in strands)
        outputCoverage = 0
        dataFileName = os.path.abspath(self.coverageDataFileName)
        with open(dataFileName, "w") as dataFile:
            for position in range(self.size + 1):
                sumValue = 0
                found = False
                dataFile.write("%d\t" % (position))
                for strand in strands:
                    value = coverage[strand].get(position, 0)
                    sumValue += value
                    dataFile.write("%d\t" % (value))
                    if value > 0:
                        found = True
                        outputCoveragePerStrand[strand] += 1
                self.maxCoverage = max(self.maxCoverage, sumValue)
                dataFile.write("%d\n" % (sumValue))
                if found:
                    outputCoverage += 1
        self.log += "%s (%d nt):\n - both strands: %d (%.0f%%)\n - (+) strand: %d (%.0f%%)\n - (-) strand: %d (%.0f%%)\n" % (self.name, self.size, outputCoverage, float(outputCoverage) / self.size * 100, outputCoveragePerStrand[1], float(outputCoveragePerStrand[1]) / self.size * 100, outputCoveragePerStrand[-1], float(outputCoveragePerStrand[-1]) / self.size * 100)
        # The column order below (minus, plus) follows the order of `strands`.
        self.coverageScript += "data = scan(\"%s\", list(pos = -666, minus = -666, plus = -666, sumValue = -666), sep=\"\t\")\n" % (dataFileName)
        self.coverageScript += "lines(x = data$pos, y = data$minus, col = \"%s\")\n" % (colors[-1])
        self.coverageScript += "lines(x = data$pos, y = data$plus, col = \"%s\")\n" % (colors[1])
        self.coverageScript += "lines(x = data$pos, y = data$sumValue, col = \"%s\")\n" % (colors[0])

    def setOverlapData(self, overlap):
        """Prepare the R commands drawing the reference and its overlapping elements.

        Args:
            overlap: iterable of objects with start, end and
                getRScript(yOffset, height), e.g. SimpleTranscript.

        The reference transcript is drawn in black on the bottom row; the
        other elements are stacked above it, sorted by (start, end).
        """
        height = 1
        self.maxOverlap = (len(overlap) + 1) * height
        thisElement = SimpleTranscript(self.transcript, self.transcript, "black")
        self.overlapScript += thisElement.getRScript(0, height)
        ordered = sorted(overlap, key=lambda element: (element.start, element.end))
        for cpt, transcript in enumerate(ordered):
            self.overlapScript += transcript.getRScript((cpt + 1) * height, height)

    def getFirstLine(self, suffix = None):
        """Return the R png() command opening the output file.

        The file is named "<outputFileName>_<name>.png"; "_<suffix>" is
        inserted before the extension when a suffix is given and the plots are
        not merged.
        """
        fileSuffix = "" if suffix is None or self.merge else "_%s" % (suffix)
        return "png(file = \"%s_%s%s.png\", width = %d, height = %d, bg = \"white\")\n" % (self.outputFileName, self.name, fileSuffix, self.width, self.height)

    def getLastLine(self):
        """Return the R command closing the output file."""
        return "dev.off()\n"

    def startR(self, fileName, script):
        """Write the script to fileName and run it with "R CMD BATCH".

        Raises:
            Exception: if R cannot be started or exits with a non-zero status.
        """
        with open(fileName, "w") as scriptFile:
            scriptFile.write(script)
        # An argument list (no shell) keeps file names containing spaces or
        # shell metacharacters intact.
        try:
            status = subprocess.call(["R", "CMD", "BATCH", fileName])
        except OSError as error:
            raise Exception("Problem with the execution of script file %s, status is: %s" % (fileName, error))
        if status != 0:
            raise Exception("Problem with the execution of script file %s, status is: %s" % (fileName, status))

    def _getPlotLine(self, yMax):
        """Return the R plot() command opening an empty frame of height yMax."""
        return "plot(x = NA, y = NA, xlab=\"%s\", ylab=\"%s\", panel.first = grid(lwd = 1.0), xlim = c(0, %d), ylim = c(0, %d), cex.axis = 2, cex.lab = 2, cex.main=2, main = \"%s\")\n" % (self.xLabel, self.yLabel, self.size, yMax, self.title)

    def plot(self):
        """Write the R script(s) and run them.

        When merging, overlap and coverage are drawn on a single plot;
        otherwise an "_overlap" plot and a "_coverage" plot are produced.
        """
        print("outputfileName is written in : %s" % (self.outputFileName,))
        if self.merge:
            fileName = "%s_%d_%s.R" % (self.outputFileName, self.seed, self.index)
            plotLine = self._getPlotLine(max(self.maxCoverage, self.maxOverlap))
            script = self.getFirstLine() + plotLine + self.overlapScript + self.coverageScript + self.getLastLine()
            self.startR(fileName, script)
        else:
            fileName = "%s_%d_%s_overlap.R" % (self.outputFileName, self.seed, self.index)
            print("overlap file is written in : %s" % (fileName,))
            plotLine = self._getPlotLine(self.maxOverlap)
            script = self.getFirstLine("overlap") + plotLine + self.overlapScript + self.getLastLine()
            self.startR(fileName, script)
            fileName = "%s_%d_%s_coverage.R" % (self.outputFileName, self.seed, self.index)
            plotLine = self._getPlotLine(self.maxCoverage)
            script = self.getFirstLine("coverage") + plotLine + self.coverageScript + self.getLastLine()
            self.startR(fileName, script)
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
213
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
214
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
215 class PlotParser(object):
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
216
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
217 def __init__(self, verbosity):
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
218 self.verbosity = verbosity
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
219 self.parsers = [None, None]
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
220 self.sequenceParser = None
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
221 self.seed = random.randint(0, 10000)
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
222 self.title = ""
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
223 self.merge = False
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
224
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
225 def __del__(self):
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
226 for fileName in glob.glob("tmpFile_%d*.dat" % (self.seed)):
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
227 os.remove(fileName)
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
228 for fileName in glob.glob("%s*.R" % (os.path.abspath(self.outputFileName))):
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
229 os.remove(fileName)
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
230 for fileName in glob.glob("%s*.Rout" % (os.path.abspath(self.outputFileName))):
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
231 os.remove(fileName)
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
232
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def addInput(self, inputNb, fileName, fileFormat):
    """Register an input file in slot inputNb; do nothing when fileName is None.

    Args:
        inputNb: index of the slot in self.parsers (0 or 1).
        fileName: name of the file, or None when the input is not provided.
        fileFormat: format name handed to TranscriptContainer.
    """
    if fileName is None:
        return
    self.parsers[inputNb] = TranscriptContainer(fileName, fileFormat, self.verbosity)
    if inputNb == 0:
        # Input 0 also fills slot 1, so that a single input is compared with
        # itself.  NOTE(review): this overwrites slot 1, so input 1 must be
        # added after input 0 -- confirm against the caller.
        self.parsers[1] = self.parsers[0]
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
239
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def addSequence(self, fileName):
    """Open the FASTA file of the reference sequences; do nothing when fileName is None."""
    if fileName is None:
        return
    self.sequenceParser = FastaParser(fileName, self.verbosity)
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
244
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def setOutput(self, fileName):
    """Remember the output file name; __del__ also uses it as a clean-up prefix."""
    self.outputFileName = fileName
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
247
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def setPlotSize(self, width, height):
    """Remember the width and height requested for the plots."""
    self.width, self.height = width, height
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
251
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def setLabels(self, xLabel, yLabel):
    """Remember the labels requested for the x and y axes."""
    self.xLabel, self.yLabel = xLabel, yLabel
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
255
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def setTitle(self, title):
    """Remember the title requested for the plots."""
    self.title = title
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
258
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def setMerge(self, merge):
    """Remember whether the plots must be merged."""
    self.merge = merge
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
261
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def initializeDataFromSequences(self):
    """Reset sizes, coverage and overlap, keyed by the regions of the sequence file.

    For every region returned by the sequence parser:
    * sizes[region] is the size reported by the parser;
    * overlap[region] is an empty list;
    * coverage[region][strand] holds a zero count at position 1 and at the
      last position (the region size), for each strand of `strands`.
    """
    self.sizes = {}
    self.coverage = {}
    self.overlap = {}
    for region in self.sequenceParser.getRegions():
        regionSize = self.sequenceParser.getSizeOfRegion(region)
        self.sizes[region] = regionSize
        self.overlap[region] = []
        self.coverage[region] = dict((strand, {1: 0, regionSize: 0}) for strand in strands)
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
274
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
275
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def initializeDataFromTranscripts(self):
    """Reset sizes, coverage and overlap, keyed by the rank of each reference transcript.

    The reference transcripts are those of self.parsers[1].  For the
    transcript of rank cpt:
    * sizes[cpt] is the sum of the sizes of its exons;
    * overlap[cpt] is an empty list;
    * coverage[cpt][strand] holds a zero count at position 0 and at position
      getEnd() - getStart(), for each strand of `strands`.
    """
    nbTranscripts = self.parsers[1].getNbTranscripts()
    # Every rank gets an entry up front; the loop below fills them in.
    self.coverage = dict.fromkeys(range(nbTranscripts))
    self.overlap = dict.fromkeys(range(nbTranscripts))
    self.sizes = dict.fromkeys(range(nbTranscripts), 0)
    self.parsers[0].findData()
    progress = Progress(self.parsers[1].getNbTranscripts(), "Reading regions", self.verbosity)
    for cpt, transcript in enumerate(self.parsers[1].getIterator()):
        lastPosition = transcript.getEnd() - transcript.getStart()
        self.overlap[cpt] = []
        self.coverage[cpt] = dict((strand, {0: 0, lastPosition: 0}) for strand in strands)
        self.sizes[cpt] += sum(exon.getSize() for exon in transcript.getExons())
        progress.inc()
    progress.done()
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
293
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def initialize(self):
    """Reset the data, from the sequence file when there is one, else from the transcripts."""
    if self.sequenceParser is None:
        self.initializeDataFromTranscripts()
    else:
        self.initializeDataFromSequences()
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
299
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def computeCoverage(self, transcript1, transcript2, id):
    """Count, position by position, the overlap of transcript1 on transcript2.

    Every position shared by an exon of transcript1 and an exon of
    transcript2 adds one to self.coverage[id][strand], where strand is the
    product of the two directions.  Positions are stored relative to the
    start of transcript2, the start itself being position 1.

    NOTE(review): SimpleTranscript stores the same kind of offset with the
    start at position 0 -- confirm the one-position shift is intended.
    """
    strand = transcript1.getDirection() * transcript2.getDirection()
    for exon1 in transcript1.getExons():
        for exon2 in transcript2.getExons():
            if not exon1.overlapWith(exon2):
                continue
            firstShared = max(exon1.getStart(), exon2.getStart())
            lastShared = min(exon1.getEnd(), exon2.getEnd())
            for position in range(firstShared, lastShared + 1):
                relativePosition = position - transcript2.getStart() + 1
                counts = self.coverage[id][strand]
                counts[relativePosition] = counts.get(relativePosition, 0) + 1
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
308
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def computeOverlap(self, transcript1, transcript2, id):
    """Record transcript1, projected onto transcript2, in the overlap list of id."""
    self.overlap[id].append(SimpleTranscript(transcript1, transcript2))
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
312
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def compute2TranscriptFiles(self):
    """Compare every transcript of input 0 against every transcript of input 1.

    Transcripts of ``self.parsers[1]`` are numbered in iteration order, and
    that number is the key under which coverage and overlaps are stored.
    For each of them, ``self.parsers[0]`` is iterated in full, and each of
    its transcripts that passes ``overlapWithExon`` is recorded through
    ``computeCoverage`` and ``computeOverlap``.
    """
    referenceParser = self.parsers[1]
    tracker = Progress(referenceParser.getNbTranscripts(), "Comparing regions", self.verbosity)
    index = 0
    for reference in referenceParser.getIterator():
        for candidate in self.parsers[0].getIterator():
            if not candidate.overlapWithExon(reference):
                continue
            self.computeCoverage(candidate, reference, index)
            self.computeOverlap(candidate, reference, index)
        tracker.inc()
        index += 1
    tracker.done()
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
322
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def extractReferenceQuery(self, inputTranscript):
    """Build the (reference, query) transcript pair described by a "Target" tag.

    The reference transcript spans the whole target sequence: from 1 to
    ``self.sizes[target id]``, on the "+" strand. The query transcript
    carries the start, end and strand parsed from the transcript's
    "Target" tag.

    When the input has more than one exon, one exon is added to the query
    per input exon:

    - from the exon's own "Target" tag when it has one;
    - otherwise by scaling the exon's offsets inside the input transcript
      to the target interval.

    Returns:
        A tuple ``(referenceTranscript, queryTranscript)``.

    Raises:
        Exception: if the transcript has no "Target" tag, or if the target
            id is not a key of ``self.sizes``.
    """
    if "Target" not in inputTranscript.getTagNames():
        raise Exception("Cannot extract Target field in line '%s'." % (inputTranscript))
    targetId, start, end, strand = parseTargetField(inputTranscript.getTagValue("Target"))
    if targetId not in self.sizes:
        raise Exception("Target id '%s' of transcript '%s' does not correspond to anything in FASTA file." % (targetId, inputTranscript))

    referenceTranscript = Transcript()
    referenceTranscript.setChromosome(targetId)
    referenceTranscript.setName(targetId)
    referenceTranscript.setDirection("+")
    referenceTranscript.setEnd(self.sizes[targetId])
    referenceTranscript.setStart(1)

    queryTranscript = Transcript()
    queryTranscript.setChromosome(targetId)
    queryTranscript.setName(targetId)
    queryTranscript.setStart(start)
    queryTranscript.setEnd(end)
    queryTranscript.setDirection(strand)

    if inputTranscript.getNbExons() > 1:
        # Ratio between the target interval and the input transcript span,
        # used to project exons that carry no Target of their own.
        factor = float(end - start) / (inputTranscript.getEnd() - inputTranscript.getStart())
        for exon in inputTranscript.getExons():
            newExon = Interval()
            newExon.setChromosome(targetId)
            newExon.setDirection(strand)
            # The test is on the exon itself: the transcript is already
            # known to have a Target (guard above), so testing the
            # transcript would never reach the projection branch.
            # NOTE(review): assumes exons expose getTagNames() like the
            # transcript does -- confirm against the Interval class.
            if "Target" in exon.getTagNames():
                # Exon-level values get their own names so the
                # transcript-level start/strand stay intact for the
                # other exons.
                _, exonStart, exonEnd, _ = parseTargetField(exon.getTagValue("Target"))
                newExon.setStart(exonStart)
                newExon.setEnd(exonEnd)
            else:
                newExon.setStart(int(round((exon.getStart() - inputTranscript.getStart()) * factor)) + start)
                newExon.setEnd(int(round((exon.getEnd() - inputTranscript.getStart()) * factor)) + start)
            queryTranscript.addExon(newExon)
    return (referenceTranscript, queryTranscript)
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
356
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def compute1TranscriptFiles(self):
    """Compute coverage and overlaps from one transcript file with Target tags.

    Each transcript of ``self.parsers[1]`` is turned into a
    (reference, query) pair by ``extractReferenceQuery``. The query is then
    recorded against the reference, keyed by the reference's name.
    """
    alignmentParser = self.parsers[1]
    tracker = Progress(alignmentParser.getNbTranscripts(), "Comparing regions", self.verbosity)
    for alignment in alignmentParser.getIterator():
        pair = self.extractReferenceQuery(alignment)
        reference, query = pair
        self.computeCoverage(query, reference, reference.getName())
        self.computeOverlap(query, reference, reference.getName())
        tracker.inc()
    tracker.done()
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
365
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def compute(self):
    """Run the comparison step.

    Calls ``compute2TranscriptFiles`` when no sequence parser is set, and
    ``compute1TranscriptFiles`` otherwise.
    """
    # Identity test on purpose: "no parser" is represented by None itself,
    # so the choice must not depend on how a parser object implements "==".
    if self.sequenceParser is None:
        self.compute2TranscriptFiles()
    else:
        self.compute1TranscriptFiles()
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
371
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def plotTranscript(self, index, transcript):
    """Plot one reference element and return the plotter's log.

    ``index`` is the key used both to create the ``Plotter`` and to look up
    this element's entries in ``self.coverage`` and ``self.overlap``.
    """
    renderer = Plotter(self.seed, index, self.verbosity)

    # Output and presentation settings.
    renderer.setOutputFileName(self.outputFileName)
    renderer.setTranscript(transcript)
    renderer.setTitle(self.title)
    renderer.setLabels(self.xLabel, self.yLabel)
    renderer.setPlotSize(self.width, self.height)

    # Data computed for this element.
    renderer.setCoverageData(self.coverage[index])
    renderer.setOverlapData(self.overlap[index])

    renderer.setMerge(self.merge)
    renderer.plot()
    return renderer.log
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
385
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def plot1TranscriptFile(self):
    """Plot one figure per region of the sequence parser.

    For each region a "+" strand transcript named after the region and
    spanning 1 to ``self.sizes[region]`` is created and passed to
    ``plotTranscript``. The logs returned by ``plotTranscript`` are
    concatenated and printed when ``self.verbosity`` is above 0.
    """
    self.outputCoverage = {}
    self.outputCoveragePerStrand = {}
    regions = self.sequenceParser.getRegions()
    logs = []
    progress = Progress(len(regions), "Plotting regions", self.verbosity)
    for region in regions:
        transcript = Transcript()
        transcript.setName(region)
        transcript.setDirection("+")
        transcript.setEnd(self.sizes[region])
        transcript.setStart(1)
        logs.append(self.plotTranscript(region, transcript))
        progress.inc()
    progress.done()
    if self.verbosity > 0:
        # Parenthesised single argument: same output under Python 2, and
        # valid syntax under Python 3.
        print("".join(logs))
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
402
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def plot2TranscriptFiles(self):
    """Plot one figure per transcript of the second input file.

    Resets ``self.outputCoverage`` to one zero per transcript and
    ``self.outputCoveragePerStrand`` to one ``{strand: 0}`` dictionary per
    transcript. Each transcript of ``self.parsers[1]`` is then plotted
    under its iteration index. The logs returned by ``plotTranscript`` are
    concatenated and printed when ``self.verbosity`` is above 0.
    """
    referenceParser = self.parsers[1]
    nbTranscripts = referenceParser.getNbTranscripts()
    self.outputCoverage = [0] * nbTranscripts
    # One independent dictionary per transcript (no shared references).
    self.outputCoveragePerStrand = [dict((strand, 0) for strand in strands) for _ in range(nbTranscripts)]
    progress = Progress(nbTranscripts, "Plotting regions", self.verbosity)
    logs = []
    for cpt2, transcript2 in enumerate(referenceParser.getIterator()):
        logs.append(self.plotTranscript(cpt2, transcript2))
        progress.inc()
    progress.done()
    if self.verbosity > 0:
        # Parenthesised single argument: same output under Python 2, and
        # valid syntax under Python 3.
        print("".join(logs))
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
416
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def plot(self):
    """Run the plotting step.

    Calls ``plot2TranscriptFiles`` when no sequence parser is set, and
    ``plot1TranscriptFile`` otherwise.
    """
    # Identity test on purpose: "no parser" is represented by None itself,
    # so the choice must not depend on how a parser object implements "==".
    if self.sequenceParser is None:
        self.plot2TranscriptFiles()
    else:
        self.plot1TranscriptFile()
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
422
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
def start(self):
    """Run the whole pipeline: initialize, then compute, then plot."""
    for stage in (self.initialize, self.compute, self.plot):
        stage()
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
427
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
428
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
if __name__ == "__main__":

    # parse command line
    description = "Plot Coverage v1.0.1: Plot the coverage of the first data with respect to the second one. [Category: Visualization]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--inputFormat1", dest="inputFormat1", action="store", type="string", help="format of input file 1 [compulsory] [format: transcript file format]")
    parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")
    parser.add_option("-g", "--inputFormat2", dest="inputFormat2", action="store", type="string", help="format of input file 2 [compulsory] [format: transcript file format]")
    parser.add_option("-q", "--sequence", dest="inputSequence", action="store", default=None, type="string", help="input sequence file [format: file in FASTA format] [default: None]")
    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in PNG format]")
    parser.add_option("-w", "--width", dest="width", action="store", default=1500, type="int", help="width of the plots (in px) [format: int] [default: 1500]")
    parser.add_option("-e", "--height", dest="height", action="store", default=1000, type="int", help="height of the plots (in px) [format: int] [default: 1000]")
    parser.add_option("-t", "--title", dest="title", action="store", default="", type="string", help="title of the plots [format: string]")
    parser.add_option("-x", "--xlab", dest="xLabel", action="store", default="", type="string", help="label on the x-axis [format: string]")
    parser.add_option("-y", "--ylab", dest="yLabel", action="store", default="", type="string", help="label on the y-axis [format: string]")
    parser.add_option("-p", "--plusColor", dest="plusColor", action="store", default="red", type="string", help="color for the elements on the plus strand [format: string] [default: red]")
    parser.add_option("-m", "--minusColor", dest="minusColor", action="store", default="blue", type="string", help="color for the elements on the minus strand [format: string] [default: blue]")
    parser.add_option("-s", "--sumColor", dest="sumColor", action="store", default="black", type="string", help="color for 2 strands coverage line [format: string] [default: black]")
    parser.add_option("-l", "--lineColor", dest="lineColor", action="store", default="black", type="string", help="color for the lines [format: string] [default: black]")
    parser.add_option("-1", "--merge", dest="merge", action="store_true", default=False, help="merge the 2 plots in 1 [format: boolean] [default: false]")
    parser.add_option("-D", "--directory", dest="working_Dir", action="store", default=os.getcwd(), type="string", help="the directory to store the results [format: directory]")
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
    (options, args) = parser.parse_args()

    # Colors are keyed by strand: 1 (plus), -1 (minus) and 0 (both strands).
    colors[1] = options.plusColor
    colors[-1] = options.minusColor
    colors[0] = options.sumColor
    colorLine = options.lineColor

    # A relative output name is resolved against the -D directory. The value
    # lives in "working_Dir", the dest declared for that option above.
    outputFileName = options.outputFileName
    if not os.path.isabs(outputFileName):
        outputFileName = os.path.join(options.working_Dir, outputFileName)

    pp = PlotParser(options.verbosity)
    pp.addInput(0, options.inputFileName1, options.inputFormat1)
    pp.addInput(1, options.inputFileName2, options.inputFormat2)
    pp.addSequence(options.inputSequence)
    pp.setOutput(outputFileName)
    pp.setPlotSize(options.width, options.height)
    pp.setLabels(options.xLabel, options.yLabel)
    pp.setTitle(options.title)
    pp.setMerge(options.merge)
    pp.start()
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
470
46
169d364ddd91 Uploaded
m-zytnicki
parents: 36
diff changeset
471