Mercurial > repos > yufei-luo > s_mart
diff SMART/Java/Python/plotCoverage.py @ 46:169d364ddd91
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 30 Sep 2013 03:19:26 -0400 |
parents | 44d5973c188c |
children |
line wrap: on
line diff
--- a/SMART/Java/Python/plotCoverage.py Wed Sep 18 08:51:22 2013 -0400 +++ b/SMART/Java/Python/plotCoverage.py Mon Sep 30 03:19:26 2013 -0400 @@ -32,7 +32,7 @@ from optparse import OptionParser from SMART.Java.Python.structure.Interval import Interval from SMART.Java.Python.structure.Transcript import Transcript -from commons.core.parsing.ParserChooser import ParserChooser +from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer from SMART.Java.Python.misc.RPlotter import RPlotter from SMART.Java.Python.misc.Progress import Progress from commons.core.parsing.FastaParser import FastaParser @@ -42,440 +42,430 @@ colorLine = "black" def parseTargetField(field): - strand = "+" - splittedFieldSpace = field.split() - splittedFieldPlus = field.split("+", 4) - if len(splittedFieldSpace) == 3: - id, start, end = splittedFieldSpace - elif len(splittedFieldSpace) == 4: - id, start, end, strand = splittedFieldSpace - elif len(splittedFieldPlus) == 3: - id, start, end = splittedFieldPlus - elif len(splittedFieldPlus) == 4: - id, start, end, strand = splittedFieldPlus - else: - raise Exception("Cannot parse Target field '%s'." % (field)) - return (id, int(start), int(end), strand) + strand = "+" + splittedFieldSpace = field.split() + splittedFieldPlus = field.split("+", 4) + if len(splittedFieldSpace) == 3: + id, start, end = splittedFieldSpace + elif len(splittedFieldSpace) == 4: + id, start, end, strand = splittedFieldSpace + elif len(splittedFieldPlus) == 3: + id, start, end = splittedFieldPlus + elif len(splittedFieldPlus) == 4: + id, start, end, strand = splittedFieldPlus + else: + raise Exception("Cannot parse Target field '%s'." % (field)) + return (id, int(start), int(end), strand) class SimpleTranscript(object): - def __init__(self, transcript1, transcript2, color = None): - self.start = max(0, transcript1.getStart() - transcript2.getStart()) - self.end = min(transcript2.getEnd() - transcript2.getStart(), transcript1.getEnd() - transcript2.getStart()) - self.strand = transcript1.getDirection() * transcript2.getDirection() - self.exons = [] - for exon in transcript1.getExons(): - if exon.getEnd() >= transcript2.getStart() and exon.getStart() <= transcript2.getEnd(): - start = max(0, exon.getStart() - transcript2.getStart()) - end = min(transcript2.getEnd() - transcript2.getStart(), exon.getEnd() - transcript2.getStart()) - self.addExon(start, end, self.strand, color) + def __init__(self, transcript1, transcript2, color = None): + self.start = max(0, transcript1.getStart() - transcript2.getStart()) + self.end = min(transcript2.getEnd() - transcript2.getStart(), transcript1.getEnd() - transcript2.getStart()) + self.strand = transcript1.getDirection() * transcript2.getDirection() + self.exons = [] + for exon in transcript1.getExons(): + if exon.getEnd() >= transcript2.getStart() and exon.getStart() <= transcript2.getEnd(): + start = max(0, exon.getStart() - transcript2.getStart()) + end = min(transcript2.getEnd() - transcript2.getStart(), exon.getEnd() - transcript2.getStart()) + self.addExon(start, end, self.strand, color) - def addExon(self, start, end, strand, color): - exon = SimpleExon(start, end, strand, color) - self.exons.append(exon) + def addExon(self, start, end, strand, color): + exon = SimpleExon(start, end, strand, color) + self.exons.append(exon) - def getRScript(self, yOffset, height): - rString = "" - previousEnd = None - for exon in sorted(self.exons, key=lambda exon: exon.start): - if previousEnd != None: - rString += "segments(%.1f, %.1f, %.1f, %.1f, col = \"%s\")\n" % (previousEnd, yOffset + height / 4.0, exon.start, yOffset + height / 4.0, colorLine) - rString += exon.getRScript(yOffset, height) - previousEnd = exon.end - return rString + def getRScript(self, yOffset, height): + rString = "" + previousEnd = None + for exon in sorted(self.exons, key=lambda exon: exon.start): + if previousEnd != None: + rString += "segments(%.1f, %.1f, %.1f, %.1f, col = \"%s\")\n" % (previousEnd, yOffset + height / 4.0, exon.start, yOffset + height / 4.0, colorLine) + rString += exon.getRScript(yOffset, height) + previousEnd = exon.end + return rString class SimpleExon(object): - def __init__(self, start, end, strand, color = None): - self.start = start - self.end = end - self.strand = strand - self.color = color - - def getRScript(self, yOffset, height): - color = self.color if self.color != None else colors[self.strand] - return "rect(%.1f, %.1f, %.1f, %.1f, col=\"%s\", border = \"%s\")\n" % (self.start, yOffset, self.end, yOffset + height / 2.0, color, colorLine) + def __init__(self, start, end, strand, color = None): + self.start = start + self.end = end + self.strand = strand + self.color = color + + def getRScript(self, yOffset, height): + color = self.color if self.color != None else colors[self.strand] + return "rect(%.1f, %.1f, %.1f, %.1f, col=\"%s\", border = \"%s\")\n" % (self.start, yOffset, self.end, yOffset + height / 2.0, color, colorLine) class Plotter(object): - - def __init__(self, seed, index, verbosity): - self.seed = seed - self.index = index - self.verbosity = verbosity - self.maxCoverage = 0 - self.maxOverlap = 0 - self.log = "" - self.merge = False - self.width = 1500 - self.heigth = 1000 - self.xLabel = "" - self.yLabel = "" - self.title = None - self.absPath = os.getcwd() - self.coverageDataFileName = "tmpFile_%d_%s.dat" % (seed, index) - self.coverageScript = "" - self.overlapScript = "" - self.outputFileName = None + + def __init__(self, seed, index, verbosity): + self.seed = seed + self.index = index + self.verbosity = verbosity + self.maxCoverage = 0 + self.maxOverlap = 0 + self.log = "" + self.merge = False + self.width = 1500 + self.heigth = 1000 + self.xLabel = "" + self.yLabel = "" + self.title = None + self.absPath = os.getcwd() + self.coverageDataFileName = "tmpFile_%d_%s.dat" % (seed, index) + self.coverageScript = "" + self.overlapScript = "" + self.outputFileName = None - def setOutputFileName(self, fileName): - self.outputFileName = fileName + def setOutputFileName(self, fileName): + self.outputFileName = fileName - def setTranscript(self, transcript): - self.transcript = transcript - self.name = transcript.getName() - self.size = transcript.getEnd() - transcript.getStart() + 1 - if self.title == None: - self.title = self.name - else: - self.title += " " + self.name + def setTranscript(self, transcript): + self.transcript = transcript + self.name = transcript.getName() + self.size = transcript.getEnd() - transcript.getStart() + 1 + if self.title == None: + self.title = self.name + else: + self.title += " " + self.name - def setTitle(self, title): - self.title = title + " " + self.name + def setTitle(self, title): + self.title = title + " " + self.name - def setPlotSize(self, width, height): - self.width = width - self.height = height + def setPlotSize(self, width, height): + self.width = width + self.height = height - def setLabels(self, xLabel, yLabel): - self.xLabel = xLabel - self.yLabel = yLabel + def setLabels(self, xLabel, yLabel): + self.xLabel = xLabel + self.yLabel = yLabel - def setMerge(self, merge): - self.merge = merge + def setMerge(self, merge): + self.merge = merge - def setCoverageData(self, coverage): - outputCoveragePerStrand = dict([strand, 0] for strand in strands) - outputCoverage = 0 - dataFile = open(os.path.abspath(self.coverageDataFileName), "w") - for position in range(self.size+1): - sumValue = 0 - found = False - dataFile.write("%d\t" % (position)) - for strand in strands: - value = coverage[strand].get(position, 0) - sumValue += value - dataFile.write("%d\t" % (value)) - if value > 0: - found = True - outputCoveragePerStrand[strand] += 1 - self.maxCoverage = max(self.maxCoverage, sumValue) - dataFile.write("%d\n" % (sumValue)) - if found: - outputCoverage += 1 - dataFile.close() - self.log += "%s (%d nt):\n - both strands: %d (%.0f%%)\n - (+) strand: %d (%.0f%%)\n - (-) strand: %d (%.0f%%)\n" % (self.name, self.size, outputCoverage, float(outputCoverage) / self.size * 100, outputCoveragePerStrand[1], float(outputCoveragePerStrand[1]) / self.size * 100, outputCoveragePerStrand[-1], float(outputCoveragePerStrand[-1]) / self.size * 100) - self.coverageScript += "data = scan(\"%s\", list(pos = -666, minus = -666, plus = -666, sumValue = -666), sep=\"\t\")\n" % (os.path.abspath(self.coverageDataFileName)) - self.coverageScript += "lines(x = data$pos, y = data$minus, col = \"%s\")\n" % (colors[-1]) - self.coverageScript += "lines(x = data$pos, y = data$plus, col = \"%s\")\n" % (colors[1]) - self.coverageScript += "lines(x = data$pos, y = data$sumValue, col = \"%s\")\n" % (colors[0]) + def setCoverageData(self, coverage): + outputCoveragePerStrand = dict([strand, 0] for strand in strands) + outputCoverage = 0 + dataFile = open(os.path.abspath(self.coverageDataFileName), "w") + for position in range(self.size+1): + sumValue = 0 + found = False + dataFile.write("%d\t" % (position)) + for strand in strands: + value = coverage[strand].get(position, 0) + sumValue += value + dataFile.write("%d\t" % (value)) + if value > 0: + found = True + outputCoveragePerStrand[strand] += 1 + self.maxCoverage = max(self.maxCoverage, sumValue) + dataFile.write("%d\n" % (sumValue)) + if found: + outputCoverage += 1 + dataFile.close() + self.log += "%s (%d nt):\n - both strands: %d (%.0f%%)\n - (+) strand: %d (%.0f%%)\n - (-) strand: %d (%.0f%%)\n" % (self.name, self.size, outputCoverage, float(outputCoverage) / self.size * 100, outputCoveragePerStrand[1], float(outputCoveragePerStrand[1]) / self.size * 100, outputCoveragePerStrand[-1], float(outputCoveragePerStrand[-1]) / self.size * 100) + self.coverageScript += "data = scan(\"%s\", list(pos = -666, minus = -666, plus = -666, sumValue = -666), sep=\"\t\")\n" % (os.path.abspath(self.coverageDataFileName)) + self.coverageScript += "lines(x = data$pos, y = data$minus, col = \"%s\")\n" % (colors[-1]) + self.coverageScript += "lines(x = data$pos, y = data$plus, col = \"%s\")\n" % (colors[1]) + self.coverageScript += "lines(x = data$pos, y = data$sumValue, col = \"%s\")\n" % (colors[0]) - def setOverlapData(self, overlap): - height = 1 - self.maxOverlap = (len(overlap) + 1) * height - thisElement = SimpleTranscript(self.transcript, self.transcript, "black") - self.overlapScript += thisElement.getRScript(0, height) - for cpt, transcript in enumerate(sorted(overlap, cmp=lambda c1, c2: c1.start - c2.start if c1.start != c2.start else c1.end - c2.end)): - self.overlapScript += transcript.getRScript((cpt + 1) * height, height) + def setOverlapData(self, overlap): + height = 1 + self.maxOverlap = (len(overlap) + 1) * height + thisElement = SimpleTranscript(self.transcript, self.transcript, "black") + self.overlapScript += thisElement.getRScript(0, height) + for cpt, transcript in enumerate(sorted(overlap, cmp=lambda c1, c2: c1.start - c2.start if c1.start != c2.start else c1.end - c2.end)): + self.overlapScript += transcript.getRScript((cpt + 1) * height, height) - def getFirstLine(self, suffix = None): - return "png(file = \"%s_%s%s.png\", width = %d, height = %d, bg = \"white\")\n" % (self.outputFileName, self.name, "" if suffix == None or self.merge else "_%s" % (suffix), self.width, self.height) + def getFirstLine(self, suffix = None): + return "png(file = \"%s_%s%s.png\", width = %d, height = %d, bg = \"white\")\n" % (self.outputFileName, self.name, "" if suffix == None or self.merge else "_%s" % (suffix), self.width, self.height) - def getLastLine(self): - return "dev.off()\n" + def getLastLine(self): + return "dev.off()\n" - def startR(self, fileName, script): - scriptFile = open(fileName, "w") - scriptFile.write(script) - scriptFile.close() - command = "R CMD BATCH %s" % (fileName) - status = subprocess.call(command, shell=True) - if status != 0: - raise Exception("Problem with the execution of script file %s, status is: %s" % (fileName, status)) + def startR(self, fileName, script): + scriptFile = open(fileName, "w") + scriptFile.write(script) + scriptFile.close() + command = "R CMD BATCH %s" % (fileName) + status = subprocess.call(command, shell=True) + if status != 0: + raise Exception("Problem with the execution of script file %s, status is: %s" % (fileName, status)) - def plot(self): - if self.merge: - fileName = "%s_%d_%s.R" % (self.outputFileName, self.seed, self.index) - plotLine = "plot(x = NA, y = NA, xlab=\"%s\", ylab=\"%s\", panel.first = grid(lwd = 1.0), xlim = c(0, %d), ylim = c(0, %d), cex.axis = 2, cex.lab = 2, cex.main=2, main = \"%s\")\n" % (self.xLabel, self.yLabel, self.size, max(self.maxCoverage, self.maxOverlap), self.title) - script = self.getFirstLine() + plotLine + self.overlapScript + self.coverageScript + self.getLastLine() - self.startR(fileName, script) - else: - fileName = "%s_%d_%s_overlap.R" % (self.outputFileName, self.seed, self.index) - plotLine = "plot(x = NA, y = NA, xlab=\"%s\", ylab=\"%s\", panel.first = grid(lwd = 1.0), xlim = c(0, %d), ylim = c(0, %d), cex.axis = 2, cex.lab = 2, cex.main=2, main = \"%s\")\n" % (self.xLabel, self.yLabel, self.size, self.maxOverlap, self.title) - script = self.getFirstLine("overlap") + plotLine + self.overlapScript + self.getLastLine() - self.startR(fileName, script) - fileName = "%s_%d_%s_coverage.R" % (self.outputFileName, self.seed, self.index) - plotLine = "plot(x = NA, y = NA, xlab=\"%s\", ylab=\"%s\", panel.first = grid(lwd = 1.0), xlim = c(0, %d), ylim = c(0, %d), cex.axis = 2, cex.lab = 2, cex.main=2, main = \"%s\")\n" % (self.xLabel, self.yLabel, self.size, self.maxCoverage, self.title) - script = self.getFirstLine("coverage") + plotLine + self.coverageScript + self.getLastLine() - self.startR(fileName, script) + def plot(self): + print "outputfileName is written in :", self.outputFileName + if self.merge: + fileName = "%s_%d_%s.R" % (self.outputFileName, self.seed, self.index) + plotLine = "plot(x = NA, y = NA, xlab=\"%s\", ylab=\"%s\", panel.first = grid(lwd = 1.0), xlim = c(0, %d), ylim = c(0, %d), cex.axis = 2, cex.lab = 2, cex.main=2, main = \"%s\")\n" % (self.xLabel, self.yLabel, self.size, max(self.maxCoverage, self.maxOverlap), self.title) + script = self.getFirstLine() + plotLine + self.overlapScript + self.coverageScript + self.getLastLine() + self.startR(fileName, script) + else: + fileName = "%s_%d_%s_overlap.R" % (self.outputFileName, self.seed, self.index) + print "overlap file is written in :", fileName + plotLine = "plot(x = NA, y = NA, xlab=\"%s\", ylab=\"%s\", panel.first = grid(lwd = 1.0), xlim = c(0, %d), ylim = c(0, %d), cex.axis = 2, cex.lab = 2, cex.main=2, main = \"%s\")\n" % (self.xLabel, self.yLabel, self.size, self.maxOverlap, self.title) + script = self.getFirstLine("overlap") + plotLine + self.overlapScript + self.getLastLine() + self.startR(fileName, script) + fileName = "%s_%d_%s_coverage.R" % (self.outputFileName, self.seed, self.index) + plotLine = "plot(x = NA, y = NA, xlab=\"%s\", ylab=\"%s\", panel.first = grid(lwd = 1.0), xlim = c(0, %d), ylim = c(0, %d), cex.axis = 2, cex.lab = 2, cex.main=2, main = \"%s\")\n" % (self.xLabel, self.yLabel, self.size, self.maxCoverage, self.title) + script = self.getFirstLine("coverage") + plotLine + self.coverageScript + self.getLastLine() + self.startR(fileName, script) class PlotParser(object): - def __init__(self, verbosity): - self.verbosity = verbosity - self.parsers = [None, None] - self.sequenceParser = None - self.seed = random.randint(0, 10000) - self.title = "" - self.merge = False + def __init__(self, verbosity): + self.verbosity = verbosity + self.parsers = [None, None] + self.sequenceParser = None + self.seed = random.randint(0, 10000) + self.title = "" + self.merge = False - def __del__(self): - for fileName in glob.glob("tmpFile_%d*.dat" % (self.seed)): - os.remove(fileName) - for fileName in glob.glob("%s*.R" % (os.path.abspath(self.outputFileName))): - os.remove(fileName) - for fileName in glob.glob("%s*.Rout" % (os.path.abspath(self.outputFileName))): - os.remove(fileName) + def __del__(self): + for fileName in glob.glob("tmpFile_%d*.dat" % (self.seed)): + os.remove(fileName) + for fileName in glob.glob("%s*.R" % (os.path.abspath(self.outputFileName))): + os.remove(fileName) + for fileName in glob.glob("%s*.Rout" % (os.path.abspath(self.outputFileName))): + os.remove(fileName) - def addInput(self, inputNb, fileName, fileFormat): - if fileName == None: - return - chooser = ParserChooser(self.verbosity) - chooser.findFormat(fileFormat) - self.parsers[inputNb] = chooser.getParser(fileName) - if inputNb == 0: - self.parsers[1] = self.parsers[0] + def addInput(self, inputNb, fileName, fileFormat): + if fileName == None: + return + self.parsers[inputNb] = TranscriptContainer(fileName, fileFormat, self.verbosity) + if inputNb == 0: + self.parsers[1] = self.parsers[0] - def addSequence(self, fileName): - if fileName == None: - return - self.sequenceParser = FastaParser(fileName, self.verbosity) + def addSequence(self, fileName): + if fileName == None: + return + self.sequenceParser = FastaParser(fileName, self.verbosity) - def setOutput(self, fileName): - self.outputFileName = fileName + def setOutput(self, fileName): + self.outputFileName = fileName - def setPlotSize(self, width, height): - self.width = width - self.height = height + def setPlotSize(self, width, height): + self.width = width + self.height = height - def setLabels(self, xLabel, yLabel): - self.xLabel = xLabel - self.yLabel = yLabel + def setLabels(self, xLabel, yLabel): + self.xLabel = xLabel + self.yLabel = yLabel - def setTitle(self, title): - self.title = title + def setTitle(self, title): + self.title = title - def setMerge(self, merge): - self.merge = merge + def setMerge(self, merge): + self.merge = merge - def initializeDataFromSequences(self): - self.sizes = {} - self.coverage = {} - self.overlap = {} - for region in self.sequenceParser.getRegions(): - self.sizes[region] = self.sequenceParser.getSizeOfRegion(region) - self.coverage[region] = {} - self.overlap[region] = [] - for strand in strands: - self.coverage[region][strand] = {} - self.coverage[region][strand][1] = 0 - self.coverage[region][strand][self.sizes[region]] = 0 + def initializeDataFromSequences(self): + self.sizes = {} + self.coverage = {} + self.overlap = {} + for region in self.sequenceParser.getRegions(): + self.sizes[region] = self.sequenceParser.getSizeOfRegion(region) + self.coverage[region] = {} + self.overlap[region] = [] + for strand in strands: + self.coverage[region][strand] = {} + self.coverage[region][strand][1] = 0 + self.coverage[region][strand][self.sizes[region]] = 0 + - def initializeDataFromTranscripts(self): - self.coverage = dict([i, None] for i in range(self.parsers[1].getNbTranscripts())) - self.overlap = dict([i, None] for i in range(self.parsers[1].getNbTranscripts())) - self.sizes = dict([i, 0] for i in range(self.parsers[1].getNbTranscripts())) - progress = Progress(self.parsers[1].getNbTranscripts(), "Reading regions", self.verbosity) - for cpt, transcript in enumerate(self.parsers[1].getIterator()): - self.coverage[cpt] = {} - self.overlap[cpt] = [] - for strand in strands: - self.coverage[cpt][strand] = {} - self.coverage[cpt][strand][0] = 0 - self.coverage[cpt][strand][transcript.getEnd() - transcript.getStart()] = 0 - for exon in transcript.getExons(): - self.sizes[cpt] += exon.getSize() - progress.inc() - progress.done() - - def initialize(self): - if self.sequenceParser == None: - self.initializeDataFromTranscripts() - else: - self.initializeDataFromSequences() + def initializeDataFromTranscripts(self): + self.coverage = dict([i, None] for i in range(self.parsers[1].getNbTranscripts())) + self.overlap = dict([i, None] for i in range(self.parsers[1].getNbTranscripts())) + self.sizes = dict([i, 0] for i in range(self.parsers[1].getNbTranscripts())) + self.parsers[0].findData() + progress = Progress(self.parsers[1].getNbTranscripts(), "Reading regions", self.verbosity) + for cpt, transcript in enumerate(self.parsers[1].getIterator()): + self.coverage[cpt] = {} + self.overlap[cpt] = [] + for strand in strands: + self.coverage[cpt][strand] = {} + self.coverage[cpt][strand][0] = 0 + self.coverage[cpt][strand][transcript.getEnd() - transcript.getStart()] = 0 + for exon in transcript.getExons(): + self.sizes[cpt] += exon.getSize() + progress.inc() + progress.done() - def computeCoverage(self, transcript1, transcript2, id): - strand = transcript1.getDirection() * transcript2.getDirection() - for exon1 in transcript1.getExons(): - for exon2 in transcript2.getExons(): - if exon1.overlapWith(exon2): - for position in range(max(exon1.getStart(), exon2.getStart()), min(exon1.getEnd(), exon2.getEnd()) + 1): - relativePosition = position - transcript2.getStart() + 1 - self.coverage[id][strand][relativePosition] = self.coverage[id][strand].get(relativePosition, 0) + 1 + def initialize(self): + if self.sequenceParser == None: + self.initializeDataFromTranscripts() + else: + self.initializeDataFromSequences() - def computeOverlap(self, transcript1, transcript2, id): - simpleTranscript = SimpleTranscript(transcript1, transcript2) - self.overlap[id].append(simpleTranscript) - - def compute2TranscriptFiles(self): - progress = Progress(self.parsers[1].getNbTranscripts(), "Comparing regions", self.verbosity) - for cpt2, transcript2 in enumerate(self.parsers[1].getIterator()): - for transcript1 in self.parsers[0].getIterator(): - if transcript1.overlapWithExon(transcript2): - self.computeCoverage(transcript1, transcript2, cpt2) - self.computeOverlap(transcript1, transcript2, cpt2) - progress.inc() - progress.done() + def computeCoverage(self, transcript1, transcript2, id): + strand = transcript1.getDirection() * transcript2.getDirection() + for exon1 in transcript1.getExons(): + for exon2 in transcript2.getExons(): + if exon1.overlapWith(exon2): + for position in range(max(exon1.getStart(), exon2.getStart()), min(exon1.getEnd(), exon2.getEnd()) + 1): + relativePosition = position - transcript2.getStart() + 1 + self.coverage[id][strand][relativePosition] = self.coverage[id][strand].get(relativePosition, 0) + 1 - def extractReferenceQueryMapping(self, mapping): - queryTranscript = mapping.getTranscript() - referenceTranscript = Transcript() - referenceTranscript.setChromosome(queryTranscript.getChromosome()) - referenceTranscript.setName(queryTranscript.getChromosome()) - referenceTranscript.setDirection("+") - referenceTranscript.setEnd(self.sizes[queryTranscript.getChromosome()]) - referenceTranscript.setStart(1) - return (referenceTranscript, queryTranscript) + def computeOverlap(self, transcript1, transcript2, id): + simpleTranscript = SimpleTranscript(transcript1, transcript2) + self.overlap[id].append(simpleTranscript) + + def compute2TranscriptFiles(self): + progress = Progress(self.parsers[1].getNbTranscripts(), "Comparing regions", self.verbosity) + for cpt2, transcript2 in enumerate(self.parsers[1].getIterator()): + for transcript1 in self.parsers[0].getIterator(): + if transcript1.overlapWithExon(transcript2): + self.computeCoverage(transcript1, transcript2, cpt2) + self.computeOverlap(transcript1, transcript2, cpt2) + progress.inc() + progress.done() - def extractReferenceQuery(self, inputTranscript): - if "Target" not in inputTranscript.getTagNames(): - raise Exception("Cannot extract Target field in line '%s'." % (inputTranscript)) - id, start, end, strand = parseTargetField(inputTranscript.getTagValue("Target")) - if id not in self.sizes: - raise Exception("Target id '%s' of transcript '%s' does not correspond to anything in FASTA file." % (id, inputTranscript)) - referenceTranscript = Transcript() - referenceTranscript.setChromosome(id) - referenceTranscript.setName(id) - referenceTranscript.setDirection("+") - referenceTranscript.setEnd(self.sizes[id]) - referenceTranscript.setStart(1) - queryTranscript = Transcript() - queryTranscript.setChromosome(id) - queryTranscript.setName(id) - queryTranscript.setStart(start) - queryTranscript.setEnd(end) - queryTranscript.setDirection(strand) - if inputTranscript.getNbExons() > 1: - factor = float(end - start) / (inputTranscript.getEnd() - inputTranscript.getStart()) - for exon in inputTranscript.getExons(): - newExon = Interval() - newExon.setChromosome(id) - newExon.setDirection(strand) - if "Target" in inputTranscript.getTagNames(): - id, start, end, strand = parseTargetField(exon.getTagValue("Target")) - newExon.setStart(start) - newExon.setEnd(end) - else: - newExon.setStart(int(round((exon.getStart() - inputTranscript.getStart()) * factor)) + start) - newExon.setEnd( int(round((exon.getEnd() - inputTranscript.getStart()) * factor)) + start) - queryTranscript.addExon(newExon) - return (referenceTranscript, queryTranscript) + def extractReferenceQuery(self, inputTranscript): + if "Target" not in inputTranscript.getTagNames(): + raise Exception("Cannot extract Target field in line '%s'." % (inputTranscript)) + id, start, end, strand = parseTargetField(inputTranscript.getTagValue("Target")) + if id not in self.sizes: + raise Exception("Target id '%s' of transcript '%s' does not correspond to anything in FASTA file." % (id, inputTranscript)) + referenceTranscript = Transcript() + referenceTranscript.setChromosome(id) + referenceTranscript.setName(id) + referenceTranscript.setDirection("+") + referenceTranscript.setEnd(self.sizes[id]) + referenceTranscript.setStart(1) + queryTranscript = Transcript() + queryTranscript.setChromosome(id) + queryTranscript.setName(id) + queryTranscript.setStart(start) + queryTranscript.setEnd(end) + queryTranscript.setDirection(strand) + if inputTranscript.getNbExons() > 1: + factor = float(end - start) / (inputTranscript.getEnd() - inputTranscript.getStart()) + for exon in inputTranscript.getExons(): + newExon = Interval() + newExon.setChromosome(id) + newExon.setDirection(strand) + if "Target" in inputTranscript.getTagNames(): + id, start, end, strand = parseTargetField(exon.getTagValue("Target")) + newExon.setStart(start) + newExon.setEnd(end) + else: + newExon.setStart(int(round((exon.getStart() - inputTranscript.getStart()) * factor)) + start) + newExon.setEnd( int(round((exon.getEnd() - inputTranscript.getStart()) * factor)) + start) + queryTranscript.addExon(newExon) + return (referenceTranscript, queryTranscript) - def compute1TranscriptFiles(self): - progress = Progress(self.parsers[1].getNbItems(), "Comparing regions", self.verbosity) - for transcript in self.parsers[1].getIterator(): - if transcript.__class__.__name__ == "Mapping": - referenceTranscript, queryTranscript = self.extractReferenceQueryMapping(transcript) - else: - referenceTranscript, queryTranscript = self.extractReferenceQuery(transcript) - self.computeCoverage(queryTranscript, referenceTranscript, referenceTranscript.getName()) - self.computeOverlap(queryTranscript, referenceTranscript, referenceTranscript.getName()) - progress.inc() - progress.done() + def compute1TranscriptFiles(self): + progress = Progress(self.parsers[1].getNbTranscripts(), "Comparing regions", self.verbosity) + for transcript in self.parsers[1].getIterator(): + referenceTranscript, queryTranscript = self.extractReferenceQuery(transcript) + self.computeCoverage(queryTranscript, referenceTranscript, referenceTranscript.getName()) + self.computeOverlap(queryTranscript, referenceTranscript, referenceTranscript.getName()) + progress.inc() + progress.done() - def compute(self): - if self.sequenceParser == None: - self.compute2TranscriptFiles() - else: - self.compute1TranscriptFiles() + def compute(self): + if self.sequenceParser == None: + self.compute2TranscriptFiles() + else: + self.compute1TranscriptFiles() - def plotTranscript(self, index, transcript): - plotter = Plotter(self.seed, index, self.verbosity) - plotter.setOutputFileName(self.outputFileName) - plotter.setTranscript(transcript) - plotter.setTitle(self.title) - plotter.setLabels(self.xLabel, self.yLabel) - plotter.setPlotSize(self.width, self.height) - plotter.setCoverageData(self.coverage[index]) - plotter.setOverlapData(self.overlap[index]) - plotter.setMerge(self.merge) - plotter.plot() - output = plotter.log - return output - - def plot1TranscriptFile(self): - self.outputCoverage = {} - self.outputCoveragePerStrand = {} - output = "" - progress = Progress(len(self.sequenceParser.getRegions()), "Plotting regions", self.verbosity) - for cpt2, region in enumerate(self.sequenceParser.getRegions()): - transcript = Transcript() - transcript.setName(region) - transcript.setDirection("+") - transcript.setEnd(self.sizes[region]) - transcript.setStart(1) - output += self.plotTranscript(region, transcript) - progress.inc() - progress.done() - if self.verbosity > 0: - print output + def plotTranscript(self, index, transcript): + plotter = Plotter(self.seed, index, self.verbosity) + plotter.setOutputFileName(self.outputFileName) + plotter.setTranscript(transcript) + plotter.setTitle(self.title) + plotter.setLabels(self.xLabel, self.yLabel) + plotter.setPlotSize(self.width, self.height) + plotter.setCoverageData(self.coverage[index]) + plotter.setOverlapData(self.overlap[index]) + plotter.setMerge(self.merge) + plotter.plot() + output = plotter.log + return output + + def plot1TranscriptFile(self): + self.outputCoverage = {} + self.outputCoveragePerStrand = {} + output = "" + progress = Progress(len(self.sequenceParser.getRegions()), "Plotting regions", self.verbosity) + for cpt2, region in enumerate(self.sequenceParser.getRegions()): + transcript = Transcript() + transcript.setName(region) + transcript.setDirection("+") + transcript.setEnd(self.sizes[region]) + transcript.setStart(1) + output += self.plotTranscript(region, transcript) + progress.inc() + progress.done() + if self.verbosity > 0: + print output - def plot2TranscriptFiles(self): - self.outputCoverage = [0] * self.parsers[1].getNbTranscripts() - self.outputCoveragePerStrand = [None] * self.parsers[1].getNbTranscripts() - for cpt in range(self.parsers[1].getNbTranscripts()): - self.outputCoveragePerStrand[cpt] = dict([strand, 0] for strand in strands) - progress = Progress(self.parsers[1].getNbTranscripts(), "Plotting regions", self.verbosity) - output = "" - for cpt2, transcript2 in enumerate(self.parsers[1].getIterator()): - output += self.plotTranscript(cpt2, transcript2) - progress.inc() - progress.done() - if self.verbosity > 0: - print output + def plot2TranscriptFiles(self): + self.outputCoverage = [0] * self.parsers[1].getNbTranscripts() + self.outputCoveragePerStrand = [None] * self.parsers[1].getNbTranscripts() + for cpt in range(self.parsers[1].getNbTranscripts()): + self.outputCoveragePerStrand[cpt] = dict([strand, 0] for strand in strands) + progress = Progress(self.parsers[1].getNbTranscripts(), "Plotting regions", self.verbosity) + output = "" + for cpt2, transcript2 in enumerate(self.parsers[1].getIterator()): + output += self.plotTranscript(cpt2, transcript2) + progress.inc() + progress.done() + if self.verbosity > 0: + print output - def plot(self): - if self.sequenceParser == None: - self.plot2TranscriptFiles() - else: - self.plot1TranscriptFile() + def plot(self): + if self.sequenceParser == None: + self.plot2TranscriptFiles() + else: + self.plot1TranscriptFile() - def start(self): - self.initialize() - self.compute() - self.plot() + def start(self): + self.initialize() + self.compute() + self.plot() if __name__ == "__main__": - - # parse command line - description = "Plot Coverage v1.0.1: Plot the coverage of the first data with respect to the second one. [Category: Visualization]" + + # parse command line + description = "Plot Coverage v1.0.1: Plot the coverage of the first data with respect to the second one. [Category: Visualization]" - parser = OptionParser(description = description) - parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="input file 1 [compulsory] [format: file in transcript or mapping format given by -f]") - parser.add_option("-f", "--inputFormat1", dest="inputFormat1", action="store", type="string", help="format of input file 1 [compulsory] [format: transcript or mapping file format]") - parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]") - parser.add_option("-g", "--inputFormat2", dest="inputFormat2", action="store", type="string", help="format of input file 2 [compulsory] [format: transcript file format]") - parser.add_option("-q", "--sequence", dest="inputSequence", action="store", default=None, type="string", help="input sequence file [format: file in FASTA format] [default: None]") - parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in PNG format]") - parser.add_option("-w", "--width", dest="width", action="store", default=1500, type="int", help="width of the plots (in px) [format: int] [default: 1500]") - parser.add_option("-e", "--height", dest="height", action="store", default=1000, type="int", help="height of the plots (in px) [format: int] [default: 1000]") - parser.add_option("-t", "--title", dest="title", action="store", default="", type="string", help="title of the plots [format: string]") - parser.add_option("-x", "--xlab", dest="xLabel", action="store", default="", type="string", help="label on the x-axis [format: string]") - parser.add_option("-y", "--ylab", dest="yLabel", action="store", default="", type="string", help="label on the y-axis [format: string]") - parser.add_option("-p", "--plusColor", dest="plusColor", action="store", default="red", type="string", help="color for the elements on the plus strand [format: string] [default: red]") - parser.add_option("-m", "--minusColor", dest="minusColor", action="store", default="blue", type="string", help="color for the elements on the minus strand [format: string] [default: blue]") - parser.add_option("-s", "--sumColor", dest="sumColor", action="store", default="black", type="string", help="color for 2 strands coverage line [format: string] [default: black]") - parser.add_option("-l", "--lineColor", dest="lineColor", action="store", default="black", type="string", help="color for the lines [format: string] [default: black]") - parser.add_option("-1", "--merge", dest="merge", action="store_true", default=False, help="merge the 2 plots in 1 [format: boolean] [default: false]") - parser.add_option("-D", "--directory", dest="working_Dir", action="store", default=os.getcwd(), type="string", help="the directory to store the results [format: directory]") - parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]") - (options, args) = parser.parse_args() + parser = OptionParser(description = description) + parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]") + parser.add_option("-f", "--inputFormat1", dest="inputFormat1", action="store", type="string", help="format of input file 1 [compulsory] [format: transcript file format]") + parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]") + parser.add_option("-g", "--inputFormat2", dest="inputFormat2", action="store", type="string", help="format of input file 2 [compulsory] [format: transcript file format]") + parser.add_option("-q", "--sequence", dest="inputSequence", action="store", default=None, type="string", help="input sequence file [format: file in FASTA format] [default: None]") + parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in PNG format]") + parser.add_option("-w", "--width", dest="width", action="store", default=1500, type="int", help="width of the plots (in px) [format: int] [default: 1500]") + parser.add_option("-e", "--height", dest="height", action="store", default=1000, type="int", help="height of the plots (in px) [format: int] [default: 1000]") + parser.add_option("-t", "--title", dest="title", action="store", default="", type="string", help="title of the plots [format: string]") + parser.add_option("-x", "--xlab", dest="xLabel", action="store", default="", type="string", help="label on the x-axis [format: string]") + parser.add_option("-y", "--ylab", dest="yLabel", action="store", default="", type="string", help="label on the y-axis [format: string]") + parser.add_option("-p", "--plusColor", dest="plusColor", action="store", default="red", type="string", help="color for the elements on the plus strand [format: string] [default: red]") + parser.add_option("-m", "--minusColor", dest="minusColor", action="store", default="blue", type="string", help="color for the elements on the minus strand [format: string] [default: blue]") + parser.add_option("-s", "--sumColor", dest="sumColor", action="store", default="black", type="string", help="color for 2 strands coverage line [format: string] [default: black]") + parser.add_option("-l", "--lineColor", dest="lineColor", action="store", default="black", type="string", help="color for the lines [format: string] [default: black]") + parser.add_option("-1", "--merge", dest="merge", action="store_true", default=False, help="merge the 2 plots in 1 [format: boolean] [default: false]") + parser.add_option("-D", "--directory", dest="working_Dir", action="store", default=os.getcwd(), type="string", help="the directory to store the results [format: directory]") + parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]") + (options, args) = parser.parse_args() - colors[1] = options.plusColor - colors[-1] = options.minusColor - colors[0] = options.sumColor - colorLine = options.lineColor + colors[1] = options.plusColor + colors[-1] = options.minusColor + colors[0] = options.sumColor + colorLine = options.lineColor - pp = PlotParser(options.verbosity) - pp.addInput(0, options.inputFileName1, options.inputFormat1) - pp.addInput(1, options.inputFileName2, options.inputFormat2) - pp.addSequence(options.inputSequence) - pp.setOutput(options.outputFileName if os.path.isabs(options.outputFileName) else os.path.join(options.working_Dir, options.outputFileName)) - pp.setPlotSize(options.width, options.height) - pp.setLabels(options.xLabel, options.yLabel) - pp.setTitle(options.title) - pp.setMerge(options.merge) - pp.start() + pp = PlotParser(options.verbosity) + pp.addInput(0, options.inputFileName1, options.inputFormat1) + pp.addInput(1, options.inputFileName2, options.inputFormat2) + pp.addSequence(options.inputSequence) + pp.setOutput(options.outputFileName if os.path.isabs(options.outputFileName) else os.path.join(options.working_Dirpath, options.outputFileName)) + pp.setPlotSize(options.width, options.height) + pp.setLabels(options.xLabel, options.yLabel) + pp.setTitle(options.title) + pp.setMerge(options.merge) + pp.start() +