diff smart_toolShed/SMART/Java/Python/plot.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/smart_toolShed/SMART/Java/Python/plot.py	Thu Jan 17 10:52:14 2013 -0500
@@ -0,0 +1,223 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+
+"""
+Plot the values stored in the tags of a transcript file,
+as a barplot, a line, points or heat points
+"""
+
+import os, re, math
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.misc.RPlotter import RPlotter
+from SMART.Java.Python.misc.Progress import Progress
+from commons.core.utils.FileUtils import FileUtils
+
+class Plot(object):
+    """
+    Read tag values from a transcript file and hand them to an RPlotter.
+
+    Configure the instance with the setters, then call run(), which builds
+    the plotter, reads every transcript, draws the plot and prints the
+    requested statistics.
+    """
+
+    def __init__(self, verbosity):
+        """
+        @param verbosity: trace level, forwarded to the parser and the plotter
+        """
+        self.verbosity      = verbosity
+        self.keep           = False
+        # Every attribute read by the other methods gets a value here, so that
+        # skipping a setter no longer ends in an AttributeError.  The values
+        # are the defaults of the command-line options of this script.
+        self.parser         = None
+        self.plotter        = None
+        self.outputFileName = None
+        self.shape          = "barplot"
+        self.x              = None
+        self.xDefault       = None
+        self.y              = None
+        self.yDefault       = None
+        self.z              = None
+        self.zDefault       = None
+        self.nbBars         = 2
+        self.regression     = False
+        self.log            = "y"
+
+    def keepTmpFiles(self, boolean):
+        """Tell the plotter whether its temporary files should be kept."""
+        self.keep = boolean
+
+    def setShape(self, shape):
+        """Set the plot shape: "barplot", "line", "points" or "heatPoints"."""
+        self.shape = shape
+
+    def setInputFileName(self, fileName, format):
+        """Open the input file with a TranscriptContainer of the given format."""
+        self.parser = TranscriptContainer(fileName, format, self.verbosity)
+
+    def setXData(self, tag, default):
+        """Set the tag holding the x value, and the value used when it is absent."""
+        self.x        = tag
+        self.xDefault = default
+
+    def setYData(self, tag, default):
+        """Set the tag holding the y value (None: no y), and its fallback value."""
+        self.y        = tag
+        self.yDefault = default
+
+    def setZData(self, tag, default):
+        """Set the tag holding the z value (None: no z), and its fallback value."""
+        self.z        = tag
+        self.zDefault = default
+
+    def setNbBars(self, nbBars):
+        """Set the number of bars used by the "barplot" shape."""
+        self.nbBars = nbBars
+
+    def setOutputFileName(self, fileName):
+        """Set the name of the file given to the plotter as output."""
+        self.outputFileName = fileName
+
+    def setRegression(self, regression):
+        """Ask for the regression (and the correlation data printed by close())."""
+        self.regression = regression
+
+    def setLog(self, log):
+        """Set the log option forwarded to the plotter (e.g. 'x', 'y' or 'xy')."""
+        self.log = log
+
+    def createPlotter(self):
+        """
+        Build the RPlotter and configure it for the requested shape.
+
+        @raise Exception: if the shape is not one of the four supported ones
+        """
+        self.plotter = RPlotter(self.outputFileName, self.verbosity, self.keep)
+        if self.shape == "barplot":
+            self.plotter.setBarplot(True)
+        elif self.shape == "line":
+            # nothing to switch on for this shape
+            pass
+        elif self.shape == "points":
+            self.plotter.setPoints(True)
+        elif self.shape == "heatPoints":
+            self.plotter.setHeatPoints(True)
+        else:
+            raise Exception("Do not understand shape '%s'\n" % (self.shape))
+
+        self.plotter.setLog(self.log)
+        self.plotter.setRegression(self.regression)
+
+    def _getRawValue(self, transcript, tag, default, axis):
+        """
+        Return the value of a tag, or the default when the tag is absent.
+
+        @param axis: "x", "y" or "z", only used in the error message
+        @raise Exception: if the tag is absent and no default is set
+        """
+        value = transcript.getTagValue(tag)
+        if value is None:
+            value = default
+        if value is None:
+            raise Exception("Error! Transcript %s do not have the %s-tag %s\n" % (transcript, axis, tag))
+        return value
+
+    def getValues(self, transcript):
+        """
+        Extract the x, y and z values of a transcript.
+
+        @return: a tuple (x, y, z) of floats; y (resp. z) is None when no
+                 y (resp. z) tag has been configured
+        @raise Exception: if a configured tag is absent and has no default
+        """
+        x = self._getRawValue(transcript, self.x, self.xDefault, "x")
+        y = None
+        z = None
+        if self.y is not None:
+            y = self._getRawValue(transcript, self.y, self.yDefault, "y")
+        if self.z is not None:
+            z = self._getRawValue(transcript, self.z, self.zDefault, "z")
+        # Convert only once every tag is resolved, so that a missing tag is
+        # reported before a malformed number.
+        x = float(x)
+        if self.y is not None:
+            y = float(y)
+        if self.z is not None:
+            z = float(z)
+        return (x, y, z)
+
+    def correctPointsToBarplot(self, line):
+        """
+        Gather the counts of a {x: count} dictionary into self.nbBars bars.
+
+        @param line: dictionary mapping each x value to its number of occurrences
+        @return:     dictionary mapping the lower bound of each bar to its count
+        """
+        minValue = int(math.floor(min(line)))
+        maxValue = int(math.ceil(max(line)))
+        span     = maxValue - minValue
+        if span == 0:
+            # All the x values are the same integer: the ratio computed below
+            # would divide by zero, so everything goes into a single bar.
+            return {minValue: sum(line.values())}
+        # Explicit floor division: the bar bounds stay integers.
+        # NOTE(review): when span < nbBars the step is 0 and all the bars
+        # share the same bound, so the counts end up in one bar.
+        step   = span // self.nbBars
+        values = dict((i * step + minValue, 0) for i in range(self.nbBars))
+        top    = (self.nbBars - 1) * step + minValue
+        for key, value in line.items():
+            index  = int(math.floor((key - minValue) / float(span) * self.nbBars))
+            # the largest x falls exactly on the upper bound: keep it in the last bar
+            newKey = min(top, index * step + minValue)
+            values[newKey] += value
+        return values
+
+    def parseFile(self):
+        """
+        Read every transcript, build the data of the plot and draw it.
+
+        Depending on the shape, the data is {name: (x, y)} (points and
+        heatPoints, with an extra {name: z} for heatPoints), {x: y} (line)
+        or {x: count} gathered into bars (barplot).
+        """
+        line     = {}
+        heatLine = {}
+
+        cpt = 1
+        for transcript in self.parser.getIterator():
+            x, y, z = self.getValues(transcript)
+            name = transcript.name
+            if name == "unnamed transcript":
+                # give each unnamed transcript its own key
+                name = "transcript %d" % (cpt)
+                cpt += 1
+            if self.shape in ("points", "heatPoints"):
+                line[name] = (x, y)
+            if self.shape == "heatPoints":
+                heatLine[name] = z
+            if self.shape == "line":
+                line[x] = y
+            if self.shape == "barplot":
+                line[x] = line.get(x, 0) + 1
+        if self.shape == "barplot":
+            line = self.correctPointsToBarplot(line)
+        self.plotter.setXLabel(self.x)
+        if self.y is not None:
+            self.plotter.setYLabel(self.y)
+        else:
+            self.plotter.setYLabel("Count")
+        self.plotter.addLine(line)
+        if self.shape == "heatPoints":
+            self.plotter.addHeatLine(heatLine)
+        self.plotter.plot()
+
+    def close(self):
+        """
+        Print the statistics of the plot: the correlation data when the
+        regression was requested, and Spearman's rho for the "points" shape.
+        """
+        # print(...) with a single argument gives the same output as the
+        # print statement on Python 2, and also parses on Python 3
+        if self.regression:
+            print(self.plotter.getCorrelationData())
+        if self.shape == "points":
+            rho = self.plotter.getSpearmanRho()
+            if rho is None:
+                print("Cannot compute Spearman rho.")
+            else:
+                print("Spearman rho: %f" % (rho))
+
+    def run(self):
+        """Create the plotter, parse the input, draw the plot, print the statistics."""
+        self.createPlotter()
+        self.parseFile()
+        self.close()
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Plot v1.0.2: Plot some information from a list of transcripts. [Category: Visualization]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",      dest="inputFileName",  action="store",                      type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--format",     dest="format",         action="store",                      type="string", help="format of the input [compulsory] [format: transcript file format]")
+    parser.add_option("-x", "--x",          dest="x",              action="store",                      type="string", help="tag for the x value [format: string]")
+    parser.add_option("-y", "--y",          dest="y",              action="store",                      type="string", help="tag for the y value [format: string]")
+    parser.add_option("-z", "--z",          dest="z",              action="store",      default=None,   type="string", help="tag for the z value [format: string]")
+    parser.add_option("-X", "--xDefault",   dest="xDefault",       action="store",      default=None,   type="float",  help="value for x when tag is not present [format: float]")
+    parser.add_option("-Y", "--yDefault",   dest="yDefault",       action="store",      default=None,   type="float",  help="value for y when tag is not present [format: float]")
+    parser.add_option("-Z", "--zDefault",   dest="zDefault",       action="store",      default=None,   type="float",  help="value for z when tag is not present [format: float]")
+    parser.add_option("-o", "--output",     dest="outputFileName", action="store",                      type="string", help="output file names [format: output file in PNG format]")
+    parser.add_option("-s", "--shape",      dest="shape",          action="store",      default="barplot", type="string", help="shape of the plot [format: choice (barplot, line, points, heatPoints)]")
+    parser.add_option("-n", "--nbBars",     dest="nbBars",         action="store",      default=2,      type="int",    help="number of bars in barplot [format: int]")
+    parser.add_option("-k", "--keep",       dest="keep",           action="store_true", default=False,                 help="keep temporary files [format: bool]")
+    parser.add_option("-r", "--regression", dest="regression",     action="store_true", default=False,                 help="plot regression line (in 'points' format) [format: bool]")
+    parser.add_option("-l", "--log",        dest="log",            action="store",      default="y",    type="string", help="use log on x- or y-axis (write 'x', 'y' or 'xy') [format: string]")
+    parser.add_option("-v", "--verbosity",  dest="verbosity",      action="store",      default=1,      type="int",    help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    # -i and -f are documented as compulsory: stop here with a usage message
+    # (parser.error exits with status 2) instead of failing later on
+    if options.inputFileName is None:
+        parser.error("the input file is compulsory (option -i)")
+    if options.format is None:
+        parser.error("the format of the input is compulsory (option -f)")
+
+    # configure the plot from the options, then draw it
+    plot = Plot(options.verbosity)
+    plot.setInputFileName(options.inputFileName, options.format)
+    plot.setOutputFileName(options.outputFileName)
+    plot.setXData(options.x, options.xDefault)
+    plot.setYData(options.y, options.yDefault)
+    plot.setZData(options.z, options.zDefault)
+    plot.setShape(options.shape)
+    plot.setNbBars(options.nbBars)
+    plot.setRegression(options.regression)
+    plot.setLog(options.log)
+    plot.keepTmpFiles(options.keep)
+    plot.run()
+