Mercurial > repos > yufei-luo > s_mart
comparison smart_toolShed/SMART/Java/Python/plotTranscriptList.py @ 0:e0f8dcca02ed
Uploaded S-MART tool. A toolbox that manages RNA-Seq and ChIP-Seq data.
| author | yufei-luo |
|---|---|
| date | Thu, 17 Jan 2013 10:52:14 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e0f8dcca02ed |
|---|---|
| 1 #! /usr/bin/env python | |
| 2 # | |
| 3 # Copyright INRA-URGI 2009-2010 | |
| 4 # | |
| 5 # This software is governed by the CeCILL license under French law and | |
| 6 # abiding by the rules of distribution of free software. You can use, | |
| 7 # modify and/ or redistribute the software under the terms of the CeCILL | |
| 8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
| 9 # "http://www.cecill.info". | |
| 10 # | |
| 11 # As a counterpart to the access to the source code and rights to copy, | |
| 12 # modify and redistribute granted by the license, users are provided only | |
| 13 # with a limited warranty and the software's author, the holder of the | |
| 14 # economic rights, and the successive licensors have only limited | |
| 15 # liability. | |
| 16 # | |
| 17 # In this respect, the user's attention is drawn to the risks associated | |
| 18 # with loading, using, modifying and/or developing or reproducing the | |
| 19 # software by the user in light of its specific status of free software, | |
| 20 # that may mean that it is complicated to manipulate, and that also | |
| 21 # therefore means that it is reserved for developers and experienced | |
| 22 # professionals having in-depth computer knowledge. Users are therefore | |
| 23 # encouraged to load and test the software's suitability as regards their | |
| 24 # requirements in conditions enabling the security of their systems and/or | |
| 25 # data to be ensured and, more generally, to use and operate it in the | |
| 26 # same conditions as regards security. | |
| 27 # | |
| 28 # The fact that you are presently reading this means that you have had | |
| 29 # knowledge of the CeCILL license and that you accept its terms. | |
| 30 # | |
| 31 """ | |
| 32 Plot the data from the data files | |
| 33 """ | |
| 34 import sys | |
| 35 import math | |
| 36 from optparse import OptionParser | |
| 37 from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer | |
| 38 from SMART.Java.Python.misc.RPlotter import RPlotter | |
| 39 | |
| 40 | |
class PlotTranscriptList(object):
    """Plot numeric values stored in the tags of a list of transcripts.

    Typical use (see the script entry point): set the public attributes
    (``x``, ``y``, ``z``, their defaults, ``shape``, ``bucket``, ``log``),
    then call ``setPlotter``, ``setShape`` and ``setInput``, and finally
    ``run``.
    """

    def __init__(self, verbosity = 0):
        """Create an unconfigured plot.

        Args:
            verbosity: trace level forwarded to the plotter and the parser.
        """
        self.inputFileName = None
        self.format = None
        # Names of the tags holding the x, y and z values.
        self.x = None
        self.y = None
        self.z = None
        # Fallback values used when a transcript lacks the matching tag.
        self.xDefault = None
        self.yDefault = None
        self.zDefault = None
        self.xLabel = None
        self.yLabel = None
        # One of "line", "barplot", "points" or "heatPoints".
        self.shape = None
        # Bucket width for "line"; number of buckets for "barplot".
        self.bucket = None
        self.keep = None
        # "" disables the log scale; any other value enables it.
        self.log = None
        # The argument used to be discarded (always None); keep it.
        self.verbosity = verbosity
        self.plotter = None
        self.parser = None


    def setPlotter(self, outputFileName, keep, log, xLabel, yLabel):
        """Create the RPlotter and set its axis labels.

        The log setting is not forwarded for bar plots: ``clusterInBarplot``
        applies the logarithm to the values itself.
        """
        self.plotter = RPlotter(outputFileName, self.verbosity, keep)
        if self.shape != "barplot":
            self.plotter.setLog(log)
        self.plotter.setXLabel(xLabel)
        self.plotter.setYLabel(yLabel)


    def setShape(self, shape):
        """Configure the plotter for ``shape`` and remember the shape.

        ``setPlotter`` must have been called first. Exits the program when
        the shape is not one of line, barplot, points, heatPoints.
        """
        # Test the argument, not self.shape, which may not be set yet.
        if shape == "line":
            pass
        elif shape == "barplot":
            self.plotter.setBarplot(True)
        elif shape == "points":
            self.plotter.setPoints(True)
        elif shape == "heatPoints":
            self.plotter.setHeatPoints(True)
        else:
            sys.exit("Do not understand shape '%s'" % (shape))
        self.shape = shape


    def setInput(self, inputFileName, format):
        """Open the transcript file ``inputFileName`` written in ``format``."""
        self.inputFileName = inputFileName
        self.format = format
        self.parser = TranscriptContainer(inputFileName, format, self.verbosity)


    def getValues(self, transcript):
        """Return the ``(x, y, z)`` values of a transcript as floats.

        Values come from the tags named by ``self.x``, ``self.y`` and
        ``self.z``; a missing tag is replaced by the matching default.
        ``y`` and ``z`` are ``None`` when they are not available and not
        required. Exits the program when a required value has neither a
        tag nor a default.
        """
        x = transcript.getTagValue(self.x)
        y = None
        z = None
        if self.y is not None:
            y = transcript.getTagValue(self.y)
        if self.z is not None:
            z = transcript.getTagValue(self.z)

        if x is None:
            if self.xDefault is None:
                sys.exit("Error! Transcript %s do not have the x-tag %s" % (transcript, self.x))
            x = self.xDefault
        # The y value is not used by the counting shapes (line, barplot).
        if y is None and self.shape != "line" and self.shape != "barplot":
            if self.yDefault is None:
                sys.exit("Error! Transcript %s do not have the y-tag %s" % (transcript, self.y))
            y = self.yDefault
        if self.z is not None and z is None:
            if self.zDefault is None:
                sys.exit("Error! Transcript %s do not have the z-tag %s" % (transcript, self.z))
            z = self.zDefault

        x = float(x)
        # Convert only what is present: y may legitimately stay None for
        # the line and barplot shapes even when a y tag name is configured.
        if y is not None:
            y = float(y)
        if z is not None:
            z = float(z)
        return (x, y, z)


    def readFile(self):
        """Read every transcript and collect the data to plot.

        Returns:
            A ``(line, heatLine)`` pair of dicts. For "points" and
            "heatPoints", ``line`` maps a transcript name to ``(x, y)`` and
            ``heatLine`` maps it to ``z`` (heatPoints only). For "line" and
            "barplot", ``line`` maps each x value to the number of
            transcripts having that value.
        """
        cpt = 1
        line = {}
        heatLine = {}
        for transcript in self.parser.getIterator():
            x, y, z = self.getValues(transcript)

            name = transcript.name
            # Give unnamed transcripts distinct names so that they do not
            # overwrite one another in the dicts.
            if name == "unnamed transcript":
                name = "transcript %d" % (cpt)
                cpt += 1
            if self.shape == "points":
                line[name] = (x, y)
            elif self.shape == "heatPoints":
                line[name] = (x, y)
                heatLine[name] = z
            elif self.shape == "line" or self.shape == "barplot":
                line[x] = line.get(x, 0) + 1
            else:
                sys.exit("Do not understand shape '%s'" % (self.shape))
        return line, heatLine


    def putLineInBuckets(self, line):
        """Group the x values of ``line`` into buckets of width ``self.bucket``.

        Each key is replaced by the lower bound of its bucket and the counts
        of keys sharing a bucket are summed.
        """
        bucketed = {}
        for key, value in line.items():
            bucketKey = int(key / float(self.bucket)) * self.bucket
            # Accumulate: several keys usually land in the same bucket.
            bucketed[bucketKey] = bucketed.get(bucketKey, 0) + value
        return bucketed


    def clusterInBarplot(self, line):
        """Distribute the counts of ``line`` over evenly spaced buckets.

        ``self.bucket`` is the number of intervals between the smallest and
        the largest value; ``int(self.bucket) + 1`` bucket keys are created.
        When ``self.log`` is not empty, the natural logarithm of the values
        is used and the zero values are discarded (their number is printed).

        Raises:
            ValueError: if ``line`` is empty, or if a negative value is
                given in log mode.
        """
        useLog = (self.log != "")
        nbZeros = 0
        if useLog:
            # Zero has no logarithm: those transcripts are left out.
            nbZeros = line.get(0, 0)
            nonZeroKeys = [key for key in line if key != 0]
            if not nonZeroKeys:
                sys.exit("Error! Cannot use a log scale: all the values are zero")
            minValue = math.log(min(nonZeroKeys))
            maxValue = math.log(max(nonZeroKeys))
        else:
            minValue = min(line)
            maxValue = max(line)

        nbBuckets = int(self.bucket)
        bucketSize = (maxValue - minValue) / self.bucket
        clustered = {}
        for i in range(nbBuckets + 1):
            clustered[i * bucketSize + minValue] = 0

        for key, value in line.items():
            if useLog:
                # Test the raw key: log(1) is 0 and must not be discarded.
                if key == 0:
                    continue
                key = math.log(key)
            if bucketSize == 0:
                # Only one distinct value: everything goes to one bucket.
                index = 0
            else:
                # Clamp to guard against floating-point overshoot.
                index = min(int((key - minValue) / bucketSize), nbBuckets)
            clustered[index * bucketSize + minValue] += value

        print("%d zeros have been removed" % (nbZeros))
        return clustered


    def getSpearmanRho(self):
        """Print the Spearman rho reported by the plotter; returns nothing."""
        rho = self.plotter.getSpearmanRho()
        if rho is None:
            print("Cannot compute Spearman rho.")
        else:
            print("Spearman rho: %f" % (rho))


    def run(self):
        """Read the input, build the plot and, for point plots, print rho."""
        line, heatLine = self.readFile()

        if self.shape == "line" and self.bucket is not None:
            line = self.putLineInBuckets(line)
        if self.shape == "barplot":
            line = self.clusterInBarplot(line)

        if self.shape in ("points", "barplot", "line"):
            self.plotter.addLine(line)
        elif self.shape == "heatPoints":
            self.plotter.addLine(line)
            self.plotter.addHeatLine(heatLine)
        else:
            sys.exit("Do not understand shape '%s'" % (self.shape))

        self.plotter.plot()

        if self.shape == "points" or self.shape == "heatPoints":
            self.getSpearmanRho()
| 214 | |
| 215 | |
| 216 | |
if __name__ == "__main__":

    # parse command line
    description = "Plot v1.0.2: Plot some information from a list of transcripts. [Category: Visualization]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of the input [compulsory] [format: transcript file format]")
    parser.add_option("-x", "--x", dest="x", action="store", type="string", help="tag for the x value [format: string]")
    parser.add_option("-y", "--y", dest="y", action="store", type="string", help="tag for the y value [format: string]")
    parser.add_option("-z", "--z", dest="z", action="store", default=None, type="string", help="tag for the z value [format: string]")
    parser.add_option("-X", "--xDefault", dest="xDefault", action="store", default=None, type="float", help="value for x when tag is not present [format: float]")
    parser.add_option("-Y", "--yDefault", dest="yDefault", action="store", default=None, type="float", help="value for y when tag is not present [format: float]")
    parser.add_option("-Z", "--zDefault", dest="zDefault", action="store", default=None, type="float", help="value for z when tag is not present [format: float]")
    parser.add_option("-n", "--xLabel", dest="xLabel", action="store", default="", type="string", help="label on the x-axis [format: string] [default: ]")
    parser.add_option("-m", "--yLabel", dest="yLabel", action="store", default="", type="string", help="label on the y-axis [format: string] [default: ]")
    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file names [format: output file in PNG format]")
    parser.add_option("-s", "--shape", dest="shape", action="store", type="string", help="shape of the plot [format: choice (barplot, line, points, heatPoints)]")
    parser.add_option("-b", "--bucket", dest="bucket", action="store", default=None, type="float", help="bucket size (for the line plot) [format: int] [default: 1]")
    parser.add_option("-k", "--keep", dest="keep", action="store_true", default=False, help="keep temporary files [format: bool]")
    parser.add_option("-l", "--log", dest="log", action="store", default="", type="string", help="use log on x- or y-axis (write 'x', 'y' or 'xy') [format: string] [default: ]")
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
    (options, args) = parser.parse_args()

    # -i and -f are documented as compulsory: stop with a usage message
    # instead of failing later, when the input file is opened.
    if options.inputFileName is None:
        parser.error("the input file (-i) is compulsory")
    if options.format is None:
        parser.error("the input format (-f) is compulsory")

    # The attributes must be set before setPlotter, which reads the shape.
    plotTranscriptList = PlotTranscriptList(options.verbosity)
    plotTranscriptList.x = options.x
    plotTranscriptList.y = options.y
    plotTranscriptList.z = options.z
    plotTranscriptList.xDefault = options.xDefault
    plotTranscriptList.yDefault = options.yDefault
    plotTranscriptList.zDefault = options.zDefault
    plotTranscriptList.shape = options.shape
    plotTranscriptList.bucket = options.bucket
    plotTranscriptList.log = options.log
    plotTranscriptList.setPlotter(options.outputFileName, options.keep, options.log, options.xLabel, options.yLabel)
    plotTranscriptList.setShape(options.shape)
    plotTranscriptList.setInput(options.inputFileName, options.format)
    plotTranscriptList.run()
| 255 |
