s_mart: SMART/Java/Python/getSizes.py comparison

comparison SMART/Java/Python/getSizes.py @ 6:769e306b7933

Change the repository level.

author	yufei-luo
date	Fri, 18 Jan 2013 04:54:14 -0500
parents
children	94ab73e8a190

comparison

equal deleted inserted replaced

-:ea3082881bf8
+:769e306b7933
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import os, sys
+from optparse import OptionParser
+from commons.core.parsing.FastaParser import FastaParser
+from commons.core.parsing.FastqParser import FastqParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from commons.core.parsing.GffParser import GffParser
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.RPlotter import RPlotter
+from SMART.Java.Python.misc import Utils
+from commons.core.LoggerFactory import LoggerFactory
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+LOG_DEPTH = "smart"
+class GetSizes(object):
+def __init__(self, inFileName = None, inFormat=None, outFileName = None, query=None,xMax=None, xMin=None, csv=False, verbosity = 0):
+self.inFileName = inFileName
+self.inFormat= inFormat
+self.outFileName = outFileName
+self.query = query
+self.xMax = xMax
+self.xMin = xMin
+self.xLab = "Size"
+self.yLab = "# reads"
+self.barplot = False
+self.csv = csv
+self._verbosity = verbosity
+self.parser = None
+self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)
+def setAttributesFromCmdLine(self):
+description = "Usage: getSizes.py [options]\n\nGet Sizes v1.0.2: Get the sizes of a set of genomic coordinates. [Category: Visualization]\n"
+epilog = ""
+parser = RepetOptionParser(description = description, epilog = epilog)
+parser.add_option("-i", "--input",     dest="inputFileName",  action="store",      default=None,      type="string", help="input file [compulsory] [format: file in transcript or sequence format given by -f]")
+parser.add_option("-f", "--format",    dest="format",         action="store",      default=None,      type="string", help="format of the input [compulsory] [format: transcript or sequence file format]")
+parser.add_option("-q", "--query",     dest="query",          action="store",      default=None,      type="string", help="type to mesure [default: size] [format: choice (size, intron size, exon size, 1st exon size)]")
+parser.add_option("-o", "--output",    dest="outputFileName", action="store",      default=None,      type="string", help="output file [format: output file in PNG format]")
+parser.add_option("-x", "--xMax",      dest="xMax",           action="store",      default=None,      type="int",    help="maximum value on the x-axis to plot [format: int]")
+parser.add_option("-X", "--xMin",      dest="xMin",           action="store",      default=None,      type="int",    help="minimum value on the x-axis to plot [format: int]")
+parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,         type="int",    help="trace level [format: int]")
+parser.add_option("-c", "--csv",       dest="csv",            action="store",                         type="string", help="write a .csv file [format: bool] [default: false]")
+parser.add_option("-a", "--xLabel",    dest="xLab",           action="store",      default="Size",    type="string", help="x absis label name [format: string] [default: Size]")
+parser.add_option("-b", "--yLabel",    dest="yLab",           action="store",      default="# reads", type="string", help="y absis label name [format: string] [default: Reads]")
+parser.add_option("-B", "--barplot",   dest="barplot",        action="store_true", default=False,                    help="use barplot representation [format: bool] [default: false]")
+options = parser.parse_args()[0]
+self._setAttributesFromOptions(options)
+def _setAttributesFromOptions(self, options):
+self.setInFileName(options.inputFileName)
+self.setInFormat(options.format)
+self.setQuery(options.query)
+self.setOutFileName(options.outputFileName)
+self.setXMax(options.xMax)
+self.setXMin(options.xMin)
+self.setxLab(options.xLab)
+self.setyLab(options.yLab)
+self.setBarplot(options.barplot)
+self.setVerbosity(options.verbosity)
+def setInFileName(self, inputFileName):
+self.inFileName = inputFileName
+def setInFormat(self, inFormat):
+self.inFormat = inFormat
+def setQuery(self, query):
+self.query = query
+def setOutFileName(self, outFileName):
+self.outFileName = outFileName
+def setXMax(self, xMax):
+self.xMax = xMax
+def setXMin(self, xMin):
+self.xMin = xMin
+def setxLab(self, xLab):
+self.xLab = xLab
+def setyLab(self, yLab):
+self.yLab = yLab
+def setBarplot(self, barplot):
+self.barplot = barplot
+def setCsv(self, csv):
+self.csv = csv
+def setVerbosity(self, verbosity):
+self._verbosity = verbosity
+def _checkOptions(self):
+if self.inFileName == None:
+self._logAndRaise("ERROR: Missing input file name")
+if self.inFormat == "fasta":
+self.parser = FastaParser(self.inFileName, self._verbosity)
+elif self.inFormat == "fastq":
+self.parser = FastqParser(self.inFileName, self._verbosity)
+else:
+self.parser = TranscriptContainer(self.inFileName, self.inFormat, self._verbosity)
+def _logAndRaise(self, errorMsg):
+self._log.error(errorMsg)
+raise Exception(errorMsg)
+def run(self):
+LoggerFactory.setLevel(self._log, self._verbosity)
+self._checkOptions()
+self._log.info("START getsizes")
+self._log.debug("Input file name: %s" % self.inFileName)
+nbItems = self.parser.getNbItems()
+self._log.info( "%i items found" % (nbItems))
+# treat items
+progress   = Progress(nbItems, "Analyzing sequences of %s" % (self.inFileName), self._verbosity)
+sizes      = {}
+names      = {}
+minimum    = 1000000000000
+maximum    = 0
+sum        = 0
+number     = 0
+nbSubItems = 0
+for item in self.parser.getIterator():
+items = []
+if self.query == "exon":
+items = item.getExons()
+elif self.query == "exon1":
+if len(item.getExons()) > 1:
+item.sortExons()
+items = [item.getExons()[0]]
+elif self.query == "intron":
+items = item.getIntrons()
+else:
+items = [item, ]
+for thisItem in items:
+try:
+nbElements = int(float(thisItem.getTagValue("nbElements")))
+if nbElements == None:
+nbElements = 1
+except:
+nbElements = 1
+size    = thisItem.getSize()
+minimum = min(minimum, size)
+maximum = max(maximum, size)
+name    = thisItem.name.split()[0]
+if size not in sizes:
+sizes[size] = nbElements
+if self.csv:
+names[size] = [name, ]
+else:
+sizes[size] += nbElements
+if self.csv:
+names[size].append(name)
+sum        += size
+nbSubItems += nbElements
+number += 1
+progress.inc()
+progress.done()
+if self.outFileName != None:
+plotter = RPlotter(self.outFileName, self._verbosity)
+plotter.setFill(0)
+plotter.setMinimumX(self.xMin)
+plotter.setMaximumX(self.xMax)
+plotter.setXLabel(self.xLab)
+plotter.setYLabel(self.yLab)
+plotter.setBarplot(self.barplot)
+plotter.addLine(sizes)
+plotter.plot()
+if nbSubItems == 0:
+self._logAndRaise("No item found")
+if self.csv:
+csvHandle = open(self.csv, "w")
+for size in range(min(sizes.keys()), max(sizes.keys())+1):
+if size not in sizes:
+csvHandle.write("%d,0,\n" % (size))
+else:
+csvHandle.write("%d,%d,%s\n" % (size, sizes[size], ";".join(names[size])))
+csvHandle.close()
+self.items = number
+self.subItems = nbSubItems
+self.nucleotides = sum
+self.minAvgMedMax = Utils.getMinAvgMedMax(sizes)
+print "%d items" % (number)
+print "%d sub-items" % (nbSubItems)
+print "%d nucleotides" % (sum)
+print "min/avg/med/max transcripts: %d/%.2f/%.1f/%d" % Utils.getMinAvgMedMax(sizes)
+self._log.info("END getsizes")
+if __name__ == "__main__":
+iGetSizes = GetSizes()
+iGetSizes.setAttributesFromCmdLine()
+iGetSizes.run()
+#TODO: add two more options!!!!!!

Mercurial > repos > yufei-luo > s_mart

comparison SMART/Java/Python/getSizes.py @ 6:769e306b7933