Mercurial > repos > yufei-luo > s_mart
diff SMART/Java/Python/restrictSequenceList.py @ 38:2c0c0a89fad7
Uploaded
author | m-zytnicki |
---|---|
date | Thu, 02 May 2013 09:56:47 -0400 |
parents | 769e306b7933 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SMART/Java/Python/restrictSequenceList.py Thu May 02 09:56:47 2013 -0400 @@ -0,0 +1,113 @@ +#! /usr/bin/env python +# +# Copyright INRA-URGI 2009-2010 +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
#
"""Restrict a sequence list with some names"""

from optparse import OptionParser
from commons.core.parsing.ParserChooser import ParserChooser
from commons.core.writer.WriterChooser import WriterChooser
from SMART.Java.Python.misc.Progress import Progress
from SMART.Java.Python.misc import Utils


class RestrictSequenceList(object):
    """Copy to an output file the sequences selected by a list of names.

    By default, a sequence is kept when its name appears in the names file;
    with the exclusion flag set, it is kept when its name does NOT appear
    there.  Usage: call the set*() methods, then run().
    """

    def __init__(self, verbosity):
        """Store the trace level; exclusion mode is off by default."""
        self.verbosity = verbosity
        self.exclude = False

    def setInputFileName(self, fileName, format):
        """Build the parser of the input file for the given sequence format."""
        chooser = ParserChooser(self.verbosity)
        chooser.findFormat(format)
        self.parser = chooser.getParser(fileName)

    def setExclusion(self, boolean):
        """Choose whether listed names are discarded (True) or kept (False)."""
        self.exclude = boolean

    def setOutputFileName(self, fileName, format):
        """Build the writer of the output file for the given sequence format."""
        chooser = WriterChooser(self.verbosity)
        chooser.findFormat(format)
        self.writer = chooser.getWriter(fileName)

    def setNamesFileName(self, fileName):
        """Record the path of the text file holding one name per line."""
        self.namesFileName = fileName

    def _readNames(self):
        """Load the names file into self.names, in file order.

        Surrounding whitespace is stripped.  Blank lines and repeated names
        are skipped, so that they are not later reported as missing names.
        """
        self.names = []
        alreadyListed = set()
        # 'with' guarantees the handle is closed even if reading fails.
        with open(self.namesFileName) as handle:
            for line in handle:
                name = line.strip()
                if name and name not in alreadyListed:
                    alreadyListed.add(name)
                    self.names.append(name)

    def _write(self):
        """Filter the input sequences into the writer.

        On return, self.names only holds the names that were never met in the
        input file (original order preserved), which run() reports.
        """
        nbElements = self.parser.getNbItems()
        progress = Progress(nbElements, "Parsing input file", self.verbosity)
        # The set of requested names is frozen for the whole pass: names that
        # have been met are tracked separately, so that a sequence name
        # occurring several times in the input is always handled the same way.
        requested = set(self.names)
        seen = set()
        nbRead = 0
        nbWritten = 0
        for element in self.parser.getIterator():
            name = element.getName()
            nbRead += 1
            listed = name in requested
            if listed:
                seen.add(name)
            if Utils.xor(listed, self.exclude):
                self.writer.addElement(element)
                nbWritten += 1
            progress.inc()
        progress.done()
        self.names = [name for name in self.names if name not in seen]
        if self.verbosity > 0:
            # Single-argument parenthesised print: same output on Python 2,
            # and valid on Python 3 too.
            print("%d read" % nbRead)
            percentage = 0 if nbRead == 0 else round(float(nbWritten) / nbRead * 100)
            print("%d written (%d%%)" % (nbWritten, percentage))

    def run(self):
        """Read the names, filter the sequences, and report the unmet names."""
        self._readNames()
        self._write()
        if self.names:
            print("Some names are not present in the file: %s" % ", ".join(self.names))


if __name__ == "__main__":

    description = "Restrict Sequence List v1.0.1: Keep the elements of a list of sequences whose name is mentionned in a given file. [Category: Data Selection]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input", dest="inputFile", action="store", type="string", help="input file [compulsory] [format: file in sequence format given by -f]")
    parser.add_option("-f", "--format", dest="format", action="store", default="fasta", type="string", help="format of the input and output files [compulsory] [format: sequence file format] [default: fasta]")
    parser.add_option("-n", "--name", dest="names", action="store", type="string", help="names of the transcripts [compulsory] [format: file in TXT format]")
    parser.add_option("-o", "--output", dest="outputFile", action="store", type="string", help="output file [format: output file in sequence format given by -f]")
    parser.add_option("-x", "--exclude", dest="exclude", action="store_true", default=False, help="output all those whose name is NOT on the list [format: boolean]")
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
    (options, args) = parser.parse_args()

    # Fail early, with the usage message, when an option documented as
    # compulsory and having no default value is missing.
    if options.inputFile is None:
        parser.error("the input file (-i) is compulsory")
    if options.names is None:
        parser.error("the names file (-n) is compulsory")

    rsl = RestrictSequenceList(options.verbosity)
    rsl.setInputFileName(options.inputFile, options.format)
    rsl.setOutputFileName(options.outputFile, options.format)
    rsl.setNamesFileName(options.names)
    rsl.setExclusion(options.exclude)
    rsl.run()