Mercurial > repos > yufei-luo > s_mart
diff commons/launcher/LaunchMap.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/commons/launcher/LaunchMap.py Tue Apr 30 14:33:21 2013 -0400 @@ -0,0 +1,186 @@ +#!/usr/bin/env python + +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 

from commons.core.LoggerFactory import LoggerFactory
from commons.core.utils.RepetOptionParser import RepetOptionParser
from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
from commons.core.seq.FastaUtils import FastaUtils
from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
from commons.core.utils.FileUtils import FileUtils
import os
import subprocess

try:
    # Python 3
    from shlex import quote as _shellQuote
except ImportError:
    # Python 2
    from pipes import quote as _shellQuote

LOG_DEPTH = "repet.tools"


## Launcher for the external "rpt_map" program.
#
# The launcher rewrites the headers of the input fasta file with
# ChangeSequenceHeaders (step 1, prefix "seq"), runs "rpt_map" on the rewritten
# file, maps the headers of the result back (step 2) and finally writes the
# sequences, upper-cased and in the order of the initial headers, into the
# output file.
#
class LaunchMap(object):

    ## Constructor
    #
    # @param fastaFileName string name of the input fasta file
    # @param outFileName string name of the output file ("" means "<fastaFileName>.fa_aln")
    # @param gapSize integer size above which a gap is not penalized anymore
    # @param mismatchPenalty integer penalty for a mismatch
    # @param gapOpenPenalty integer penalty for a gap opening
    # @param gapExtendPenalty integer penalty for a gap extension
    # @param doClean boolean remove the temporary files at the end of run()
    # @param verbosity integer verbosity level given to the logger
    #
    def __init__(self, fastaFileName = "", outFileName = "", gapSize = 50, mismatchPenalty = -8, gapOpenPenalty = 16, gapExtendPenalty = 4, doClean = False, verbosity = 0):
        self._fastaFileName = fastaFileName
        self.setOutFileName(outFileName)
        self._gapSize = gapSize
        self._mismatchPenalty = mismatchPenalty
        self._gapOpenPenalty = gapOpenPenalty
        self._gapExtendPenalty = gapExtendPenalty
        self._doClean = doClean
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

    ## Parse the command line and set the attributes accordingly
    #
    def setAttributesFromCmdLine(self):
        # No description/epilog is given to the parser: the text inherited from
        # the launcher template was never adapted to this launcher.
        parser = RepetOptionParser(description = "", epilog = "")
        parser.add_option("-i", "--fasta", dest = "fastaFileName", action = "store", type = "string", help = "input fasta file name [compulsory] [format: fasta]", default = "")
        parser.add_option("-o", "--out", dest = "outFileName", action = "store", type = "string", help = "output file name [default: <input>.fa_aln]", default = "")
        parser.add_option("-s", "--gapSize", dest = "gapSize", action = "store", type = "int", help = "size above which a gap is not penalized anymore [optional] [default: 50]", default = 50)
        parser.add_option("-m", "--mismatch", dest = "mismatch", action = "store", type = "int", help = "penalty for a mismatch [optional] [default: -8]", default = -8)
        parser.add_option("-O", "--gapOpen", dest = "gapOpen", action = "store", type = "int", help = "penalty for a gap opening [optional] [default: 16]", default = 16)
        parser.add_option("-e", "--gapExtend", dest = "gapExtend", action = "store", type = "int", help = "penalty for a gap extension [optional] [default: 4]", default = 4)
        parser.add_option("-c", "--clean", dest = "doClean", action = "store_true", help = "clean temporary files [optional] [default: False]", default = False)
        parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 1]", default = 1)
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    ## Copy the parsed command-line options into the attributes
    #
    # @param options object returned by the option parser
    #
    def _setAttributesFromOptions(self, options):
        self.setFastaFileName(options.fastaFileName)
        self.setOutFileName(options.outFileName)
        self.setGapSize(options.gapSize)
        self.setMismatchPenalty(options.mismatch)
        self.setGapOpenPenalty(options.gapOpen)
        self.setGapExtendPenalty(options.gapExtend)
        self.setDoClean(options.doClean)
        self.setVerbosity(options.verbosity)

    ## Build the default output file name from the current input file name
    #
    # @return string "<fastaFileName>.fa_aln"
    #
    def _getDefaultOutFileName(self):
        return "%s.fa_aln" % self._fastaFileName

    ## Set the input fasta file name
    #
    # While no explicit output file name has been given, the default output
    # file name is recomputed so that it always follows the input file name.
    #
    # @param fastaFileName string name of the input fasta file
    #
    def setFastaFileName(self, fastaFileName):
        self._fastaFileName = fastaFileName
        # getattr: the flag only exists once setOutFileName() has been called.
        if getattr(self, "_useDefaultOutFileName", False):
            self._outFileName = self._getDefaultOutFileName()

    ## Set the output file name
    #
    # @param outFileName string name of the output file ("" selects the default "<fastaFileName>.fa_aln")
    #
    def setOutFileName(self, outFileName):
        self._useDefaultOutFileName = (outFileName == "")
        if self._useDefaultOutFileName:
            self._outFileName = self._getDefaultOutFileName()
        else:
            self._outFileName = outFileName

    def setGapSize(self, gapSize):
        self._gapSize = gapSize

    def setMismatchPenalty(self, mismatchPenalty):
        self._mismatchPenalty = mismatchPenalty

    def setGapOpenPenalty(self, gapOpenPenalty):
        self._gapOpenPenalty = gapOpenPenalty

    def setGapExtendPenalty(self, gapExtendPenalty):
        self._gapExtendPenalty = gapExtendPenalty

    def setDoClean(self, doClean):
        self._doClean = doClean

    def setVerbosity(self, verbosity):
        self._verbosity = verbosity

    ## Check that an existing input fasta file has been given
    #
    # Raises an Exception (through _logAndRaise) otherwise.
    #
    def _checkOptions(self):
        if self._fastaFileName == "":
            self._logAndRaise("ERROR: Missing input fasta file name")
        if not FileUtils.isRessourceExists(self._fastaFileName):
            self._logAndRaise("ERROR: Input fasta file name %s doesn't exist." % self._fastaFileName)

    ## Log an error message, then raise an Exception carrying the same message
    #
    # @param errorMsg string message to log and to raise
    #
    def _logAndRaise(self, errorMsg):
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    ## Build the shell command line running "rpt_map"
    #
    # The file names are shell-quoted since the command is run through a
    # shell (see run()); names made of ordinary characters are left unchanged.
    #
    # @return string command reading "<fastaFileName>.shortH" and redirecting the standard output to "<fastaFileName>.shortH.fa_aln"
    #
    def getMapCmd(self):
        shortHeaderFileName = "%s.shortH" % self._fastaFileName
        cmd = "rpt_map"
        cmd += " %s" % _shellQuote(shortHeaderFileName)
        cmd += " %i" % self._gapSize
        cmd += " %i" % self._mismatchPenalty
        cmd += " %i" % self._gapOpenPenalty
        cmd += " %i" % self._gapExtendPenalty
        cmd += " > %s" % _shellQuote("%s.fa_aln" % shortHeaderFileName)
        return cmd

    ## Run the whole process
    #
    # Raises an Exception when the options are invalid or when the "rpt_map"
    # command returns a non-zero exit status. The temporary files are only
    # removed (if requested) when the whole process succeeded.
    #
    def run(self):
        LoggerFactory.setLevel(self._log, self._verbosity)
        self._checkOptions()
        self._log.info("START LaunchMap")
        self._log.debug("Fasta file name: %s" % self._fastaFileName)

        shortHeaderFileName = "%s.shortH" % self._fastaFileName
        linkFileName = "%s.shortHlink" % self._fastaFileName
        alignedFileName = "%s.shortH.fa_aln" % self._fastaFileName
        initHeaderFileName = "%s.shortH.fa_aln.initH" % self._fastaFileName

        # Initial headers, used at the end to order the output sequences.
        lInitHeaders = FastaUtils.dbHeaders(self._fastaFileName, self._verbosity - 1)

        # Step 1 of ChangeSequenceHeaders: writes the ".shortH" file and the
        # link file (presumably the initial <-> "seq"-prefixed header mapping).
        csh = ChangeSequenceHeaders()
        csh.setInputFile(self._fastaFileName)
        csh.setFormat("fasta")
        csh.setStep(1)
        csh.setPrefix("seq")
        csh.setLinkFile(linkFileName)
        csh.setOutputFile(shortHeaderFileName)
        csh.setVerbosityLevel(self._verbosity - 1)
        csh.run()

        cmd = self.getMapCmd()
        # Log before spawning so that the command is traced even if spawning fails.
        self._log.debug("Running : %s" % cmd)
        # The shell is needed for the output redirection held by the command.
        process = subprocess.Popen(cmd, shell = True)
        process.communicate()
        if process.returncode != 0:
            self._logAndRaise("ERROR when launching '%s'" % cmd)

        # Step 2 of ChangeSequenceHeaders, applied to the "rpt_map" output with
        # the same link file (presumably restoring the initial headers).
        csh.setInputFile(alignedFileName)
        csh.setFormat("fasta")
        csh.setStep(2)
        csh.setLinkFile(linkFileName)
        csh.setOutputFile(initHeaderFileName)
        csh.setVerbosityLevel(self._verbosity - 1)
        csh.run()

        absDB = AlignedBioseqDB(initHeaderFileName)
        # "with" guarantees the output file is closed even if a fetch/write fails.
        with open(self._outFileName, "w") as outFileHandler:
            for header in lInitHeaders:
                bs = absDB.fetch(header)
                bs.upCase()
                bs.write(outFileHandler)

        if self._doClean:
            for tmpFileName in (shortHeaderFileName, linkFileName, alignedFileName, initHeaderFileName):
                os.remove(tmpFileName)
        self._log.info("END LaunchMap")


if __name__ == "__main__":
    iLaunch = LaunchMap()
    iLaunch.setAttributesFromCmdLine()
    iLaunch.run()