Mercurial > repos > yufei-luo > s_mart
diff commons/launcher/LaunchMCL.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/commons/launcher/LaunchMCL.py Tue Apr 30 14:33:21 2013 -0400 @@ -0,0 +1,239 @@ +#!/usr/bin/env python + +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. + +from commons.core.LoggerFactory import LoggerFactory +from commons.core.utils.RepetOptionParser import RepetOptionParser +from commons.core.seq.FastaUtils import FastaUtils +from commons.core.coord.MatchUtils import MatchUtils +import subprocess +import os +import time +import shutil +from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders + +LOG_DEPTH = "repet.base" + +##Launch MCL +# +class LaunchMCL(object): + + def __init__(self, fastaFileName = "", outFilePrefix = "", inflate = 1.5, covThres = 0.0, isJoined = False, isCluster2Map = False, isClusterConsensusHeaders = False, doClean = False, verbosity = 0): + self._fastaFileName = fastaFileName + self.setOutFilePrefix(outFilePrefix) + self._inflate = inflate + self._coverageThreshold = covThres + self._isJoined = isJoined + self._isCluster2Map = isCluster2Map + self._isClusterConsensusHeaders = isClusterConsensusHeaders + self._doClean = doClean + self._verbosity = verbosity + self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity) + + def setAttributesFromCmdLine(self): + description = "Launch MCL clustering program." + epilog = "\nExample: launch without verbosity and keep temporary files.\n" + epilog += "\t$ python LaunchMCL.py -i file.fa -v 0\n" + parser = RepetOptionParser(description = description, epilog = epilog) + parser.add_option("-i", "--fasta", dest = "fastaFileName", action = "store", type = "string", help = "input fasta file name [compulsory] [format: fasta]", default = "") + parser.add_option("-o", "--out", dest = "outFilePrefix", action = "store", type = "string", help = "prefix of the output files [default=input fasta file name]", default = "") + parser.add_option("-I", "--inflate", dest = "inflate", action = "store", type = "float", help = "inflate parameter of MCL [optional] [default: 1.5]", default = 1.5) + parser.add_option("-T", "--coverage", dest = "coverageThreshold", action = "store", type = "float", help = "length coverage threshold (default=0.0, 0.0 <= value <= 1.0)", default = 0.0) + parser.add_option("-j", "--join", dest = "isJoined", action = "store_true", help = "join hits after alignement [optional] [default: False]" , default = False) + parser.add_option("-m", "--map", dest = "isCluster2Map", action = "store_true", help = "generate an additional output file in map format (Warning: only works if MCL's fasta input headers are formated like LTRharvest fasta output)", default = False) + parser.add_option("", "--isClusterConsensusHeaders", dest = "isClusterConsensusHeaders", action="store_true", help = "format headers for Cluster Consensus tool", default = False) + parser.add_option("-c", "--clean", dest = "doClean", action = "store_true", help = "clean temporary files [optional] [default: False]", default = False) + parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 1]", default = 1) + options = parser.parse_args()[0] + self._setAttributesFromOptions(options) + + def _setAttributesFromOptions(self, options): + self.setFastaFileName(options.fastaFileName) + self.setOutFilePrefix(options.outFilePrefix) + self.setInflate(options.inflate) + self.setCoverageThreshold(options.coverageThreshold) + self.setIsJoined(options.isJoined) + self.setIsCluster2Map(options.isCluster2Map) + self.setIsClusterConsensusHeaders(options.isClusterConsensusHeaders) + self.setDoClean(options.doClean) + self.setVerbosity(options.verbosity) + + def setFastaFileName(self, fastaFileName): + self._fastaFileName = fastaFileName + + def setOutFilePrefix(self, outFilePrefix): + if outFilePrefix == "": + self._outFilePrefix = os.path.splitext(self._fastaFileName)[0] + else: + self._outFilePrefix = outFilePrefix + + def setInflate(self, inflate): + self._inflate = inflate + + def setCoverageThreshold(self, covThres): + self._coverageThreshold = float(covThres) + + def setIsJoined(self, isJoined): + self._isJoined = isJoined + + def setDoClean(self, doClean): + self._doClean = doClean + + def setIsCluster2Map(self, isCluster2Map): + self._isCluster2Map = isCluster2Map + + def setIsClusterConsensusHeaders(self, isClusterConsensusHeaders): + self._isClusterConsensusHeaders = isClusterConsensusHeaders + + def setVerbosity(self, verbosity): + self._verbosity = verbosity + + def _checkOptions(self): + if self._fastaFileName == "": + self._logAndRaise("ERROR: Missing input fasta file name") + if self._isCluster2Map and self._isClusterConsensusHeaders: + self._logAndRaise("ERROR: You can't use both '--isClusterConsensusHeaders' and '-m' options") + if self._coverageThreshold > 1 or self._coverageThreshold < 0: + self._logAndRaise("ERROR: Coverage Threshold must be in [0.0 , 1.0]") + + def _logAndRaise(self, errorMsg): + self._log.error(errorMsg) + raise Exception(errorMsg) + + def run(self): + LoggerFactory.setLevel(self._log, self._verbosity) + self._checkOptions() + self._log.info("START Launch MCL") + self._log.debug("With parameters: -i %s -o %s -I %.2f -T %.2f -j %r -m %r -clusterHeaders %r " % (self._fastaFileName, self._outFilePrefix , self._inflate , self._coverageThreshold, self._isJoined, self._isCluster2Map, self._isClusterConsensusHeaders)) + #self._log.debug("With parameters: -i %s -o %s -I %.2f -T %.2f" % (self._fastaFileName, self._outFilePrefix , self._inflate , self._coverageThreshold)) + self._log.debug("Fasta file name: %s" % self._fastaFileName) + workingDir = "MCLtmpDirectory" + if os.path.exists(workingDir): + self._logAndRaise("ERROR: %s already exists." % workingDir) + os.mkdir(workingDir) + os.chdir(workingDir) + linkToFastaFile = "%s2.fa" % os.path.splitext(self._fastaFileName)[0] + os.symlink("../%s" % self._fastaFileName, self._fastaFileName) + fastaFileNameShorten = "%s.shortH" % self._fastaFileName + iChangeSequenceHeaders = ChangeSequenceHeaders(inFile=self._fastaFileName, format="fasta", step=1, outFile=fastaFileNameShorten, verbosity=self._verbosity - 1) + iChangeSequenceHeaders.run() + os.symlink(fastaFileNameShorten, linkToFastaFile) + + self._log.info("START Blaster-Matcher (%s)" % time.strftime("%Y-%m-%d %H:%M:%S")) + cmd = "LaunchBlaster.py" + cmd += " -q %s" % fastaFileNameShorten + cmd += " -s %s" % linkToFastaFile + cmd += " -a" + cmd += " 1>&2 >> blasterMatcher.log" + process = subprocess.Popen(cmd, shell = True) + self._log.debug("Running : %s" % cmd) + process.communicate() + if process.returncode != 0: + self._logAndRaise("ERROR when launching '%s'" % cmd) + outBlasterFileName = "%s.align" % fastaFileNameShorten + + cmd = "matcher" + cmd += " -m %s" % outBlasterFileName + cmd += " -q %s" % fastaFileNameShorten + cmd += " -s %s" % linkToFastaFile + cmd += " -a" + if self._isJoined: + cmd += " -j" + cmd += " 1>&2 >> blasterMatcher.log" + process = subprocess.Popen(cmd, shell=True) + self._log.debug("Running : %s" % cmd) + process.communicate() + if process.returncode != 0: + self._logAndRaise("ERROR when launching '%s'" % cmd) + self._log.info("END Blaster-Matcher (%s)" % time.strftime("%Y-%m-%d %H:%M:%S")) + + outMatcherFileName = "%s.match.tab" % outBlasterFileName + inputABCFileName = "%s.shortH.abc" % os.path.splitext(fastaFileNameShorten)[0] + MatchUtils.convertMatchFileIntoABCFileOnQueryCoverage(outMatcherFileName, inputABCFileName, coverage = self._coverageThreshold) + outMCLPreprocessFileName = "MCLPreprocess.out" + + self._log.info("START MCL (%s)" % time.strftime("%Y-%m-%d %H:%M:%S")) + cmd = "mcxload" + cmd += " -abc %s" % inputABCFileName + cmd += " --stream-mirror" + cmd += " --stream-neg-log10" + cmd += " -stream-tf 'ceil(200)'" + cmd += " -o %s" % outMCLPreprocessFileName + cmd += " -write-tab %s.tab" % outMCLPreprocessFileName + cmd += " 1>&2 > MCLpreprocess.log" + process = subprocess.Popen(cmd, shell = True) + self._log.debug("Running : %s" % cmd) + process.communicate() + if process.returncode != 0: + self._logAndRaise("ERROR when launching '%s'" % cmd) + + outMCLFileName = "out.shortH.mcl" + cmd = "mcl" + cmd += " %s" % outMCLPreprocessFileName + cmd += " -I %s" % self._inflate + cmd += " -use-tab %s.tab" % outMCLPreprocessFileName + cmd += " -o %s" % outMCLFileName + cmd += " 1>&2 > MCL.log" + process = subprocess.Popen(cmd, shell = True) + self._log.debug("Running : %s" % cmd) + process.communicate() + if process.returncode != 0: + self._logAndRaise("ERROR when launching '%s'" % cmd) + self._log.info("END MCL (%s)" % time.strftime("%Y-%m-%d %H:%M:%S")) + + outFastaFileNameShorten = "%s.fa" % os.path.splitext(outMCLFileName)[0] + + FastaUtils.convertClusterFileToFastaFile(outMCLFileName, fastaFileNameShorten, outFastaFileNameShorten, "MCL", verbosity = self._verbosity - 1) + + outFastaFileName = "%s_MCL.fa" % self._outFilePrefix + linkFileName = "%s.newHlink" % self._fastaFileName + headerStyle = "A" + if self._isClusterConsensusHeaders: + headerStyle = "B" + iChangeSequenceHeaders = ChangeSequenceHeaders(inFile=outFastaFileNameShorten, format="fasta", step=2, outFile=outFastaFileName, linkFile=linkFileName, whichCluster = headerStyle, verbosity=self._verbosity - 1) + iChangeSequenceHeaders.run() + + if self._isCluster2Map: + outMapFileName = "%s_MCL.map" % self._outFilePrefix + FastaUtils.convertClusteredFastaFileToMapFile(outFastaFileName, outMapFileName) + shutil.move(outMapFileName, "..") + + shutil.move(outFastaFileName, "..") + os.chdir("..") + if self._doClean: + self._log.warning("Working directory will be cleaned") + shutil.rmtree(workingDir) + self._log.info("END Launch MCL") + +if __name__ == "__main__": + iLaunch = LaunchMCL() + iLaunch.setAttributesFromCmdLine() + iLaunch.run()