Mercurial > repos > yufei-luo > s_mart
diff commons/launcher/LaunchPhyML.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/commons/launcher/LaunchPhyML.py Tue Apr 30 14:33:21 2013 -0400 @@ -0,0 +1,177 @@ +#!/usr/bin/env python + +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
from commons.core.LoggerFactory import LoggerFactory
from commons.core.utils.RepetOptionParser import RepetOptionParser
from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
import subprocess
import os
from commons.core.seq.Bioseq import Bioseq
import shutil

LOG_DEPTH = "repet.core.launchers"



class LaunchPhyML(object):
    """
    Launch 'PhyML' on a fasta file.

    The pipeline implemented by run() is:
      1. make the input file reachable from the current working directory,
      2. rewrite the fasta through ChangeSequenceHeaders (step 1),
      3. convert the result to phylip with 'sreformat',
      4. run 'phyml' on the phylip file,
      5. rewrite the resulting newick tree through ChangeSequenceHeaders
         (step 2) into outFileName.
    """

    def __init__(self, inputFileName="", outFileName="", dataType="nt", interleavedFormat=True, nbDataSets=1, nbBootDataSets=0, substModel="HKY85", ratioTsTv=4.0, propInvSites=0.0, nbCat=1, gammaParam=1.0, startTree="BIONJ", paramOptimisation="tlr", clean=False, verbosity=3):
        """
        Store the launcher settings; nothing is executed here.

        @param inputFileName: fasta file to process
        @param outFileName: output newick file; when empty, run() uses
                            "<inputFileName>_phyml.newick"
        @param dataType: "nt" or "aa" (passed to phyml as -d)
        @param interleavedFormat: if False, " -q" is added to the phyml command
        @param nbDataSets: passed to phyml as -n
        @param nbBootDataSets: passed to phyml as -b
        @param substModel: passed to phyml as -m
        @param ratioTsTv: passed to phyml as -t
        @param propInvSites: passed to phyml as -v
        @param nbCat: passed to phyml as -c
        @param gammaParam: passed to phyml as -a
        @param startTree: stored only; see the review note below
        @param paramOptimisation: passed to phyml as -o
        @param clean: if True, run() removes its intermediate files
        @param verbosity: verbosity level given to the logger
        """
        self.inputFileName = inputFileName
        self.outFileName = outFileName
        self.dataType = dataType  # "nt or aa"
        self._setSeqFormat(interleavedFormat)  # if False -q"
        self.nbDataSets = nbDataSets
        self.nbBootDataSets = nbBootDataSets
        self.substModel = substModel
        self.ratioTsTv = ratioTsTv
        self.propInvSites = propInvSites  # propInvSites="e" replaced by 0.0; should be in [0-1]
        self.nbCat = nbCat  # Number of categories less than four or higher than eight are not recommended.
        self.gammaParam = gammaParam
        # NOTE(review): startTree is never read by run(); the "-u" option is
        # always fed a copy of reformatedInputFileName + "_phyml_tree.txt".
        self.startTree = startTree  # by default is BIONJ used reformatedInputFileName+"_phyml_tree.txt" instead
        self.paramOptimisation = paramOptimisation  # used instead of self.optTopology="y", self.optBranchRate="y"
        # This option focuses on specific parameter optimisation.
        # tlr : tree topology (t), branch length (l) and rate parameters (r) are optimised.
        # tl : tree topology and branch length are optimised.
        # lr : branch length and rate parameters are optimised.
        # l : branch length are optimised.
        # r : rate parameters are optimised.
        # n : no parameter is optimised.

        self._clean = clean
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

    def _setSeqFormat(self, interleavedFormat):
        """Set self.seqFormat to " -q" when interleavedFormat is false, "" otherwise."""
        if not (interleavedFormat):
            self.seqFormat = " -q"
        else:
            self.seqFormat = ""

    def setAttributesFromCmdLine(self):
        """Parse the command line (-i, -o, -v) and copy the values onto this instance."""
        description = "usage: LaunchPhyML.py [ options ]"
        epilog = "\n -h: this help\n"
        epilog += "\t -i: name of the input file (refseq is first, format='fasta')"
        epilog += "\n\t"
        parser = RepetOptionParser(description = description, epilog = epilog)
        parser.add_option("-i", "--fasta", dest = "inputFileName", action = "store", type = "string", help = "input fasta file name [compulsory] [format: fasta]", default = "")
        parser.add_option("-o", "--out", dest = "outFileName", action = "store", type = "string", help = "output file name [default: <input>.out]", default = "")
        parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 1]", default = 1)
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        """Copy inputFileName, outFileName and verbosity from the parsed options."""
        self.inputFileName = options.inputFileName
        # Fixed: this used to assign 'self.setOutFileName', so the -o/--out
        # value never reached 'self.outFileName' and was silently ignored.
        self.outFileName = options.outFileName
        self._verbosity = options.verbosity

    def _checkOptions(self):
        """
        Validate the settings before running.

        Raises an Exception when no input file name was given, and derives
        the default output file name when none was given.
        """
        if self.inputFileName == "":
            self._logAndRaise("ERROR: Missing input file name")

        if self.outFileName == "":
            self.outFileName = "%s_phyml.newick" % (self.inputFileName)

    def _logAndRaise(self, errorMsg):
        """Log errorMsg at error level, then raise it as an Exception."""
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def _shortenHeaders(self):
        """
        Run ChangeSequenceHeaders (step 1, prefix "seq") on the input fasta.

        Writes "<inputFileName>.shortH" and the link file
        "<inputFileName>.shortHlink", and records the former in
        self.shortInputFileName. The ChangeSequenceHeaders instance is kept
        in self.csh because _renameHeaders() reuses it.
        """
        self.csh = ChangeSequenceHeaders()
        self.csh.setInputFile(self.inputFileName)
        self.csh.setFormat("fasta")
        self.csh.setStep(1)
        self.csh.setPrefix("seq")
        self.csh.setLinkFile(self.inputFileName + ".shortHlink")
        self.csh.setOutputFile(self.inputFileName + ".shortH")
        self.csh.setVerbosityLevel(self._verbosity - 1)
        self.csh.run()

        self.shortInputFileName = self.inputFileName + ".shortH"

    def _renameHeaders(self):
        """
        Run ChangeSequenceHeaders (step 2, newick format) on the PhyML tree.

        Reads self.phyml_tree, uses the link file written by
        _shortenHeaders(), and writes the result to self.outFileName.
        Must be called after _shortenHeaders(), which creates self.csh.
        """
        self.csh.setInputFile(self.phyml_tree)
        self.csh.setFormat("newick")
        self.csh.setStep(2)
        self.csh.setLinkFile(self.inputFileName + ".shortHlink")
        self.csh.setOutputFile(self.outFileName)
        self.csh.setVerbosityLevel(self._verbosity - 1)
        self.csh.run()

    def _runCommand(self, cmd, stdout):
        """
        Run an external command and raise an Exception if it fails.

        @param cmd: command line as one string; it is split on single
                    spaces, so no argument may itself contain a space
        @param stdout: where the child's standard output goes (an open
                       file object or subprocess.PIPE)
        """
        self._log.debug("Running : %s" % cmd)
        process = subprocess.Popen(cmd.split(' '), stdout=stdout, stderr=subprocess.PIPE)
        output = process.communicate()
        # output[0] is None when stdout was redirected to a file.
        self._log.debug("Output:\n%s" % output[0])
        if process.returncode != 0:
            self._logAndRaise("ERROR when launching '%s'" % cmd)

    def run(self):
        """
        Execute the whole pipeline: sreformat, phyml, then header renaming.

        Raises an Exception when the input file name is missing or when an
        external command exits with a non-zero status.
        """
        LoggerFactory.setLevel(self._log, self._verbosity)
        self._checkOptions()
        self._log.info("START LaunchPhyML")
        self._log.debug("building a multiple alignment from '%s'..." % (self.inputFileName))

        # Work on a path located in the current directory: symlink the input
        # there unless a file with the same base name already exists.
        inputFileName = "%s/%s" % (os.getcwd(), os.path.basename(self.inputFileName))
        if not os.path.exists(inputFileName):
            os.symlink(self.inputFileName, inputFileName)
        self.inputFileName = inputFileName

        self._shortenHeaders()

        # Fixed: the phylip file name must be known before it is opened; it
        # used to be assigned only after the 'with' block, which raised
        # AttributeError.
        self.reformatedInputFileName = "%s.phylip" % self.shortInputFileName
        cmd = "sreformat phylip %s" % (self.shortInputFileName)
        with open(self.reformatedInputFileName, "w") as fPhylip:
            self._runCommand(cmd, fPhylip)

        self.phyml_tree = "%s_phyml_tree.txt" % self.reformatedInputFileName
        cpyPhyml_tree = "%s_cpy" % self.phyml_tree
        # NOTE(review): this copy requires "<phylip>_phyml_tree.txt" to exist
        # BEFORE phyml is launched (it is passed as the -u tree); on a fresh
        # directory shutil.copyfile raises. Confirm the intended workflow.
        shutil.copyfile(self.phyml_tree, cpyPhyml_tree)

        cmd = "phyml -i %s -d %s%s -n %d -b %d -m %s -t %f -v %f -c %d -a %f -u %s -o %s" % (self.reformatedInputFileName, self.dataType, self.seqFormat, self.nbDataSets, self.nbBootDataSets, self.substModel, self.ratioTsTv, self.propInvSites, self.nbCat, self.gammaParam, cpyPhyml_tree, self.paramOptimisation)
        # Parenthesised so the statement is valid under both Python 2 and 3.
        print(cmd)
        self._runCommand(cmd, subprocess.PIPE)

        self._renameHeaders()

        if self._clean:
            for f in [self.shortInputFileName, self.inputFileName + ".shortHlink", self.inputFileName + ".shortH.phylip", self.inputFileName + ".shortH.phylip_phyml_lk.txt", self.phyml_tree]:
                os.remove(f)
            os.system("mv %s.phylip_phyml_stat.txt %s_phyml.txt" % (self.shortInputFileName, self.inputFileName))

        self._log.info("Finished running LaunchPhyML")