18
|
1 #!/usr/bin/env python
|
|
2
|
|
3 # Copyright INRA (Institut National de la Recherche Agronomique)
|
|
4 # http://www.inra.fr
|
|
5 # http://urgi.versailles.inra.fr
|
|
6 #
|
|
7 # This software is governed by the CeCILL license under French law and
|
|
8 # abiding by the rules of distribution of free software. You can use,
|
|
9 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
10 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
11 # "http://www.cecill.info".
|
|
12 #
|
|
13 # As a counterpart to the access to the source code and rights to copy,
|
|
14 # modify and redistribute granted by the license, users are provided only
|
|
15 # with a limited warranty and the software's author, the holder of the
|
|
16 # economic rights, and the successive licensors have only limited
|
|
17 # liability.
|
|
18 #
|
|
19 # In this respect, the user's attention is drawn to the risks associated
|
|
20 # with loading, using, modifying and/or developing or reproducing the
|
|
21 # software by the user in light of its specific status of free software,
|
|
22 # that may mean that it is complicated to manipulate, and that also
|
|
23 # therefore means that it is reserved for developers and experienced
|
|
24 # professionals having in-depth computer knowledge. Users are therefore
|
|
25 # encouraged to load and test the software's suitability as regards their
|
|
26 # requirements in conditions enabling the security of their systems and/or
|
|
27 # data to be ensured and, more generally, to use and operate it in the
|
|
28 # same conditions as regards security.
|
|
29 #
|
|
30 # The fact that you are presently reading this means that you have had
|
|
31 # knowledge of the CeCILL license and that you accept its terms.
|
|
32
|
|
33 from commons.core.LoggerFactory import LoggerFactory
|
|
34 from commons.core.utils.RepetOptionParser import RepetOptionParser
|
|
35 from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
|
|
36 import subprocess
|
|
37 import os
|
|
38 from commons.core.seq.Bioseq import Bioseq
|
|
39 import shutil
|
|
40
|
|
41 LOG_DEPTH = "repet.core.launchers"
|
|
42
|
|
43
|
|
44
|
|
45 class LaunchPhyML(object):
|
|
46 """
|
|
47 Launch 'PhyML'
|
|
48 """
|
|
49 def __init__(self, inputFileName="", outFileName="",dataType= "nt", interleavedFormat= True, nbDataSets=1, nbBootDataSets=0, substModel="HKY85", ratioTsTv=4.0, propInvSites= 0.0, nbCat=1, gammaParam=1.0, startTree="BIONJ", paramOptimisation = "tlr", clean=False, verbosity=3 ):
|
|
50 self.inputFileName = inputFileName
|
|
51 self.outFileName=outFileName
|
|
52 self.dataType = dataType #"nt or aa"
|
|
53 self._setSeqFormat(interleavedFormat) #if False -q"
|
|
54 self.nbDataSets = nbDataSets
|
|
55 self.nbBootDataSets = nbBootDataSets
|
|
56 self.substModel = substModel
|
|
57 self.ratioTsTv = ratioTsTv
|
|
58 self.propInvSites = propInvSites # propInvSites="e" replaced by 0.0; should be in [0-1]
|
|
59 self.nbCat = nbCat # Number of categories less than four or higher than eight are not recommended.
|
|
60 self.gammaParam = gammaParam
|
|
61 self.startTree = startTree #by default is BIONJ used reformatedInputFileName+"_phyml_tree.txt" instead
|
|
62 self.paramOptimisation = paramOptimisation # used instead of self.optTopology="y", self.optBranchRate="y"
|
|
63 #This option focuses on specific parameter optimisation.
|
|
64 #tlr : tree topology (t), branch length (l) and rate parameters (r) are optimised.
|
|
65 #tl : tree topology and branch length are optimised.
|
|
66 #lr : branch length and rate parameters are optimised.
|
|
67 #l : branch length are optimised.
|
|
68 #r : rate parameters are optimised.
|
|
69 #n : no parameter is optimised.
|
|
70
|
|
71 self._clean = clean
|
|
72 self._verbosity = verbosity
|
|
73 self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)
|
|
74
|
|
75 def _setSeqFormat(self, interleavedFormat):
|
|
76 if not (interleavedFormat) :
|
|
77 self.seqFormat = " -q"
|
|
78 else :
|
|
79 self.seqFormat = ""
|
|
80
|
|
81 def setAttributesFromCmdLine(self):
|
|
82 description = "usage: LaunchPhyML.py [ options ]"
|
|
83 epilog = "\n -h: this help\n"
|
|
84 epilog += "\t -i: name of the input file (refseq is first, format='fasta')"
|
|
85 epilog += "\n\t"
|
|
86 parser = RepetOptionParser(description = description, epilog = epilog)
|
|
87 parser.add_option("-i", "--fasta", dest = "inputFileName", action = "store", type = "string", help = "input fasta file name [compulsory] [format: fasta]", default = "")
|
|
88 parser.add_option("-o", "--out", dest = "outFileName", action = "store", type = "string", help = "output file name [default: <input>.out]", default = "")
|
|
89 parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 1]", default = 1)
|
|
90 options = parser.parse_args()[0]
|
|
91 self._setAttributesFromOptions(options)
|
|
92
|
|
93 def _setAttributesFromOptions(self, options):
|
|
94 self.inputFileName = options.inputFileName
|
|
95 self.setOutFileName = options.outFileName
|
|
96 self._verbosity = options.verbosity
|
|
97
|
|
98 def _checkOptions(self):
|
|
99 if self.inputFileName == "":
|
|
100 self._logAndRaise("ERROR: Missing input file name")
|
|
101
|
|
102 if self.outFileName == "":
|
|
103 self.outFileName = "%s_phyml.newick" % (self.inputFileName)
|
|
104
|
|
105 def _logAndRaise(self, errorMsg):
|
|
106 self._log.error(errorMsg)
|
|
107 raise Exception(errorMsg)
|
|
108
|
|
109 def _shortenHeaders(self):
|
|
110 self.csh = ChangeSequenceHeaders()
|
|
111 self.csh.setInputFile(self.inputFileName)
|
|
112 self.csh.setFormat("fasta")
|
|
113 self.csh.setStep(1)
|
|
114 self.csh.setPrefix("seq")
|
|
115 self.csh.setLinkFile(self.inputFileName+".shortHlink")
|
|
116 self.csh.setOutputFile(self.inputFileName+".shortH")
|
|
117 self.csh.setVerbosityLevel(self._verbosity-1)
|
|
118 self.csh.run()
|
|
119 self.shortInputFileName = self.inputFileName+".shortH"
|
|
120
|
|
121 def _renameHeaders(self):
|
|
122 self.csh.setInputFile(self.phyml_tree)
|
|
123 self.csh.setFormat("newick")
|
|
124 self.csh.setStep(2)
|
|
125 self.csh.setLinkFile(self.inputFileName+".shortHlink" )
|
|
126 self.csh.setOutputFile(self.outFileName)
|
|
127 self.csh.setVerbosityLevel(self._verbosity-1)
|
|
128 self.csh.run()
|
|
129
|
|
130 def run(self):
|
|
131 LoggerFactory.setLevel(self._log, self._verbosity)
|
|
132 self._checkOptions()
|
|
133 self._log.info("START LaunchPhyML")
|
|
134 self._log.debug("building a multiple alignment from '%s'..." % ( self.inputFileName))
|
|
135
|
|
136 inputFileName = "%s/%s" % (os.getcwd(), os.path.basename(self.inputFileName))
|
|
137 if not os.path.exists(inputFileName):
|
|
138 os.symlink(self.inputFileName, inputFileName)
|
|
139 self.inputFileName = inputFileName
|
|
140
|
|
141 self._shortenHeaders()
|
|
142
|
|
143 cmd = "sreformat phylip %s" % (self.shortInputFileName)
|
|
144
|
|
145 with open (self.reformatedInputFileName, "w") as fPhylip :
|
|
146
|
|
147 process = subprocess.Popen(cmd.split(' '), stdout= fPhylip , stderr=subprocess.PIPE)
|
|
148 self._log.debug("Running : %s" % cmd)
|
|
149 output = process.communicate()
|
|
150 self._log.debug("Output:\n%s" % output[0])
|
|
151 if process.returncode != 0:
|
|
152 self._logAndRaise("ERROR when launching '%s'" % cmd)
|
|
153
|
|
154 self.reformatedInputFileName = "%s.phylip" % self.shortInputFileName
|
|
155 self.phyml_tree = "%s_phyml_tree.txt" %self.reformatedInputFileName
|
|
156 cpyPhyml_tree = "%s_cpy" %self.phyml_tree
|
|
157 shutil.copyfile(self.phyml_tree,cpyPhyml_tree)
|
|
158
|
|
159 cmd = "phyml -i %s -d %s%s -n %d -b %d -m %s -t %f -v %f -c %d -a %f -u %s -o %s" % (self.reformatedInputFileName, self.dataType, self.seqFormat, self.nbDataSets,self.nbBootDataSets,self.substModel, self.ratioTsTv, self.propInvSites,self.nbCat,self.gammaParam, cpyPhyml_tree , self.paramOptimisation )
|
|
160 print cmd
|
|
161 process = subprocess.Popen(cmd.split(' '), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
162 self._log.debug("Running : %s" % cmd)
|
|
163 output = process.communicate()
|
|
164 self._log.debug("Output:\n%s" % output[0])
|
|
165 if process.returncode != 0:
|
|
166 self._logAndRaise("ERROR when launching '%s'" % cmd)
|
|
167
|
|
168 self._renameHeaders()
|
|
169
|
|
170 if self._clean:
|
|
171 for f in [ self.shortInputFileName, self.inputFileName+".shortHlink", self.inputFileName+".shortH.phylip",self.inputFileName+".shortH.phylip_phyml_lk.txt", self.phyml_tree ]:
|
|
172 os.remove(f)
|
|
173 os.system( "mv %s.phylip_phyml_stat.txt %s_phyml.txt" % ( self.shortInputFileName, self.inputFileName ) )
|
|
174
|
|
175 self._log.info("Finished running LaunchPhyML")
|
|
176
|
|
177
|