comparison commons/launcher/LaunchMap.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
comparison
equal deleted inserted replaced
17:b0e8584489e6 18:94ab73e8a190
1 #!/usr/bin/env python
2
3 # Copyright INRA (Institut National de la Recherche Agronomique)
4 # http://www.inra.fr
5 # http://urgi.versailles.inra.fr
6 #
7 # This software is governed by the CeCILL license under French law and
8 # abiding by the rules of distribution of free software. You can use,
9 # modify and/ or redistribute the software under the terms of the CeCILL
10 # license as circulated by CEA, CNRS and INRIA at the following URL
11 # "http://www.cecill.info".
12 #
13 # As a counterpart to the access to the source code and rights to copy,
14 # modify and redistribute granted by the license, users are provided only
15 # with a limited warranty and the software's author, the holder of the
16 # economic rights, and the successive licensors have only limited
17 # liability.
18 #
19 # In this respect, the user's attention is drawn to the risks associated
20 # with loading, using, modifying and/or developing or reproducing the
21 # software by the user in light of its specific status of free software,
22 # that may mean that it is complicated to manipulate, and that also
23 # therefore means that it is reserved for developers and experienced
24 # professionals having in-depth computer knowledge. Users are therefore
25 # encouraged to load and test the software's suitability as regards their
26 # requirements in conditions enabling the security of their systems and/or
27 # data to be ensured and, more generally, to use and operate it in the
28 # same conditions as regards security.
29 #
30 # The fact that you are presently reading this means that you have had
31 # knowledge of the CeCILL license and that you accept its terms.
32
33 from commons.core.LoggerFactory import LoggerFactory
34 from commons.core.utils.RepetOptionParser import RepetOptionParser
35 from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
36 from commons.core.seq.FastaUtils import FastaUtils
37 from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
38 from commons.core.utils.FileUtils import FileUtils
39 import os
40 import subprocess
41
42 LOG_DEPTH = "repet.tools"
43
44 ##Reference launcher implementation
45 #
class LaunchMap(object):
    """Launcher for the external 'rpt_map' program.

    Given an input fasta file, run() performs the following steps:
      1. record the headers of the input file,
      2. run ChangeSequenceHeaders (step 1, prefix "seq") to produce
         '<input>.shortH' and the link file '<input>.shortHlink',
      3. run 'rpt_map' on '<input>.shortH', redirecting its standard output
         to '<input>.shortH.fa_aln',
      4. run ChangeSequenceHeaders (step 2) with the same link file to
         produce '<input>.shortH.fa_aln.initH',
      5. write the sequences to the output file, in the order of the headers
         recorded in step 1,
      6. optionally remove the four intermediate files.

    The instance can be configured through the constructor, through the
    setters, or from the command line (setAttributesFromCmdLine).
    """

    # True while the output file name is the default one derived from the
    # input fasta file name; in that case it follows setFastaFileName().
    _useDefaultOutFileName = True

    def __init__(self, fastaFileName = "", outFileName = "", gapSize = 50, mismatchPenalty = -8, gapOpenPenalty = 16, gapExtendPenalty = 4, doClean = False, verbosity = 0):
        """Store the settings and create the logger.

        @param fastaFileName: input fasta file name
        @param outFileName: output file name; "" selects '<fastaFileName>.fa_aln'
        @param gapSize: size above which a gap is not penalized anymore
        @param mismatchPenalty: penalty for a mismatch
        @param gapOpenPenalty: penalty for a gap opening
        @param gapExtendPenalty: penalty for a gap extension
        @param doClean: if True, remove the intermediate files at the end of run()
        @param verbosity: verbosity level given to the logger
        """
        # The fasta file name must be known before the output file name,
        # because the default output name is derived from it.
        self._fastaFileName = fastaFileName
        self.setOutFileName(outFileName)
        self._gapSize = gapSize
        self._mismatchPenalty = mismatchPenalty
        self._gapOpenPenalty = gapOpenPenalty
        self._gapExtendPenalty = gapExtendPenalty
        self._doClean = doClean
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

    def setAttributesFromCmdLine(self):
        """Parse the command line and configure this instance from its options."""
        parser = RepetOptionParser(description = "", epilog = "")
        parser.add_option("-i", "--fasta", dest = "fastaFileName", action = "store", type = "string", help = "input fasta file name [compulsory] [format: fasta]", default = "")
        parser.add_option("-o", "--out", dest = "outFileName", action = "store", type = "string", help = "output file name [default: <input>.fa_aln]", default = "")
        parser.add_option("-s", "--gapSize", dest = "gapSize", action = "store", type = "int", help = "size above which a gap is not penalized anymore [optional] [default: 50]", default = 50)
        parser.add_option("-m", "--mismatch", dest = "mismatch", action = "store", type = "int", help = "penalty for a mismatch [optional] [default: -8]", default = -8)
        parser.add_option("-O", "--gapOpen", dest = "gapOpen", action = "store", type = "int", help = "penalty for a gap opening [optional] [default: 16]", default = 16)
        parser.add_option("-e", "--gapExtend", dest = "gapExtend", action = "store", type = "int", help = "penalty for a gap extension [optional] [default: 4]", default = 4)
        parser.add_option("-c", "--clean", dest = "doClean", action = "store_true", help = "clean temporary files [optional] [default: False]", default = False)
        parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 1]", default = 1)
        # parse_args() returns (options, positional arguments); only the
        # options are used.
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        """Copy the parsed command-line options into the instance attributes."""
        self.setFastaFileName(options.fastaFileName)
        self.setOutFileName(options.outFileName)
        self.setGapSize(options.gapSize)
        self.setMismatchPenalty(options.mismatch)
        self.setGapOpenPenalty(options.gapOpen)
        self.setGapExtendPenalty(options.gapExtend)
        self.setDoClean(options.doClean)
        self.setVerbosity(options.verbosity)

    def _getDefaultOutFileName(self):
        """Return the default output file name, '<fasta file name>.fa_aln'."""
        return "%s.fa_aln" % self._fastaFileName

    def setFastaFileName(self, fastaFileName):
        """Set the input fasta file name.

        If no explicit output file name has been given, the default output
        file name is recomputed so that it always matches the current input.
        """
        self._fastaFileName = fastaFileName
        if self._useDefaultOutFileName:
            self._outFileName = self._getDefaultOutFileName()

    def setOutFileName(self, outFileName):
        """Set the output file name; "" selects the default '<input>.fa_aln'."""
        self._useDefaultOutFileName = (outFileName == "")
        if self._useDefaultOutFileName:
            self._outFileName = self._getDefaultOutFileName()
        else:
            self._outFileName = outFileName

    def setGapSize(self, gapSize):
        """Set the size above which a gap is not penalized anymore."""
        self._gapSize = gapSize

    def setMismatchPenalty(self, mismatchPenalty):
        """Set the penalty for a mismatch."""
        self._mismatchPenalty = mismatchPenalty

    def setGapOpenPenalty(self, gapOpenPenalty):
        """Set the penalty for a gap opening."""
        self._gapOpenPenalty = gapOpenPenalty

    def setGapExtendPenalty(self, gapExtendPenalty):
        """Set the penalty for a gap extension."""
        self._gapExtendPenalty = gapExtendPenalty

    def setDoClean(self, doClean):
        """Set whether run() removes its intermediate files when it ends."""
        self._doClean = doClean

    def setVerbosity(self, verbosity):
        """Set the verbosity level (applied to the logger when run() starts)."""
        self._verbosity = verbosity

    def _checkOptions(self):
        """Check that an input fasta file name was given and that it exists.

        Raises an Exception (through _logAndRaise) otherwise.
        """
        if self._fastaFileName == "":
            self._logAndRaise("ERROR: Missing input fasta file name")
        if not FileUtils.isRessourceExists(self._fastaFileName):
            self._logAndRaise("ERROR: Input fasta file name %s doesn't exist." % self._fastaFileName)

    def _logAndRaise(self, errorMsg):
        """Log errorMsg at error level, then raise it as an Exception."""
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def getMapCmd(self):
        """Return the shell command line that runs 'rpt_map'.

        The command reads '<input>.shortH', takes the gap size and the three
        penalties as integer arguments, and redirects its standard output to
        '<input>.shortH.fa_aln'.
        """
        # NOTE(review): file names are inserted unquoted and the command is
        # run through a shell, so names containing spaces or shell
        # metacharacters will not work.
        cmd = "rpt_map"
        cmd += " %s.shortH" % self._fastaFileName
        cmd += " %i" % self._gapSize
        cmd += " %i" % self._mismatchPenalty
        cmd += " %i" % self._gapOpenPenalty
        cmd += " %i" % self._gapExtendPenalty
        cmd += " > %s.shortH.fa_aln" % self._fastaFileName
        return cmd

    def run(self):
        """Run the whole pipeline (see the class documentation).

        Raises an Exception if the options are invalid or if 'rpt_map'
        exits with a non-zero return code.
        """
        LoggerFactory.setLevel(self._log, self._verbosity)
        self._checkOptions()
        self._log.info("START LaunchMap")
        self._log.debug("Fasta file name: %s" % self._fastaFileName)

        shortHFileName = "%s.shortH" % self._fastaFileName
        linkFileName = "%s.shortHlink" % self._fastaFileName
        alnFileName = "%s.shortH.fa_aln" % self._fastaFileName
        initHAlnFileName = "%s.shortH.fa_aln.initH" % self._fastaFileName

        # Headers of the input file, used at the end to fetch and write the
        # sequences in that order.
        lInitHeaders = FastaUtils.dbHeaders(self._fastaFileName, self._verbosity - 1)

        # Step 1 of the header change: input -> '.shortH' + link file
        # (presumably replaces the headers by short 'seq'-prefixed ones).
        csh = ChangeSequenceHeaders()
        csh.setInputFile(self._fastaFileName)
        csh.setFormat("fasta")
        csh.setStep(1)
        csh.setPrefix("seq")
        csh.setLinkFile(linkFileName)
        csh.setOutputFile(shortHFileName)
        csh.setVerbosityLevel(self._verbosity - 1)
        csh.run()

        # shell = True is required because the command contains a redirection.
        cmd = self.getMapCmd()
        self._log.debug("Running : %s" % cmd)
        process = subprocess.Popen(cmd, shell = True)
        process.communicate()
        if process.returncode != 0:
            self._logAndRaise("ERROR when launching '%s'" % cmd)

        # Step 2 of the header change, using the same link file
        # (presumably restores the initial headers in the alignment).
        csh.setInputFile(alnFileName)
        csh.setFormat("fasta")
        csh.setStep(2)
        csh.setLinkFile(linkFileName)
        csh.setOutputFile(initHAlnFileName)
        csh.setVerbosityLevel(self._verbosity - 1)
        csh.run()

        absDB = AlignedBioseqDB(initHAlnFileName)
        # 'with' guarantees the output file is closed even if a sequence
        # cannot be fetched or written.
        with open(self._outFileName, "w") as outFileHandler:
            for header in lInitHeaders:
                bs = absDB.fetch(header)
                bs.upCase()
                bs.write(outFileHandler)

        if self._doClean:
            for tmpFileName in (shortHFileName, linkFileName, alnFileName, initHAlnFileName):
                os.remove(tmpFileName)
        self._log.info("END Launch")
182
# Script entry point: build a launcher, configure it from the command-line
# options, then run it.
if __name__ == "__main__":
    launcher = LaunchMap()
    launcher.setAttributesFromCmdLine()
    launcher.run()