Mercurial > repos > yufei-luo > s_mart
comparison commons/launcher/LaunchMap.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
comparison
equal
deleted
inserted
replaced
30:5677346472b5 | 31:0ab839023fe4 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 # Copyright INRA (Institut National de la Recherche Agronomique) | |
4 # http://www.inra.fr | |
5 # http://urgi.versailles.inra.fr | |
6 # | |
7 # This software is governed by the CeCILL license under French law and | |
8 # abiding by the rules of distribution of free software. You can use, | |
9 # modify and/ or redistribute the software under the terms of the CeCILL | |
10 # license as circulated by CEA, CNRS and INRIA at the following URL | |
11 # "http://www.cecill.info". | |
12 # | |
13 # As a counterpart to the access to the source code and rights to copy, | |
14 # modify and redistribute granted by the license, users are provided only | |
15 # with a limited warranty and the software's author, the holder of the | |
16 # economic rights, and the successive licensors have only limited | |
17 # liability. | |
18 # | |
19 # In this respect, the user's attention is drawn to the risks associated | |
20 # with loading, using, modifying and/or developing or reproducing the | |
21 # software by the user in light of its specific status of free software, | |
22 # that may mean that it is complicated to manipulate, and that also | |
23 # therefore means that it is reserved for developers and experienced | |
24 # professionals having in-depth computer knowledge. Users are therefore | |
25 # encouraged to load and test the software's suitability as regards their | |
26 # requirements in conditions enabling the security of their systems and/or | |
27 # data to be ensured and, more generally, to use and operate it in the | |
28 # same conditions as regards security. | |
29 # | |
30 # The fact that you are presently reading this means that you have had | |
31 # knowledge of the CeCILL license and that you accept its terms. | |
32 | |
33 from commons.core.LoggerFactory import LoggerFactory | |
34 from commons.core.utils.RepetOptionParser import RepetOptionParser | |
35 from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB | |
36 from commons.core.seq.FastaUtils import FastaUtils | |
37 from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders | |
38 from commons.core.utils.FileUtils import FileUtils | |
39 import os | |
40 import subprocess | |
41 | |
LOG_DEPTH = "repet.tools"

## Launcher for the external 'rpt_map' program
#
# The input fasta file is first rewritten by ChangeSequenceHeaders (step 1,
# prefix "seq", presumably to shorten the headers), 'rpt_map' is run on the
# rewritten file, ChangeSequenceHeaders is run again (step 2, presumably to
# restore the initial headers), and the aligned sequences are finally written
# upper-cased, in the order of the headers of the input file.
#
class LaunchMap(object):

    # True while the output file name is derived from the input fasta file
    # name, so that setFastaFileName() can keep the two in sync.
    _isOutFileNameDefault = False

    ## Constructor
    #
    # @param fastaFileName string name of the input fasta file
    # @param outFileName string name of the output file ("" means "<fastaFileName>.fa_aln")
    # @param gapSize int size above which a gap is not penalized anymore
    # @param mismatchPenalty int penalty for a mismatch
    # @param gapOpenPenalty int penalty for a gap opening
    # @param gapExtendPenalty int penalty for a gap extension
    # @param doClean boolean remove the temporary files at the end of run()
    # @param verbosity int verbosity level given to the logger
    #
    def __init__(self, fastaFileName = "", outFileName = "", gapSize = 50, mismatchPenalty = -8, gapOpenPenalty = 16, gapExtendPenalty = 4, doClean = False, verbosity = 0):
        self._fastaFileName = fastaFileName
        self.setOutFileName(outFileName)
        self._gapSize = gapSize
        self._mismatchPenalty = mismatchPenalty
        self._gapOpenPenalty = gapOpenPenalty
        self._gapExtendPenalty = gapExtendPenalty
        self._doClean = doClean
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

    ## Parse the command line and set the attributes accordingly
    #
    def setAttributesFromCmdLine(self):
        parser = RepetOptionParser(description = "", epilog = "")
        parser.add_option("-i", "--fasta", dest = "fastaFileName", action = "store", type = "string", help = "input fasta file name [compulsory] [format: fasta]", default = "")
        parser.add_option("-o", "--out", dest = "outFileName", action = "store", type = "string", help = "output file name [default: <input>.fa_aln]", default = "")
        parser.add_option("-s", "--gapSize", dest = "gapSize", action = "store", type = "int", help = "size above which a gap is not penalized anymore [optional] [default: 50]", default = 50)
        parser.add_option("-m", "--mismatch", dest = "mismatch", action = "store", type = "int", help = "penalty for a mismatch [optional] [default: -8]", default = -8)
        parser.add_option("-O", "--gapOpen", dest = "gapOpen", action = "store", type = "int", help = "penalty for a gap opening [optional] [default: 16]", default = 16)
        parser.add_option("-e", "--gapExtend", dest = "gapExtend", action = "store", type = "int", help = "penalty for a gap extension [optional] [default: 4]", default = 4)
        parser.add_option("-c", "--clean", dest = "doClean", action = "store_true", help = "clean temporary files [optional] [default: False]", default = False)
        parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 1]", default = 1)
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    ## Copy the parsed command-line options into the attributes
    #
    # @param options object returned by the option parser
    #
    def _setAttributesFromOptions(self, options):
        self.setFastaFileName(options.fastaFileName)
        self.setOutFileName(options.outFileName)
        self.setGapSize(options.gapSize)
        self.setMismatchPenalty(options.mismatch)
        self.setGapOpenPenalty(options.gapOpen)
        self.setGapExtendPenalty(options.gapExtend)
        self.setDoClean(options.doClean)
        self.setVerbosity(options.verbosity)

    ## Set the input fasta file name
    #
    # When no explicit output file name was given, the default output file
    # name is recomputed so that it always follows the input file name.
    #
    # @param fastaFileName string name of the input fasta file
    #
    def setFastaFileName(self, fastaFileName):
        self._fastaFileName = fastaFileName
        if self._isOutFileNameDefault:
            self._outFileName = "%s.fa_aln" % fastaFileName

    ## Set the output file name
    #
    # @param outFileName string name of the output file ("" means "<fastaFileName>.fa_aln")
    #
    def setOutFileName(self, outFileName):
        self._isOutFileNameDefault = (outFileName == "")
        if self._isOutFileNameDefault:
            self._outFileName = "%s.fa_aln" % self._fastaFileName
        else:
            self._outFileName = outFileName

    ## Set the size above which a gap is not penalized anymore
    #
    def setGapSize(self, gapSize):
        self._gapSize = gapSize

    ## Set the penalty for a mismatch
    #
    def setMismatchPenalty(self, mismatchPenalty):
        self._mismatchPenalty = mismatchPenalty

    ## Set the penalty for a gap opening
    #
    def setGapOpenPenalty(self, gapOpenPenalty):
        self._gapOpenPenalty = gapOpenPenalty

    ## Set the penalty for a gap extension
    #
    def setGapExtendPenalty(self, gapExtendPenalty):
        self._gapExtendPenalty = gapExtendPenalty

    ## Set whether the temporary files are removed at the end of run()
    #
    def setDoClean(self, doClean):
        self._doClean = doClean

    ## Set the verbosity level (applied to the logger at the start of run())
    #
    def setVerbosity(self, verbosity):
        self._verbosity = verbosity

    ## Check that an input fasta file name was given and that it exists
    #
    # Raises an Exception (after logging it) otherwise.
    #
    def _checkOptions(self):
        if self._fastaFileName == "":
            self._logAndRaise("ERROR: Missing input fasta file name")
        if not FileUtils.isRessourceExists(self._fastaFileName):
            self._logAndRaise("ERROR: Input fasta file name %s doesn't exist." % self._fastaFileName)

    ## Log an error message and raise an Exception carrying the same message
    #
    # @param errorMsg string message to log and raise
    #
    def _logAndRaise(self, errorMsg):
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    ## Give the 'rpt_map' command line as a single shell-like string
    #
    # The string is used for logging and error messages; run() itself starts
    # the program from _getMapArgs() without going through a shell.
    #
    # @return string command line, including the output redirection
    #
    def getMapCmd(self):
        cmd = "rpt_map"
        cmd += " %s.shortH" % self._fastaFileName
        cmd += " %i" % self._gapSize
        cmd += " %i" % self._mismatchPenalty
        cmd += " %i" % self._gapOpenPenalty
        cmd += " %i" % self._gapExtendPenalty
        cmd += " > %s.shortH.fa_aln" % self._fastaFileName
        return cmd

    ## Give the 'rpt_map' program and its arguments as a list (no redirection)
    #
    # @return list of strings suitable for subprocess.Popen without a shell
    #
    def _getMapArgs(self):
        return ["rpt_map",
                "%s.shortH" % self._fastaFileName,
                "%i" % self._gapSize,
                "%i" % self._mismatchPenalty,
                "%i" % self._gapOpenPenalty,
                "%i" % self._gapExtendPenalty]

    ## Run 'rpt_map' and save its standard output
    #
    # The program is started from an argument list rather than a shell string,
    # so that file names containing spaces or shell metacharacters are passed
    # unchanged. Any failure is logged and raised as an Exception.
    #
    # @param alnFileName string name of the file receiving the standard output
    #
    def _runMap(self, alnFileName):
        cmd = self.getMapCmd()
        self._log.debug("Running : %s" % cmd)
        returnCode = None
        try:
            with open(alnFileName, "w") as alnFileHandler:
                process = subprocess.Popen(self._getMapArgs(), stdout = alnFileHandler)
                process.communicate()
                returnCode = process.returncode
        except (OSError, IOError):
            # program not found / not executable, or output file not writable
            self._logAndRaise("ERROR when launching '%s'" % cmd)
        if returnCode != 0:
            self._logAndRaise("ERROR when launching '%s'" % cmd)

    ## Run the whole process: rewrite headers, launch 'rpt_map', restore headers, write the output
    #
    # Raises an Exception if the options are invalid or if 'rpt_map' fails.
    # Temporary files are only removed (when requested) after a successful run.
    #
    def run(self):
        LoggerFactory.setLevel(self._log, self._verbosity)
        self._checkOptions()
        self._log.info("START LaunchMap")
        self._log.debug("Fasta file name: %s" % self._fastaFileName)

        # names of the intermediate files, all derived from the input file name
        shortHFileName = "%s.shortH" % self._fastaFileName
        linkFileName = "%s.shortHlink" % self._fastaFileName
        alnFileName = "%s.shortH.fa_aln" % self._fastaFileName
        initHFileName = "%s.shortH.fa_aln.initH" % self._fastaFileName

        # headers of the input file, kept to write the output in the same order
        lInitHeaders = FastaUtils.dbHeaders(self._fastaFileName, self._verbosity - 1)

        csh = ChangeSequenceHeaders()
        csh.setInputFile(self._fastaFileName)
        csh.setFormat("fasta")
        csh.setStep(1)
        csh.setPrefix("seq")
        csh.setLinkFile(linkFileName)
        csh.setOutputFile(shortHFileName)
        csh.setVerbosityLevel(self._verbosity - 1)
        csh.run()

        self._runMap(alnFileName)

        csh.setInputFile(alnFileName)
        csh.setFormat("fasta")
        csh.setStep(2)
        csh.setLinkFile(linkFileName)
        csh.setOutputFile(initHFileName)
        csh.setVerbosityLevel(self._verbosity - 1)
        csh.run()

        absDB = AlignedBioseqDB(initHFileName)
        # 'with' guarantees the output file is closed even if a sequence fails
        with open(self._outFileName, "w") as outFileHandler:
            for header in lInitHeaders:
                bs = absDB.fetch(header)
                bs.upCase()
                bs.write(outFileHandler)
        if self._doClean:
            for tmpFileName in (shortHFileName, linkFileName, alnFileName, initHFileName):
                os.remove(tmpFileName)
        self._log.info("END Launch")
182 | |
# Script entry point: configure a launcher from the command line, then run it.
if __name__ == "__main__":
    launcher = LaunchMap()
    launcher.setAttributesFromCmdLine()
    launcher.run()
186 iLaunch.run() |