Mercurial > repos > yufei-luo > s_mart
comparison commons/launcher/LaunchTRF.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
comparison
equal
deleted
inserted
replaced
30:5677346472b5 | 31:0ab839023fe4 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 # Copyright INRA (Institut National de la Recherche Agronomique) | |
4 # http://www.inra.fr | |
5 # http://urgi.versailles.inra.fr | |
6 # | |
7 # This software is governed by the CeCILL license under French law and | |
8 # abiding by the rules of distribution of free software. You can use, | |
9 # modify and/ or redistribute the software under the terms of the CeCILL | |
10 # license as circulated by CEA, CNRS and INRIA at the following URL | |
11 # "http://www.cecill.info". | |
12 # | |
13 # As a counterpart to the access to the source code and rights to copy, | |
14 # modify and redistribute granted by the license, users are provided only | |
15 # with a limited warranty and the software's author, the holder of the | |
16 # economic rights, and the successive licensors have only limited | |
17 # liability. | |
18 # | |
19 # In this respect, the user's attention is drawn to the risks associated | |
20 # with loading, using, modifying and/or developing or reproducing the | |
21 # software by the user in light of its specific status of free software, | |
22 # that may mean that it is complicated to manipulate, and that also | |
23 # therefore means that it is reserved for developers and experienced | |
24 # professionals having in-depth computer knowledge. Users are therefore | |
25 # encouraged to load and test the software's suitability as regards their | |
26 # requirements in conditions enabling the security of their systems and/or | |
27 # data to be ensured and, more generally, to use and operate it in the | |
28 # same conditions as regards security. | |
29 # | |
30 # The fact that you are presently reading this means that you have had | |
31 # knowledge of the CeCILL license and that you accept its terms. | |
32 | |
33 from commons.core.LoggerFactory import LoggerFactory | |
34 from commons.core.utils.RepetOptionParser import RepetOptionParser | |
35 from commons.core.checker.ConfigChecker import ConfigRules | |
36 from commons.core.checker.ConfigChecker import ConfigChecker | |
37 import subprocess | |
38 import glob | |
39 import os | |
40 | |
# Prefix of the logger name: LaunchTRF appends its own class name to this
# value when it asks LoggerFactory for a logger.
LOG_DEPTH = "repet.launchers"
42 | |
class LaunchTRF(object):
    """
    Run TRF (Tandem Repeats Finder) on a fasta file and convert its ".dat"
    report into a tab-delimited "set" file.

    Each line of the output file holds: pattern number, "(consensus)copies",
    sequence name, start and end.
    """

    # Fixed TRF arguments passed between the input file and the maximum
    # period. They are also the components of the ".dat" file name.
    _TRF_PARAMETERS = ("2", "3", "5", "80", "10", "20")

    def __init__(self, inFileName = "", outFileName = "", maxPeriod=15, doClean = False, verbosity = 0):
        """
        @param inFileName: input file name (fasta)
        @param outFileName: output file name; "" means "<inFileName>.TRF.set"
        @param maxPeriod: maximum period size given to TRF
        @param doClean: if True, run() removes the TRF ".dat" file afterwards
        @param verbosity: level given to LoggerFactory
        """
        self.inFileName = inFileName
        self.setOutFileName(outFileName)
        self.maxPeriod=maxPeriod
        self._doClean = doClean
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

    def setAttributesFromCmdLine(self):
        """
        Parse the command line and set the attributes accordingly.
        """
        description = "Launch TRF to detect micro-satellites in sequences."
        epilog = "\nExample 1: launch without verbosity and keep temporary files.\n"
        # The example used to name "LaunchTemplate.py", a left-over from the
        # template this launcher was copied from.
        epilog += "\t$ python LaunchTRF.py -i file.fa -v 0"
        epilog += "\n\t"
        parser = RepetOptionParser(description = description, epilog = epilog)
        parser.add_option("-i", "--in", dest = "inFileName", action = "store", type = "string", help = "input file name [compulsory] [format: fasta]", default = "")
        parser.add_option("-o", "--out", dest = "outFileName", action = "store", type = "string", help = "output file name [default: <input>.TRF.set]", default = "")
        parser.add_option("-m", "--maxPeriod", dest = "maxPeriod", action = "store", type = "int", help = " maximum period size to report [default: 15]", default = 15)
        parser.add_option("-c", "--clean", dest = "doClean", action = "store_true", help = "clean temporary files [optional] [default: False]", default = False)
        parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 1]", default = 1)
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        """
        Copy the parsed command-line options into the attributes.
        """
        self.setInFileName(options.inFileName)
        self.setOutFileName(options.outFileName)
        self.maxPeriod = options.maxPeriod
        self.setDoClean(options.doClean)
        self.setVerbosity(options.verbosity)

    def setInFileName(self, inFileName):
        """
        Set the input file name.

        If the output file name was left to its default, it is rebuilt from
        the new input name; otherwise an instance created without arguments
        would keep writing to ".TRF.set".
        """
        self.inFileName = inFileName
        # getattr: the flag does not exist until setOutFileName() has run.
        if getattr(self, "_hasDefaultOutFileName", False):
            self._outFileName = self._buildDefaultOutFileName()

    def setOutFileName(self, outFileName):
        """
        Set the output file name; "" selects the default "<input>.TRF.set".
        """
        self._hasDefaultOutFileName = (outFileName == "")
        if self._hasDefaultOutFileName:
            self._outFileName = self._buildDefaultOutFileName()
        else:
            self._outFileName = outFileName

    def _buildDefaultOutFileName(self):
        """
        Return the default output file name for the current input file.
        """
        return "%s.TRF.set" % self.inFileName

    def setDoClean(self, doClean):
        """
        Choose whether run() removes the TRF ".dat" file once parsed.
        """
        self._doClean = doClean

    def setVerbosity(self, verbosity):
        """
        Store the verbosity; run() applies it to the logger.
        """
        self._verbosity = verbosity

    def _checkOptions(self):
        """
        Raise an Exception if the input file name is missing.
        """
        if self.inFileName == "":
            self._logAndRaise("ERROR: Missing input file name")

    def _logAndRaise(self, errorMsg):
        """
        Log the message as an error, then raise it as an Exception.
        """
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def _getDatFileName(self):
        """
        Return the name of the ".dat" report TRF is expected to produce.

        The name is "<input>.<TRF parameters>.<maxPeriod>.dat".
        """
        datFileName = "%s.%s.%d.dat" % (self.inFileName, ".".join(self._TRF_PARAMETERS), self.maxPeriod)
        if not os.path.exists(datFileName):
            # NOTE(review): TRF appears to write its report in the current
            # working directory under the base name of the input file;
            # confirm against the TRF documentation. This fallback covers an
            # input file located in another directory.
            localDatFileName = os.path.basename(datFileName)
            if os.path.exists(localDatFileName):
                return localDatFileName
        return datFileName

    def _launchTRF(self):
        """
        Run the "trf" program on the input file and log what it prints.
        """
        # Build the argument list directly: splitting a command string on
        # spaces breaks input paths that contain a space.
        cmd = ["trf", self.inFileName]
        cmd.extend(self._TRF_PARAMETERS)
        cmd.extend(["%d" % self.maxPeriod, "-h", "-d"])
        self._log.debug("Running : %s" % " ".join(cmd))
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdoutData, stderrData = process.communicate()
        self._log.debug("Output:\n%s" % stdoutData)
        if stderrData:
            self._log.debug("Error output:\n%s" % stderrData)
        # NOTE(review): the exit status is not checked, as before; confirm
        # TRF's exit-code convention before adding such a check.

    def _parseTRF(self):
        """
        Convert the TRF ".dat" report into the output "set" file.

        A line starting with "Sequence:" gives the name of the sequence the
        following repeats belong to. A line with at least 14 fields is a
        repeat: fields 0 and 1 are written as start and end, field 3 is the
        copy number (rounded to the nearest integer) and field 13 is the
        consensus. Every other line is ignored.
        """
        self._log.debug("Parsing TRF output")
        with open(self._getDatFileName(), 'r') as inFile:
            with open(self._outFileName, 'w') as outFile:
                nbPatterns = 0
                # Stays empty if a repeat shows up before any header.
                seqName = ""
                for line in inFile:
                    data = line.split()
                    if len(data) > 1 and "Sequence:" in data[0]:
                        # Only the first word of the header is the name. The
                        # line is split on whitespace, so there is no
                        # trailing newline left to cut off.
                        seqName = data[1]
                        # A header with a long description must never be
                        # read as a repeat.
                        continue
                    if len(data) < 14:
                        continue
                    nbPatterns += 1
                    consensus = data[13]
                    copyNb = int( float(data[3]) + 0.5 )
                    start = data[0]
                    end = data[1]
                    outFile.write( "%i\t(%s)%i\t%s\t%s\t%s\n" % ( nbPatterns, consensus, copyNb, seqName, start, end ) )
        self._log.debug("Finished Parsing TRF output")

    def _clean(self):
        """
        Remove the TRF ".dat" report; a missing file is not an error.
        """
        try:
            os.remove(self._getDatFileName())
        except OSError:
            # Best effort: nothing to remove, or removal was refused.
            pass

    def run(self):
        """
        Launch TRF to detect micro-satellites in sequences.
        """
        LoggerFactory.setLevel(self._log, self._verbosity)
        self._checkOptions()
        self._log.info("START Launch")
        self._log.debug("Input file name: %s" % self.inFileName)

        self._launchTRF()
        self._parseTRF()

        if self._doClean:
            self._log.warning("Files will be cleaned")
            self._clean()
        self._log.info("END Launch")
150 | |
# Script entry point: configure a launcher from the command line, then run it.
if __name__ == "__main__":
    launcher = LaunchTRF()
    launcher.setAttributesFromCmdLine()
    launcher.run()
155 |