comparison commons/launcher/LaunchTRF.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
comparison
equal deleted inserted replaced
30:5677346472b5 31:0ab839023fe4
1 #!/usr/bin/env python
2
3 # Copyright INRA (Institut National de la Recherche Agronomique)
4 # http://www.inra.fr
5 # http://urgi.versailles.inra.fr
6 #
7 # This software is governed by the CeCILL license under French law and
8 # abiding by the rules of distribution of free software. You can use,
9 # modify and/ or redistribute the software under the terms of the CeCILL
10 # license as circulated by CEA, CNRS and INRIA at the following URL
11 # "http://www.cecill.info".
12 #
13 # As a counterpart to the access to the source code and rights to copy,
14 # modify and redistribute granted by the license, users are provided only
15 # with a limited warranty and the software's author, the holder of the
16 # economic rights, and the successive licensors have only limited
17 # liability.
18 #
19 # In this respect, the user's attention is drawn to the risks associated
20 # with loading, using, modifying and/or developing or reproducing the
21 # software by the user in light of its specific status of free software,
22 # that may mean that it is complicated to manipulate, and that also
23 # therefore means that it is reserved for developers and experienced
24 # professionals having in-depth computer knowledge. Users are therefore
25 # encouraged to load and test the software's suitability as regards their
26 # requirements in conditions enabling the security of their systems and/or
27 # data to be ensured and, more generally, to use and operate it in the
28 # same conditions as regards security.
29 #
30 # The fact that you are presently reading this means that you have had
31 # knowledge of the CeCILL license and that you accept its terms.
32
33 from commons.core.LoggerFactory import LoggerFactory
34 from commons.core.utils.RepetOptionParser import RepetOptionParser
35 from commons.core.checker.ConfigChecker import ConfigRules
36 from commons.core.checker.ConfigChecker import ConfigChecker
37 import subprocess
38 import glob
39 import os
40
# Prefix of the logger name; LaunchTRF.__init__ appends the class name to it.
LOG_DEPTH = "repet.launchers"
42
class LaunchTRF(object):
    """
    Launch the external 'trf' program on a fasta file and convert its '.dat'
    report into a tab-separated file.

    Each line of the output file holds: a running pattern number,
    '(consensus)copyNumber', the sequence name, the start and the end.
    """

    # Fixed TRF arguments placed between the input file and the maximum
    # period. According to the TRF documentation these are: match, mismatch,
    # delta, PM, PI and minscore. TRF also uses them to name its '.dat' file.
    _TRF_PARAMETERS = ("2", "3", "5", "80", "10", "20")

    # Output file name used when the caller does not give one.
    _DEFAULT_OUT_PATTERN = "%s.TRF.set"

    def __init__(self, inFileName = "", outFileName = "", maxPeriod=15, doClean = False, verbosity = 0):
        """
        @param inFileName: name of the input fasta file
        @param outFileName: name of the output file ("" means '<input>.TRF.set')
        @param maxPeriod: maximum period size passed to TRF
        @param doClean: if True, remove the TRF '.dat' file at the end of run()
        @param verbosity: verbosity level given to the logger
        """
        self.inFileName = inFileName
        self.setOutFileName(outFileName)
        self.maxPeriod = maxPeriod
        self._doClean = doClean
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

    def setAttributesFromCmdLine(self):
        """
        Parse the command line and set the attributes from the options found.
        """
        description = "Launch TRF to detect micro-satellites in sequences."
        epilog = "\nExample 1: launch without verbosity and keep temporary files.\n"
        # The example used to advertise 'LaunchTemplate.py' (copy-paste leftover).
        epilog += "\t$ python LaunchTRF.py -i file.fa -v 0"
        epilog += "\n\t"
        parser = RepetOptionParser(description = description, epilog = epilog)
        parser.add_option("-i", "--in", dest = "inFileName", action = "store", type = "string", help = "input file name [compulsory] [format: fasta]", default = "")
        parser.add_option("-o", "--out", dest = "outFileName", action = "store", type = "string", help = "output file name [default: <input>.TRF.set]", default = "")
        parser.add_option("-m", "--maxPeriod", dest = "maxPeriod", action = "store", type = "int", help = " maximum period size to report [default: 15]", default = 15)
        parser.add_option("-c", "--clean", dest = "doClean", action = "store_true", help = "clean temporary files [optional] [default: False]", default = False)
        parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 1]", default = 1)
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        """
        Copy the parsed options into the instance.
        """
        # The input name is set first because the default output name is
        # derived from it.
        self.setInFileName(options.inFileName)
        self.setOutFileName(options.outFileName)
        self.maxPeriod = options.maxPeriod
        self.setDoClean(options.doClean)
        self.setVerbosity(options.verbosity)

    def setInFileName(self, inFileName):
        """
        Set the input file name.

        If the output file name was left to its default, it is recomputed so
        that it keeps following the input file name.
        """
        self.inFileName = inFileName
        if getattr(self, "_isDefaultOutFileName", False):
            self._outFileName = self._DEFAULT_OUT_PATTERN % inFileName

    def setOutFileName(self, outFileName):
        """
        Set the output file name; "" selects '<input>.TRF.set'.
        """
        # Remember whether the name is a default, so that setInFileName()
        # can refresh it later.
        self._isDefaultOutFileName = (outFileName == "")
        if self._isDefaultOutFileName:
            self._outFileName = self._DEFAULT_OUT_PATTERN % self.inFileName
        else:
            self._outFileName = outFileName

    def setDoClean(self, doClean):
        """
        Set whether the TRF '.dat' file is removed at the end of run().
        """
        self._doClean = doClean

    def setVerbosity(self, verbosity):
        """
        Set the verbosity level (applied to the logger by run()).
        """
        self._verbosity = verbosity

    def _checkOptions(self):
        """
        Raise an Exception if the input file name is missing.
        """
        if self.inFileName == "":
            self._logAndRaise("ERROR: Missing input file name")

    def _logAndRaise(self, errorMsg):
        """
        Log the message as an error, then raise an Exception carrying it.
        """
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def _getDatFileName(self):
        """
        Return the name of the '.dat' file expected from TRF.
        """
        # NOTE(review): the name is built from the input name exactly as
        # given. TRF may write its report in the working directory under the
        # input base name only -- confirm for inputs given with a directory.
        return "%s.%s.%d.dat" % (self.inFileName, ".".join(self._TRF_PARAMETERS), self.maxPeriod)

    def _launchTRF(self):
        """
        Run 'trf' on the input file and log what it prints.

        @raise OSError: if the 'trf' executable cannot be started
        """
        # The argument list is built directly instead of splitting a command
        # string, so an input path containing spaces stays one argument.
        # '-h' and '-d' are documented by TRF as "suppress HTML output" and
        # "produce a data file".
        args = ["trf", self.inFileName]
        args.extend(self._TRF_PARAMETERS)
        args.extend(["%d" % self.maxPeriod, "-h", "-d"])
        self._log.debug("Running : %s" % " ".join(args))
        try:
            process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except OSError:
            self._log.error("ERROR: could not start 'trf'")
            raise
        stdout, stderr = process.communicate()
        self._log.debug("Output:\n%s" % stdout)
        if stderr:
            self._log.debug("Error output:\n%s" % stderr)
        # The exit status is deliberately not checked, as before.
        # NOTE(review): TRF is reported to exit with a non-zero status even
        # on success -- confirm before enforcing it.

    def _parseTRF(self):
        """
        Convert the TRF '.dat' report into the output file.

        Lines whose first field contains 'Sequence:' give the name of the
        current sequence. Lines with at least 14 fields are repeat records:
        field 0 is the start, field 1 the end, field 3 the copy number
        (rounded to the nearest integer) and field 13 the consensus.
        All other lines are ignored.
        """
        self._log.debug("Parsing TRF output")
        nbPatterns = 0
        seqName = ""
        with open(self._getDatFileName(), 'r') as inFile:
            with open(self._outFileName, 'w') as outFile:
                for line in inFile:
                    fields = line.split()
                    if not fields:
                        continue
                    if "Sequence:" in fields[0]:
                        # Only the first word of the header is the name. A
                        # header line is never a repeat record, however many
                        # words its description has.
                        seqName = fields[1] if len(fields) > 1 else ""
                        continue
                    if len(fields) < 14:
                        continue
                    nbPatterns += 1
                    consensus = fields[13]
                    copyNb = int( float(fields[3]) + 0.5 )
                    start = fields[0]
                    end = fields[1]
                    outFile.write( "%i\t(%s)%i\t%s\t%s\t%s\n" % ( nbPatterns, consensus, copyNb, seqName, start, end ) )
        self._log.debug("Finished Parsing TRF output")

    def _clean(self):
        """
        Remove the TRF '.dat' file; a file that cannot be removed is ignored.
        """
        try:
            os.remove(self._getDatFileName())
        except OSError:
            # Best effort: a file that is missing or cannot be removed is not an error.
            pass

    def run(self):
        """
        Launch TRF to detect micro-satellites in sequences.
        """
        LoggerFactory.setLevel(self._log, self._verbosity)
        self._checkOptions()
        self._log.info("START Launch")
        self._log.debug("Input file name: %s" % self.inFileName)

        self._launchTRF()
        self._parseTRF()

        if self._doClean:
            self._log.warning("Files will be cleaned")
            self._clean()
        self._log.info("END Launch")
150
if __name__ == "__main__":
    # Script entry point: configure a launcher from the command line, then run it.
    launcher = LaunchTRF()
    launcher.setAttributesFromCmdLine()
    launcher.run()
155