comparison commons/launcher/LaunchNucmer.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
comparison
equal deleted inserted replaced
17:b0e8584489e6 18:94ab73e8a190
1 #! /usr/bin/env python
2
3 # Copyright INRA (Institut National de la Recherche Agronomique)
4 # http://www.inra.fr
5 # http://urgi.versailles.inra.fr
6 #
7 # This software is governed by the CeCILL license under French law and
8 # abiding by the rules of distribution of free software. You can use,
9 # modify and/ or redistribute the software under the terms of the CeCILL
10 # license as circulated by CEA, CNRS and INRIA at the following URL
11 # "http://www.cecill.info".
12 #
13 # As a counterpart to the access to the source code and rights to copy,
14 # modify and redistribute granted by the license, users are provided only
15 # with a limited warranty and the software's author, the holder of the
16 # economic rights, and the successive licensors have only limited
17 # liability.
18 #
19 # In this respect, the user's attention is drawn to the risks associated
20 # with loading, using, modifying and/or developing or reproducing the
21 # software by the user in light of its specific status of free software,
22 # that may mean that it is complicated to manipulate, and that also
23 # therefore means that it is reserved for developers and experienced
24 # professionals having in-depth computer knowledge. Users are therefore
25 # encouraged to load and test the software's suitability as regards their
26 # requirements in conditions enabling the security of their systems and/or
27 # data to be ensured and, more generally, to use and operate it in the
28 # same conditions as regards security.
29 #
30 # The fact that you are presently reading this means that you have had
31 # knowledge of the CeCILL license and that you accept its terms.
32
33 from commons.core.checker.CheckerUtils import CheckerUtils
34 from commons.core.utils.FileUtils import FileUtils
35 from commons.core.utils.RepetOptionParser import RepetOptionParser
36 import subprocess
37 from commons.core.LoggerFactory import LoggerFactory
38 import os
39
# Prefix of the logger name; LaunchNucmer.__init__ appends the class name to it
# when it asks LoggerFactory for a logger.
LOG_DEPTH = "repet.tools"
41
class LaunchNucmer(object):
    """Run the nucmer aligner and, optionally, show-coords on its delta file.

    The launcher can be configured either through the constructor or from the
    command line (see setAttributesFromCmdLine), then executed with run().
    """

    def __init__(self, queryFileName="", refFileName="", prefix=None, genCoords=False, showCoords=False, mum=False, maxGaps=90, minMatch=20, nooptimize=False, mincluster=65, minIdentity=50, minLength=100, verbosity=0):
        """Store the launcher settings and create the logger.

        queryFileName / refFileName: fasta files given to nucmer.
        prefix: value for nucmer's --prefix option; None means the option is
            not passed.
        genCoords: pass "-o" to nucmer.
        showCoords: run show-coords after nucmer.
        mum: pass "--mum" to nucmer.
        maxGaps, minMatch, mincluster: values for nucmer's -g, -l and -c options.
        nooptimize: pass "--nooptimize" instead of "--optimize".
        minIdentity, minLength: values for show-coords' -I and -L options.
        verbosity: level handed to LoggerFactory.
        """
        self._queryFileName = queryFileName
        self._refFileName = refFileName
        self._prefix = prefix
        self._genCoords = genCoords
        self._showCoords = showCoords
        self._mum = mum
        self._maxgaps = maxGaps
        self._minMatch = minMatch
        self._nooptimize = nooptimize
        self._mincluster = mincluster
        self._minIdentity = minIdentity
        self._minLength = minLength
        self.verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)

    def setMincluster(self, value):
        """Set the value passed to nucmer's -c option."""
        self._mincluster = value

    def getMincluster(self):
        """Return the value passed to nucmer's -c option."""
        return self._mincluster

    mincluster = property(getMincluster, setMincluster)

    def setAttributesFromCmdLine(self):
        """Parse the command line and copy the parsed options onto this instance."""
        description = "LaunchNucmer runs the Nucmer program (part of the mummer package) ."
        parser = RepetOptionParser(description = description)
        parser.add_option("-q", "--query", dest="queryFileName", default = "", action="store", type="string", help="input query file [compulsory] [format: fasta]")
        parser.add_option("-r", "--ref", dest="refFileName", default = "", action="store", type="string", help="input ref file [compulsory] [format: fasta]")
        parser.add_option("-p", "--prefix", dest="prefix", default = None, action="store", type="string", help="prefix name [optional]")
        # Boolean flags default to False explicitly, like the constructor does.
        parser.add_option("-o","--gencoords", dest="genCoords", default = False, action="store_true", help="generate coords file with minimal option (show-coords -r) [optional] ")
        parser.add_option("-s","--showcoords", dest="showCoords", default = False, action="store_true", help="generate coords file with: show-coords -r -c -l -d -I 50 -L 100 -T [optional] ")
        parser.add_option("-m", "--mum", dest="mum", default = False, action="store_true", help="Use anchor matches that are unique in both the reference and query [optional] ")
        parser.add_option("-g", "--maxgaps", dest="maxgaps", default = 90, action="store", type="int", help="Maximum gap between two adjacent matches in a cluster (default 90) [optional] ")
        parser.add_option("-l", "--minmatch", dest="minMatch", default = 20, action="store", type="int", help="Minimum length of an maximal exact match (default 20) [optional] ")
        parser.add_option("-n", "--nooptimize", dest="nooptimize", default = False, action="store_true", help="nooptimize (default --optimize) [optional] ")
        parser.add_option("-j", "--mincluster", dest="mincluster", default = 65, action="store", type="int", help="Minimum length of a cluster of matches (default 65) [optional] ")

        parser.add_option("-i", "--minIdentity", dest="minIdentity", default = 50, action="store", type="int", help="Minimum identity for show_coords (default 50) [optional] ")
        parser.add_option("-u", "--minLength", dest="minLength", default = 100, action="store", type="int", help="Minimum alignment length for show_coords (default 100) [optional] ")
        parser.add_option("-v", "--verbosity", dest="verbosity", default = 0, action="store", type="int", help="verbosity [optional] ")

        (self._options, args) = parser.parse_args()
        self._setAttributesFromOptions(self._options)

    def _setAttributesFromOptions(self, options):
        """Copy every parsed option onto the matching instance attribute."""
        self._queryFileName = options.queryFileName
        self._refFileName = options.refFileName
        self._prefix = options.prefix
        self._genCoords = options.genCoords
        self._showCoords = options.showCoords
        self._mum = options.mum
        self._maxgaps = options.maxgaps
        self._minMatch = options.minMatch
        self._nooptimize = options.nooptimize
        self._mincluster = options.mincluster

        self._minIdentity = options.minIdentity
        self._minLength = options.minLength

        # The logger level itself is refreshed from this value in run().
        self.verbosity = options.verbosity

    def _logAndRaise(self, errorMsg):
        """Log errorMsg at error level, then raise it as an Exception."""
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def checkOptions(self):
        """Check that the query and reference files are set and exist.

        Raises Exception (through _logAndRaise) on the first problem found.
        """
        # "not name" rejects None as well as the empty string.
        if not self._queryFileName:
            self._logAndRaise("ERROR: No specified --query option!")
        if not FileUtils.isRessourceExists(self._queryFileName):
            self._logAndRaise("ERROR: Query file: %s does not exist!" % self._queryFileName)

        if not self._refFileName:
            self._logAndRaise("ERROR: No specified --ref option!")
        if not FileUtils.isRessourceExists(self._refFileName):
            # The message needs a %s placeholder: without it the % operator
            # raises TypeError and hides the real error.
            self._logAndRaise("ERROR: Ref file: %s does not exist!" % self._refFileName)

    def _buildNucmerCmd(self):
        """Return the nucmer command line as a list of arguments.

        Building a list (rather than splitting a formatted string) keeps file
        names that contain whitespace as single arguments.
        """
        cmd = ["nucmer", self._refFileName, self._queryFileName]
        if self._prefix is not None:
            cmd.append("--prefix=%s" % (self._prefix))
        if self._genCoords:
            cmd.append("-o")
        if self._mum:
            cmd.append("--mum")
        cmd.append("-g=%d" % self._maxgaps)
        cmd.append("-l=%d" % self._minMatch)
        if self._nooptimize:
            cmd.append("--nooptimize")
        else:
            cmd.append("--optimize")
        cmd.append("-c=%d" % self._mincluster)
        return cmd

    def _getOutputPrefix(self):
        """Return the prefix of the files nucmer writes.

        When no prefix was requested, --prefix is not passed and nucmer falls
        back to its documented default, "out".
        """
        if self._prefix is None:
            return "out"
        return self._prefix

    def _buildShowCoordsCmd(self):
        """Return the show-coords command line (without redirection) as a list."""
        return ["show-coords", "-r", "-c", "-l", "-d",
                "-I", "%d" % self._minIdentity,
                "-L", "%d" % self._minLength,
                "-T", "%s.delta" % self._getOutputPrefix()]

    def run(self):
        """Run nucmer and, if requested, show-coords on the resulting delta file.

        Returns the exit code of nucmer. Raises Exception when nucmer is not in
        the user's path or when the input files are missing.
        """
        LoggerFactory.setLevel(self._log, self.verbosity)
        if not CheckerUtils.isExecutableInUserPath("nucmer"):
            self._logAndRaise("ERROR: nucmer must be in your path")
        self.checkOptions()

        cmd = self._buildNucmerCmd()
        self._log.debug("Running nucmer with following commands : %s" % " ".join(cmd))
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains both pipes while waiting; wait() alone can
        # deadlock once the child fills a pipe buffer.
        stdout, stderr = process.communicate()

        if process.returncode != 0:
            # Without a fresh delta file show-coords has nothing valid to read.
            self._log.error("nucmer exited with code %d: %s" % (process.returncode, stderr))
            return process.returncode

        if self._showCoords:
            cmd = self._buildShowCoordsCmd()
            coordsFileName = "%s.coords" % self._getOutputPrefix()
            self._log.debug("Running show-coords with following commands : %s > %s" % (" ".join(cmd), coordsFileName))
            # Passing the file handle as stdout performs the redirection
            # without going through a shell.
            with open(coordsFileName, "w") as coordsFile:
                try:
                    subprocess.call(cmd, stdout=coordsFile)
                except OSError as e:
                    self._log.error("Could not run show-coords: %s" % e)

        return process.returncode
154
# Script entry point: configure a launcher from the command line and run it.
if __name__ == "__main__":
    launcher = LaunchNucmer()
    launcher.setAttributesFromCmdLine()
    launcher.run()