18
|
1 #!/usr/bin/env python
|
|
2
|
|
3 """
|
|
This program splits the input fasta file into a given number of files, launches Blaster and/or Matcher on them in parallel and collects the results afterwards.
|
|
5 """
|
|
6
|
|
7 import os
|
|
8 import sys
|
|
9 import getopt
|
|
10 import exceptions
|
|
11 import logging
|
|
12 import ConfigParser
|
|
13
|
|
# The pyRepet modules imported below are looked up in the directory given by
# the REPET_PATH environment variable, so abort early when it is not set.
# 'in' replaces the deprecated dict.has_key() (removed in Python 3).
if "REPET_PATH" not in os.environ:
    print( "*** Error: no environment variable REPET_PATH" )
    sys.exit(1)
sys.path.append( os.environ["REPET_PATH"] )
18
|
|
19 import pyRepet.launcher.programLauncher
|
|
20 import pyRepet.seq.fastaDB
|
|
21
|
|
22 #-----------------------------------------------------------------------------
|
|
23
|
|
def help():

    """
    Print the usage message and the list of command-line options on stdout,
    surrounded by one blank line before and one after.
    """

    # The whole message is assembled first and written with a single print;
    # the leading and trailing empty items yield the surrounding blank lines.
    usageLines = [
        "",
        "usage: %s  [ options ]" % ( sys.argv[0] ),
        "options:",
        " -h: this help",
        " -q: fasta filename of the queries",
        " -s: fasta filename of the subjects (same as queries if not specified)",
        " -Q: queue name on the cluster",
        " -d: absolute path to the temporary directory",
        " -C: configuration file",
        " -n: max. number of jobs (default=10,given a min. of 1 query per job)",
        " -m: mix of Blaster and/or Matcher",
        " 1: launch Blaster only",
        " 2: launch Matcher only (on '*.align' query files)",
        " 3: launch Blaster+Matcher in the same job (default)",
        " -B: parameters for Blaster (e.g. \"-a -n tblastx\")",
        " -M: parameters for Matcher (e.g. \"-j\")",
        " -Z: collect all the results into a single file (format 'align', 'path' or 'tab')",
        " -c: clean",
        " -v: verbose (default=0/1/2)",
        "",
    ]
    print( "\n".join( usageLines ) )
50
|
|
51 #-----------------------------------------------------------------------------
|
|
52
|
|
def _parseIntOption( flag, value ):

    """
    Convert the argument of a numeric command-line option into an integer.

    @param flag: name of the option (e.g. "-n"), only used in the error message
    @param value: string given on the command line for this option
    @return: the value as an integer
    Print an error message and exit with status 1 if 'value' is not an integer.
    """

    try:
        return int( value )
    except ValueError:
        print( "*** Error: option %s requires an integer (got '%s')" % ( flag, value ) )
        sys.exit(1)


def main():

    """
    This program splits the input fasta file into a given number of files, launches Blaster and/or Matcher on them in parallel and collects the results afterwards.

    Options are read from sys.argv (see help()). Options -q, -C and -Z are
    compulsory. The program exits with status 1 on a usage error or when an
    input file is missing.

    @return: 0 when all the steps have been launched
    """

    qryFileName = ""
    sbjFileName = ""
    queue = ""
    tmpDir = ""
    configFileName = ""
    maxNbJobs = 10
    minQryPerJob = 1
    mix = "3"
    paramBlaster = ""
    paramMatcher = ""
    collectFormat = ""
    clean = False
    verbose = 0

    try:
        opts, _ = getopt.getopt( sys.argv[1:], "hq:s:Q:d:C:n:m:B:M:Z:cv:" )
    except getopt.GetoptError as err:
        print( str(err) )
        help()
        sys.exit(1)
    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-q":
            qryFileName = a
        elif o == "-s":
            sbjFileName = a
        elif o == "-Q":
            queue = a
        elif o == "-d":
            tmpDir = a
        elif o == "-C":
            configFileName = a
        elif o == "-n":
            # a non-numeric value used to end in an uncaught ValueError
            maxNbJobs = _parseIntOption( o, a )
        elif o == "-m":
            mix = a
        elif o == "-B":
            paramBlaster = a
        elif o == "-M":
            paramMatcher = a
        elif o == "-Z":
            collectFormat = a
        elif o == "-c":
            clean = True
        elif o == "-v":
            verbose = _parseIntOption( o, a )

    if qryFileName == "" or configFileName == "" or collectFormat == "":
        print( "*** Error: missing compulsory options" )
        help()
        sys.exit(1)

    # maxNbJobs is used as a divisor below: 0 would raise ZeroDivisionError
    # and a negative value is meaningless
    if maxNbJobs < 1:
        print( "*** Error: the max. number of jobs (-n) should be at least 1 (got %i)" % ( maxNbJobs ) )
        sys.exit(1)

    if verbose > 0:
        print( "\nbeginning of %s" % ( sys.argv[0].split("/")[-1] ) )
        sys.stdout.flush()

    if not os.path.exists( qryFileName ):
        print( "*** Error: query file '%s' doesn't exist" % ( qryFileName ) )
        sys.exit(1)
    if sbjFileName != "":
        if not os.path.exists( sbjFileName ):
            print( "*** Error: subject file '%s' doesn't exist" % ( sbjFileName ) )
            sys.exit(1)
    else:
        # no subject given: the queries are compared against themselves
        sbjFileName = qryFileName

    pL = pyRepet.launcher.programLauncher.programLauncher()

    nbSeqQry = pyRepet.seq.fastaDB.dbSize( qryFileName )
    qryPerJob = nbSeqQry / float(maxNbJobs)

    # split the input query file in single files into a new directory
    # NOTE(review): '-n' of dbSplit.py is presumably the number of sequences
    # per output file, and the new directory presumably the 'batches' one
    # used below -- confirm against dbSplit.py
    prg = os.environ["REPET_PATH"] + "/bin/dbSplit.py"
    cmd = prg
    cmd += " -i %s" % ( qryFileName )
    if qryPerJob <= 1.0:
        cmd += " -n %i" % ( minQryPerJob )
    else:
        # '%i' truncates the float, i.e. floor(qryPerJob) + 1
        cmd += " -n %i" % ( qryPerJob + 1 )
    cmd += " -d"
    pL.launch( prg, cmd )

    # prepare the subject databank
    if sbjFileName != qryFileName:
        prg = "blaster"
        cmd = prg
        cmd += " -q %s" % ( sbjFileName )
        cmd += " -P"
        pL.launch( prg, cmd )

    # launch Blaster+Matcher in parallel
    # NOTE(review): the '-s' path is built as <cwd>/<subject>, which assumes
    # that the subject file name is relative to the current directory
    prg = "srptBlasterMatcher.py"
    cmd = prg
    cmd += " -g %s_vs_%s" % ( qryFileName, sbjFileName )
    cmd += " -q %s/batches" % ( os.getcwd() )
    cmd += " -s %s/%s" % ( os.getcwd(), sbjFileName )
    cmd += " -Q '%s'" % ( queue )
    if tmpDir != "":
        cmd += " -d %s" % ( tmpDir )
    cmd += " -m %s" % ( mix )
    if paramBlaster != "":
        cmd += " -B \"%s\"" % ( paramBlaster )
    if paramMatcher != "":
        cmd += " -M \"%s\"" % ( paramMatcher )
    cmd += " -Z %s" % ( collectFormat )
    cmd += " -C %s" % ( configFileName )
    if clean:
        cmd += " -c"
    # the launched script is run one verbosity level below this one
    cmd += " -v %i" % ( verbose - 1 )
    pL.launch( prg, cmd )

    # rename the collected file when Matcher was run
    # The renaming is restricted to the Matcher modes: without a suffix the
    # target name would end with a dangling '.'.
    if mix in ["2","3"]:
        if "-a" in paramMatcher:
            suffix = "match.%s" % ( collectFormat )
        else:
            suffix = "clean_match.%s" % ( collectFormat )
        collectedFile = "%s_vs_%s.%s" % ( qryFileName, sbjFileName, collectFormat )
        renamedFile = "%s_vs_%s.align.%s" % ( qryFileName, sbjFileName, suffix )
        # os.rename instead of a shell 'mv': no quoting issue with the file
        # names; a failure is still not fatal but it is reported
        try:
            os.rename( collectedFile, renamedFile )
        except OSError as err:
            print( "*** Warning: can't rename '%s' into '%s' (%s)" % ( collectedFile, renamedFile, err ) )

    # clean
    if clean:
        prg = "rm"
        cmd = prg
        cmd += " -rf batches formatdb.log %s_cut* %s.Nstretch.map" % ( sbjFileName, sbjFileName )
        pL.launch( prg, cmd )

    if verbose > 0:
        print( "%s finished successfully\n" % ( sys.argv[0].split("/")[-1] ) )
        sys.stdout.flush()

    return 0
192
|
|
193 #----------------------------------------------------------------------------
|
|
194
|
|
if __name__ == '__main__':
    # propagate the status returned by main() as the exit status of the script
    sys.exit( main() )