comparison commons/launcher/launchBlasterMatcherPerQuery.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
comparison
equal deleted inserted replaced
17:b0e8584489e6 18:94ab73e8a190
1 #!/usr/bin/env python
2
3 """
4 This program splits the input fasta file in a given number of files, launch Blaster and/or Matcher on them in parallel and collect the results afterwards.
5 """
6
7 import os
8 import sys
9 import getopt
10 import exceptions
11 import logging
12 import ConfigParser
13
# REPET_PATH must be set: it is added to sys.path so that the pyRepet
# modules imported right after this check can be found.
if "REPET_PATH" not in os.environ:
    print("*** Error: no environment variable REPET_PATH")
    sys.exit(1)
sys.path.append(os.environ["REPET_PATH"])
18
19 import pyRepet.launcher.programLauncher
20 import pyRepet.seq.fastaDB
21
22 #-----------------------------------------------------------------------------
23
def help():
    """
    Print the list of the command-line options on the standard output.

    The program name shown in the usage line is taken from sys.argv[0].
    The message starts and ends with an empty line.
    """
    usageLines = (
        "",
        # two spaces before '[': same output as the former comma-separated print
        "usage: %s  [ options ]" % (sys.argv[0]),
        "options:",
        " -h: this help",
        " -q: fasta filename of the queries",
        " -s: fasta filename of the subjects (same as queries if not specified)",
        " -Q: queue name on the cluster",
        " -d: absolute path to the temporary directory",
        " -C: configuration file",
        " -n: max. number of jobs (default=10,given a min. of 1 query per job)",
        " -m: mix of Blaster and/or Matcher",
        " 1: launch Blaster only",
        " 2: launch Matcher only (on '*.align' query files)",
        " 3: launch Blaster+Matcher in the same job (default)",
        " -B: parameters for Blaster (e.g. \"-a -n tblastx\")",
        " -M: parameters for Matcher (e.g. \"-j\")",
        " -Z: collect all the results into a single file (format 'align', 'path' or 'tab')",
        " -c: clean",
        " -v: verbose (default=0/1/2)",
        "",
    )
    # A single-argument print() call behaves identically on Python 2 and 3.
    print("\n".join(usageLines))
50
51 #-----------------------------------------------------------------------------
52
def main():
    """
    Split the input fasta file in a given number of files, launch Blaster
    and/or Matcher on them in parallel and collect the results afterwards.

    The options are read from sys.argv (see help() for the list). Options
    -q, -C and -Z are compulsory. When -s is not given, the query file is
    also used as the subject file. The subject file is handed to the
    parallel step as '<current directory>/<subject file name>', so it is
    expected to be given relative to the current directory.

    Returns 0 on success. Exits with status 0 after -h, and with status 1
    on an unknown option, a missing compulsory option, an invalid value
    for -n or -v, or a query/subject file that does not exist.
    """
    qryFileName = ""
    sbjFileName = ""
    queue = ""
    tmpDir = ""
    configFileName = ""
    maxNbJobs = 10
    minQryPerJob = 1
    mix = "3"
    paramBlaster = ""
    paramMatcher = ""
    collectFormat = ""
    clean = False
    verbose = 0

    try:
        opts, _ = getopt.getopt(sys.argv[1:], "hq:s:Q:d:C:n:m:B:M:Z:cv:")
    except getopt.GetoptError as err:
        print(str(err))
        help()
        sys.exit(1)
    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-q":
            qryFileName = a
        elif o == "-s":
            sbjFileName = a
        elif o == "-Q":
            queue = a
        elif o == "-d":
            tmpDir = a
        elif o == "-C":
            configFileName = a
        elif o == "-n":
            maxNbJobs = _parseIntOption(o, a)
        elif o == "-m":
            mix = a
        elif o == "-B":
            paramBlaster = a
        elif o == "-M":
            paramMatcher = a
        elif o == "-Z":
            collectFormat = a
        elif o == "-c":
            clean = True
        elif o == "-v":
            verbose = _parseIntOption(o, a)

    if qryFileName == "" or configFileName == "" or collectFormat == "":
        print("*** Error: missing compulsory options")
        help()
        sys.exit(1)

    # maxNbJobs is used as a divisor below: 0 would raise ZeroDivisionError
    # and a negative value would give a meaningless batch size.
    if maxNbJobs < 1:
        print("*** Error: option -n should be at least 1 (got %i)" % (maxNbJobs))
        sys.exit(1)

    if verbose > 0:
        print("\nbeginning of %s" % (sys.argv[0].split("/")[-1]))
        sys.stdout.flush()

    if not os.path.exists(qryFileName):
        print("*** Error: query file '%s' doesn't exist" % (qryFileName))
        sys.exit(1)
    if sbjFileName != "":
        if not os.path.exists(sbjFileName):
            print("*** Error: subject file '%s' doesn't exist" % (sbjFileName))
            sys.exit(1)
    else:
        sbjFileName = qryFileName

    pL = pyRepet.launcher.programLauncher.programLauncher()

    nbSeqQry = pyRepet.seq.fastaDB.dbSize(qryFileName)
    qryPerJob = nbSeqQry / float(maxNbJobs)

    # split the input query file in single files into a new directory
    if qryPerJob <= 1.0:
        nbSeqPerBatch = minQryPerJob
    else:
        # truncated quotient + 1, so that no more than maxNbJobs batches are made
        nbSeqPerBatch = int(qryPerJob + 1)
    prg = os.environ["REPET_PATH"] + "/bin/dbSplit.py"
    cmd = prg
    cmd += " -i %s" % (qryFileName)
    cmd += " -n %i" % (nbSeqPerBatch)
    cmd += " -d"
    pL.launch(prg, cmd)

    # prepare the subject databank
    if sbjFileName != qryFileName:
        prg = "blaster"
        cmd = prg
        cmd += " -q %s" % (sbjFileName)
        cmd += " -P"
        pL.launch(prg, cmd)

    # launch Blaster+Matcher in parallel
    prg = "srptBlasterMatcher.py"
    cmd = prg
    cmd += " -g %s_vs_%s" % (qryFileName, sbjFileName)
    cmd += " -q %s/batches" % (os.getcwd())
    cmd += " -s %s/%s" % (os.getcwd(), sbjFileName)
    cmd += " -Q '%s'" % (queue)
    if tmpDir != "":
        cmd += " -d %s" % (tmpDir)
    cmd += " -m %s" % (mix)
    if paramBlaster != "":
        cmd += " -B \"%s\"" % (paramBlaster)
    if paramMatcher != "":
        cmd += " -M \"%s\"" % (paramMatcher)
    cmd += " -Z %s" % (collectFormat)
    cmd += " -C %s" % (configFileName)
    if clean:
        cmd += " -c"
    # the launched script runs one verbosity level below this one
    cmd += " -v %i" % (verbose - 1)
    pL.launch(prg, cmd)

    # when Matcher was run, rename the collected file after the Matcher output
    if mix in ["2", "3"]:
        if "-a" in paramMatcher:
            suffix = "match.%s" % (collectFormat)
        else:
            suffix = "clean_match.%s" % (collectFormat)
        collectedFile = "%s_vs_%s.%s" % (qryFileName, sbjFileName, collectFormat)
        renamedFile = "%s_vs_%s.align.%s" % (qryFileName, sbjFileName, suffix)
        # os.rename instead of a shell 'mv': the file names are never
        # interpreted by a shell. A failure is reported but, as before,
        # does not stop the program.
        try:
            os.rename(collectedFile, renamedFile)
        except OSError as err:
            sys.stderr.write("*** Warning: can't rename '%s' into '%s' (%s)\n"
                             % (collectedFile, renamedFile, err))

    # clean
    if clean:
        prg = "rm"
        cmd = prg
        # the '*' wildcard is expected to be expanded by the launcher's shell
        cmd += " -rf batches formatdb.log %s_cut* %s.Nstretch.map" % (sbjFileName, sbjFileName)
        pL.launch(prg, cmd)

    if verbose > 0:
        print("%s finished successfully\n" % (sys.argv[0].split("/")[-1]))
        sys.stdout.flush()

    return 0


def _parseIntOption(option, value):
    """
    Return 'value' converted to an integer; print an error and exit with
    status 1 if 'value' (given to command-line option 'option') is not one.
    """
    try:
        return int(value)
    except ValueError:
        print("*** Error: option %s requires an integer (got '%s')" % (option, value))
        sys.exit(1)
192
193 #----------------------------------------------------------------------------
194
if __name__ == "__main__":
    # main() returns the exit status: hand it over to the shell
    sys.exit(main())