Mercurial > repos > yufei-luo > s_mart
comparison commons/launcher/launchBlasterMatcherPerQuery.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:b0e8584489e6 | 18:94ab73e8a190 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 This program splits the input fasta file into a given number of files, launches Blaster and/or Matcher on them in parallel and collects the results afterwards. | |
5 """ | |
6 | |
7 import os | |
8 import sys | |
9 import getopt | |
10 import exceptions | |
11 import logging | |
12 import ConfigParser | |
13 | |
# The 'pyRepet' package imported below is looked up under $REPET_PATH, so the
# script cannot run without it: stop right away with an explicit message.
# ('in' replaces dict.has_key(), which no longer exists in Python 3.)
if "REPET_PATH" not in os.environ:
    print( "*** Error: no environment variable REPET_PATH" )
    sys.exit(1)
sys.path.append( os.environ["REPET_PATH"] )
18 | |
19 import pyRepet.launcher.programLauncher | |
20 import pyRepet.seq.fastaDB | |
21 | |
22 #----------------------------------------------------------------------------- | |
23 | |
24 def help(): | |
25 | |
26 """ | |
27 Give the list of the command-line options. | |
28 """ | |
29 | |
30 print | |
31 print "usage:",sys.argv[0]," [ options ]" | |
32 print "options:" | |
33 print " -h: this help" | |
34 print " -q: fasta filename of the queries" | |
35 print " -s: fasta filename of the subjects (same as queries if not specified)" | |
36 print " -Q: queue name on the cluster" | |
37 print " -d: absolute path to the temporary directory" | |
38 print " -C: configuration file" | |
39 print " -n: max. number of jobs (default=10,given a min. of 1 query per job)" | |
40 print " -m: mix of Blaster and/or Matcher" | |
41 print " 1: launch Blaster only" | |
42 print " 2: launch Matcher only (on '*.align' query files)" | |
43 print " 3: launch Blaster+Matcher in the same job (default)" | |
44 print " -B: parameters for Blaster (e.g. \"-a -n tblastx\")" | |
45 print " -M: parameters for Matcher (e.g. \"-j\")" | |
46 print " -Z: collect all the results into a single file (format 'align', 'path' or 'tab')" | |
47 print " -c: clean" | |
48 print " -v: verbose (default=0/1/2)" | |
49 print | |
50 | |
51 #----------------------------------------------------------------------------- | |
52 | |
def _parseIntOption( option, value ):

    """
    Convert the argument of a command-line option into an integer.

    @param option: option flag, only used in the error message (e.g. "-n")
    @param value: raw string given on the command line
    @return: the integer value (the program exits with status 1 if 'value'
             is not an integer)
    """

    try:
        return int( value )
    except ValueError:
        print( "*** Error: option %s requires an integer (got '%s')" % ( option, value ) )
        sys.exit(1)


def main():

    """
    Split the input fasta file into a given number of files, launch Blaster
    and/or Matcher on them in parallel and collect the results afterwards.

    The options are read from sys.argv (see help()). The program exits with
    status 1 on a usage error or when an input file is missing.

    @return: 0 once every step has been launched
    """

    qryFileName = ""
    sbjFileName = ""
    queue = ""
    tmpDir = ""
    configFileName = ""
    maxNbJobs = 10
    minQryPerJob = 1
    mix = "3"
    paramBlaster = ""
    paramMatcher = ""
    collectFormat = ""
    clean = False
    verbose = 0

    try:
        opts, _ = getopt.getopt( sys.argv[1:], "hq:s:Q:d:C:n:m:B:M:Z:cv:" )
    except getopt.GetoptError as err:
        print( str(err) )
        help()
        sys.exit(1)
    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-q":
            qryFileName = a
        elif o == "-s":
            sbjFileName = a
        elif o == "-Q":
            queue = a
        elif o == "-d":
            tmpDir = a
        elif o == "-C":
            configFileName = a
        elif o == "-n":
            maxNbJobs = _parseIntOption( o, a )
        elif o == "-m":
            mix = a
        elif o == "-B":
            paramBlaster = a
        elif o == "-M":
            paramMatcher = a
        elif o == "-Z":
            collectFormat = a
        elif o == "-c":
            clean = True
        elif o == "-v":
            verbose = _parseIntOption( o, a )

    if qryFileName == "" or configFileName == "" or collectFormat == "":
        print( "*** Error: missing compulsory options" )
        help()
        sys.exit(1)

    # maxNbJobs is used as a divisor below: 0 would raise ZeroDivisionError
    if maxNbJobs < 1:
        print( "*** Error: option -n should be at least 1 (got %i)" % ( maxNbJobs ) )
        sys.exit(1)

    if verbose > 0:
        print( "\nbeginning of %s" % ( sys.argv[0].split("/")[-1] ) )
        sys.stdout.flush()

    if not os.path.exists( qryFileName ):
        print( "*** Error: query file '%s' doesn't exist" % ( qryFileName ) )
        sys.exit(1)
    if sbjFileName != "":
        if not os.path.exists( sbjFileName ):
            print( "*** Error: subject file '%s' doesn't exist" % ( sbjFileName ) )
            sys.exit(1)
    else:
        # no subject given: the queries are compared to themselves
        sbjFileName = qryFileName

    pL = pyRepet.launcher.programLauncher.programLauncher()

    nbSeqQry = pyRepet.seq.fastaDB.dbSize( qryFileName )
    qryPerJob = nbSeqQry / float(maxNbJobs)

    # split the input query file in single files into a new directory
    prg = os.environ["REPET_PATH"] + "/bin/dbSplit.py"
    cmd = prg
    cmd += " -i %s" % ( qryFileName )
    if qryPerJob <= 1.0:
        cmd += " -n %i" % ( minQryPerJob )
    else:
        # '%i' truncates the float: the value given is floor(qryPerJob) + 1
        cmd += " -n %i" % ( qryPerJob + 1 )
    cmd += " -d"
    pL.launch( prg, cmd )

    # prepare the subject databank
    if sbjFileName != qryFileName:
        prg = "blaster"
        cmd = prg
        cmd += " -q %s" % ( sbjFileName )
        cmd += " -P"
        pL.launch( prg, cmd )

    # launch Blaster+Matcher in parallel
    prg = "srptBlasterMatcher.py"
    cmd = prg
    cmd += " -g %s_vs_%s" % ( qryFileName, sbjFileName )
    # presumably 'batches' is the directory created by dbSplit.py above -- TODO confirm
    cmd += " -q %s/batches" % ( os.getcwd() )
    # os.path.join keeps an absolute subject path as it is, instead of
    # gluing it after the current directory
    cmd += " -s %s" % ( os.path.join( os.getcwd(), sbjFileName ) )
    cmd += " -Q '%s'" % ( queue )
    if tmpDir != "":
        cmd += " -d %s" % ( tmpDir )
    cmd += " -m %s" % ( mix )
    if paramBlaster != "":
        cmd += " -B \"%s\"" % ( paramBlaster )
    if paramMatcher != "":
        cmd += " -M \"%s\"" % ( paramMatcher )
    cmd += " -Z %s" % ( collectFormat )
    cmd += " -C %s" % ( configFileName )
    if clean:
        cmd += " -c"
    # the launched script runs one verbosity level below this one
    cmd += " -v %i" % ( verbose - 1 )
    pL.launch( prg, cmd )

    # when Matcher was run, rename the collected file so that its name tells
    # which kind of matches it holds; nothing to rename for Blaster only
    # (the suffix would be empty)
    if mix in ["2","3"]:
        if "-a" in paramMatcher:
            suffix = "match.%s" % ( collectFormat )
        else:
            suffix = "clean_match.%s" % ( collectFormat )
        os.system( "mv %s_vs_%s.%s %s_vs_%s.align.%s" % ( qryFileName, sbjFileName, collectFormat, qryFileName, sbjFileName, suffix ) )

    # clean: remove the 'batches' directory, 'formatdb.log' and the
    # '_cut*' and '.Nstretch.map' files named after the subject file
    if clean:
        prg = "rm"
        cmd = prg
        cmd += " -rf batches formatdb.log %s_cut* %s.Nstretch.map" % ( sbjFileName, sbjFileName )
        pL.launch( prg, cmd )

    if verbose > 0:
        print( "%s finished successfully\n" % ( sys.argv[0].split("/")[-1] ) )
        sys.stdout.flush()

    return 0
192 | |
193 #---------------------------------------------------------------------------- | |
194 | |
if __name__ == '__main__':
    # main() returns its status (0 on success): hand it over to the shell
    # instead of dropping it
    sys.exit( main() )