annotate commons/launcher/launchBlasterMatcherPerQuery.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 """
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 This program splits the input fasta file into a given number of files, launches Blaster and/or Matcher on them in parallel and collects the results afterwards.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 """
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 import os
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 import sys
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9 import getopt
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10 import exceptions
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 import logging
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12 import ConfigParser
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
# REPET_PATH is appended to sys.path ahead of the pyRepet imports that follow,
# so the script stops here with an error when the variable is not set.
if "REPET_PATH" not in os.environ:
    print( "*** Error: no environment variable REPET_PATH" )
    sys.exit(1)
sys.path.append( os.environ["REPET_PATH"] )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
19 import pyRepet.launcher.programLauncher
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
20 import pyRepet.seq.fastaDB
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
21
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
22 #-----------------------------------------------------------------------------
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
23
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def help():

    """
    Give the list of the command-line options.

    The usage text is written to standard output, preceded and followed by
    one blank line. The program name shown is sys.argv[0].
    """

    # NOTE(review): the spacing inside these strings is reproduced as it
    # appears in the annotate view, which may have collapsed repeated spaces;
    # confirm against the repository copy.
    usageLines = (
        "",
        "usage: %s  [ options ]" % ( sys.argv[0] ),
        "options:",
        " -h: this help",
        " -q: fasta filename of the queries",
        " -s: fasta filename of the subjects (same as queries if not specified)",
        " -Q: queue name on the cluster",
        " -d: absolute path to the temporary directory",
        " -C: configuration file",
        " -n: max. number of jobs (default=10,given a min. of 1 query per job)",
        " -m: mix of Blaster and/or Matcher",
        " 1: launch Blaster only",
        " 2: launch Matcher only (on '*.align' query files)",
        " 3: launch Blaster+Matcher in the same job (default)",
        " -B: parameters for Blaster (e.g. \"-a -n tblastx\")",
        " -M: parameters for Matcher (e.g. \"-j\")",
        " -Z: collect all the results into a single file (format 'align', 'path' or 'tab')",
        " -c: clean",
        " -v: verbose (default=0/1/2)",
        "",
    )
    # A single write behaves the same under Python 2 and Python 3, unlike the
    # print statement.
    sys.stdout.write( "\n".join( usageLines ) + "\n" )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
50
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
51 #-----------------------------------------------------------------------------
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
52
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def main():

    """
    Split the query fasta file into batches, launch Blaster and/or Matcher on
    the batches in parallel and collect the results afterwards.

    Options are read from sys.argv (see help()). The process exits with
    status 1 when an option is invalid, a compulsory option (-q, -C, -Z) is
    missing, or an input file does not exist; with status 0 after -h.
    Otherwise the function returns 0.
    """

    qryFileName = ""
    sbjFileName = ""
    queue = ""
    tmpDir = ""
    configFileName = ""
    maxNbJobs = 10
    minQryPerJob = 1
    mix = "3"
    paramBlaster = ""
    paramMatcher = ""
    collectFormat = ""
    clean = False
    verbose = 0

    try:
        opts, _ = getopt.getopt( sys.argv[1:], "hq:s:Q:d:C:n:m:B:M:Z:cv:" )
    except getopt.GetoptError as err:
        print( str(err) )
        help()
        sys.exit(1)
    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-q":
            qryFileName = a
        elif o == "-s":
            sbjFileName = a
        elif o == "-Q":
            queue = a
        elif o == "-d":
            tmpDir = a
        elif o == "-C":
            configFileName = a
        elif o == "-n":
            # maxNbJobs is used as a divisor below, so zero, negative and
            # non-numeric values are rejected here instead of crashing later.
            try:
                maxNbJobs = int(a)
            except ValueError:
                maxNbJobs = 0
            if maxNbJobs < 1:
                print( "*** Error: option -n must be a positive integer (got '%s')" % ( a ) )
                sys.exit(1)
        elif o == "-m":
            mix = a
        elif o == "-B":
            paramBlaster = a
        elif o == "-M":
            paramMatcher = a
        elif o == "-Z":
            collectFormat = a
        elif o == "-c":
            clean = True
        elif o == "-v":
            try:
                verbose = int(a)
            except ValueError:
                print( "*** Error: option -v must be an integer (got '%s')" % ( a ) )
                sys.exit(1)

    if qryFileName == "" or configFileName == "" or collectFormat == "":
        print( "*** Error: missing compulsory options" )
        help()
        sys.exit(1)

    progName = os.path.basename( sys.argv[0] )

    if verbose > 0:
        print( "\nbeginning of %s" % ( progName ) )
        sys.stdout.flush()

    if not os.path.exists( qryFileName ):
        print( "*** Error: query file '%s' doesn't exist" % ( qryFileName ) )
        sys.exit(1)
    if sbjFileName != "":
        if not os.path.exists( sbjFileName ):
            print( "*** Error: subject file '%s' doesn't exist" % ( sbjFileName ) )
            sys.exit(1)
    else:
        # without -s, the queries are compared against themselves
        sbjFileName = qryFileName

    pL = pyRepet.launcher.programLauncher.programLauncher()

    nbSeqQry = pyRepet.seq.fastaDB.dbSize( qryFileName )
    qryPerJob = nbSeqQry / float(maxNbJobs)

    # NOTE(review): the command strings below are kept exactly as they were
    # (file names are not shell-quoted) because how programLauncher.launch()
    # runs them is not visible from this file.

    # split the input query file in single files into a new directory
    prg = os.environ["REPET_PATH"] + "/bin/dbSplit.py"
    cmd = prg
    cmd += " -i %s" % ( qryFileName )
    if qryPerJob <= 1.0:
        cmd += " -n %i" % ( minQryPerJob )
    else:
        # "%i" truncates, so this is floor(qryPerJob) + 1: always above the
        # exact ratio, which keeps the number of batches within maxNbJobs.
        cmd += " -n %i" % ( qryPerJob + 1 )
    cmd += " -d"
    pL.launch( prg, cmd )

    # prepare the subject databank
    if sbjFileName != qryFileName:
        prg = "blaster"
        cmd = prg
        cmd += " -q %s" % ( sbjFileName )
        cmd += " -P"
        pL.launch( prg, cmd )

    # launch Blaster+Matcher in parallel
    prg = "srptBlasterMatcher.py"
    cmd = prg
    cmd += " -g %s_vs_%s" % ( qryFileName, sbjFileName )
    cmd += " -q %s/batches" % ( os.getcwd() )
    cmd += " -s %s/%s" % ( os.getcwd(), sbjFileName )
    cmd += " -Q '%s'" % ( queue )
    if tmpDir != "":
        cmd += " -d %s" % ( tmpDir )
    cmd += " -m %s" % ( mix )
    if paramBlaster != "":
        cmd += " -B \"%s\"" % ( paramBlaster )
    if paramMatcher != "":
        cmd += " -M \"%s\"" % ( paramMatcher )
    cmd += " -Z %s" % ( collectFormat )
    cmd += " -C %s" % ( configFileName )
    if clean:
        cmd += " -c"
    # the launched script is given one verbosity level less than this one
    cmd += " -v %i" % ( verbose - 1 )
    pL.launch( prg, cmd )

    # when Matcher was run, rename the collected file so that its name tells
    # which Matcher output it holds
    if mix in ["2","3"]:
        if "-a" in paramMatcher:
            suffix = "match.%s" % ( collectFormat )
        else:
            suffix = "clean_match.%s" % ( collectFormat )
        prefix = "%s_vs_%s" % ( qryFileName, sbjFileName )
        collectedFile = "%s.%s" % ( prefix, collectFormat )
        renamedFile = "%s.align.%s" % ( prefix, suffix )
        # os.rename avoids passing unquoted file names through a shell; a
        # missing file is reported but, as before, does not stop the run.
        if os.path.exists( collectedFile ):
            os.rename( collectedFile, renamedFile )
        else:
            sys.stderr.write( "*** Warning: result file '%s' not found, nothing renamed\n" % ( collectedFile ) )

    # clean
    if clean:
        prg = "rm"
        cmd = prg
        cmd += " -rf batches formatdb.log %s_cut* %s.Nstretch.map" % ( sbjFileName, sbjFileName )
        pL.launch( prg, cmd )

    if verbose > 0:
        print( "%s finished successfully\n" % ( progName ) )
        sys.stdout.flush()

    return 0
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
192
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
193 #----------------------------------------------------------------------------
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
194
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
if __name__ == '__main__':
    # hand main()'s return value to the shell as the exit status
    sys.exit( main() )