Mercurial > repos > yufei-luo > s_mart
comparison commons/launcher/launchBlasterMatcherPerQuery.py @ 18:94ab73e8a190
Uploaded
| author | m-zytnicki |
|---|---|
| date | Mon, 29 Apr 2013 03:20:15 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 17:b0e8584489e6 | 18:94ab73e8a190 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 """ | |
| 4 This program splits the input fasta file into a given number of files, launches Blaster and/or Matcher on them in parallel and collects the results afterwards. | |
| 5 """ | |
| 6 | |
| 7 import os | |
| 8 import sys | |
| 9 import getopt | |
| 10 import exceptions | |
| 11 import logging | |
| 12 import ConfigParser | |
| 13 | |
# The directory given by the environment variable REPET_PATH is appended to the
# module search path before the 'pyRepet' modules are imported below, so the
# script stops right away (exit status 1) when the variable is not set.
if "REPET_PATH" not in os.environ:
    print( "*** Error: no environment variable REPET_PATH" )
    sys.exit(1)
sys.path.append( os.environ["REPET_PATH"] )
| 18 | |
| 19 import pyRepet.launcher.programLauncher | |
| 20 import pyRepet.seq.fastaDB | |
| 21 | |
| 22 #----------------------------------------------------------------------------- | |
| 23 | |
def help():
    """
    Print the usage message (the list of the command-line options) on the standard output.

    The message starts and ends with an empty line.
    """
    # same spacing as a 'print' statement with three comma-separated items
    usageLine = " ".join( [ "usage:", sys.argv[0], " [ options ]" ] )
    messageLines = [
        "",
        usageLine,
        "options:",
        " -h: this help",
        " -q: fasta filename of the queries",
        " -s: fasta filename of the subjects (same as queries if not specified)",
        " -Q: queue name on the cluster",
        " -d: absolute path to the temporary directory",
        " -C: configuration file",
        " -n: max. number of jobs (default=10,given a min. of 1 query per job)",
        " -m: mix of Blaster and/or Matcher",
        " 1: launch Blaster only",
        " 2: launch Matcher only (on '*.align' query files)",
        " 3: launch Blaster+Matcher in the same job (default)",
        " -B: parameters for Blaster (e.g. \"-a -n tblastx\")",
        " -M: parameters for Matcher (e.g. \"-j\")",
        " -Z: collect all the results into a single file (format 'align', 'path' or 'tab')",
        " -c: clean",
        " -v: verbose (default=0/1/2)",
        "",
    ]
    # a single write, every line being terminated by a newline
    sys.stdout.write( "\n".join( messageLines ) + "\n" )
| 50 | |
| 51 #----------------------------------------------------------------------------- | |
| 52 | |
def _parseIntOption( flag, value, minimum=None ):
    """
    Convert the argument of a numeric command-line option into an integer.

    flag: name of the option (only used in the error messages)
    value: string given on the command line
    minimum: smallest accepted value (no lower bound if None)

    Return the integer. Print an error message and exit with status 1 if
    'value' is not an integer or is lower than 'minimum'.
    """
    try:
        number = int( value )
    except ValueError:
        print( "*** Error: option '%s' expects an integer (got '%s')" % ( flag, value ) )
        sys.exit(1)
    if minimum is not None and number < minimum:
        print( "*** Error: option '%s' should be >= %i (got %i)" % ( flag, minimum, number ) )
        sys.exit(1)
    return number


def main():
    """
    Split the query fasta file into batches, launch Blaster and/or Matcher on them in parallel and collect the results afterwards.

    The options are read from 'sys.argv' (see 'help()'). Options '-q', '-C'
    and '-Z' are compulsory.

    Return 0 at the end of the run. Exit with status 1 if an option is
    unknown or invalid, if a compulsory option is missing, or if the query
    or subject file doesn't exist; exit with status 0 after '-h'.
    """
    # local import: only needed here, to rename the collected file
    import shutil

    qryFileName = ""
    sbjFileName = ""
    queue = ""
    tmpDir = ""
    configFileName = ""
    maxNbJobs = 10
    minQryPerJob = 1
    mix = "3"
    paramBlaster = ""
    paramMatcher = ""
    collectFormat = ""
    clean = False
    verbose = 0

    try:
        opts, _ = getopt.getopt( sys.argv[1:], "hq:s:Q:d:C:n:m:B:M:Z:cv:" )
    except getopt.GetoptError as err:
        print( str(err) )
        help()
        sys.exit(1)
    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-q":
            qryFileName = a
        elif o == "-s":
            sbjFileName = a
        elif o == "-Q":
            queue = a
        elif o == "-d":
            tmpDir = a
        elif o == "-C":
            configFileName = a
        elif o == "-n":
            # at least one job, otherwise the division below fails
            maxNbJobs = _parseIntOption( o, a, 1 )
        elif o == "-m":
            mix = a
        elif o == "-B":
            paramBlaster = a
        elif o == "-M":
            paramMatcher = a
        elif o == "-Z":
            collectFormat = a
        elif o == "-c":
            clean = True
        elif o == "-v":
            # no lower bound: this script itself forwards 'verbose - 1' below
            verbose = _parseIntOption( o, a )

    if qryFileName == "" or configFileName == "" or collectFormat == "":
        print( "*** Error: missing compulsory options" )
        help()
        sys.exit(1)

    progName = os.path.basename( sys.argv[0] )

    if verbose > 0:
        print( "\nbeginning of %s" % ( progName ) )
        sys.stdout.flush()

    if not os.path.exists( qryFileName ):
        print( "*** Error: query file '%s' doesn't exist" % ( qryFileName ) )
        sys.exit(1)
    if sbjFileName != "":
        if not os.path.exists( sbjFileName ):
            print( "*** Error: subject file '%s' doesn't exist" % ( sbjFileName ) )
            sys.exit(1)
    else:
        # no subject given: the queries are compared with themselves
        sbjFileName = qryFileName

    # NOTE(review): 'launch( prg, cmd )' presumably runs 'cmd' through a shell
    # (the 'rm' command below relies on a glob) -- confirm in pyRepet; the file
    # names are inserted unquoted in the commands, as before.
    pL = pyRepet.launcher.programLauncher.programLauncher()

    nbSeqQry = pyRepet.seq.fastaDB.dbSize( qryFileName )
    qryPerJob = nbSeqQry / float(maxNbJobs)
    if qryPerJob <= 1.0:
        nbQryPerBatch = minQryPerJob
    else:
        # truncation after adding 1, as the '%i' format did
        nbQryPerBatch = int( qryPerJob + 1 )

    # split the input query file in single files into a new directory
    prg = os.environ["REPET_PATH"] + "/bin/dbSplit.py"
    cmd = prg
    cmd += " -i %s" % ( qryFileName )
    cmd += " -n %i" % ( nbQryPerBatch )
    cmd += " -d"
    pL.launch( prg, cmd )

    # prepare the subject databank
    if sbjFileName != qryFileName:
        prg = "blaster"
        cmd = prg
        cmd += " -q %s" % ( sbjFileName )
        cmd += " -P"
        pL.launch( prg, cmd )

    # launch Blaster+Matcher in parallel
    prg = "srptBlasterMatcher.py"
    cmd = prg
    cmd += " -g %s_vs_%s" % ( qryFileName, sbjFileName )
    cmd += " -q %s/batches" % ( os.getcwd() )
    cmd += " -s %s/%s" % ( os.getcwd(), sbjFileName )
    cmd += " -Q '%s'" % ( queue )
    if tmpDir != "":
        cmd += " -d %s" % ( tmpDir )
    cmd += " -m %s" % ( mix )
    if paramBlaster != "":
        cmd += " -B \"%s\"" % ( paramBlaster )
    if paramMatcher != "":
        cmd += " -M \"%s\"" % ( paramMatcher )
    cmd += " -Z %s" % ( collectFormat )
    cmd += " -C %s" % ( configFileName )
    if clean:
        cmd += " -c"
    cmd += " -v %i" % ( verbose - 1 )
    pL.launch( prg, cmd )

    # when Matcher was launched, rename the collected file after its content
    if mix in ["2","3"]:
        if "-a" in paramMatcher:
            suffix = "match.%s" % ( collectFormat )
        else:
            suffix = "clean_match.%s" % ( collectFormat )
        collectedFile = "%s_vs_%s.%s" % ( qryFileName, sbjFileName, collectFormat )
        renamedFile = "%s_vs_%s.align.%s" % ( qryFileName, sbjFileName, suffix )
        # no shell involved, hence no trouble with spaces or metacharacters in
        # the file names; as with the former 'mv', a failure is reported on
        # stderr and the run goes on
        try:
            shutil.move( collectedFile, renamedFile )
        except (IOError, OSError, shutil.Error) as err:
            sys.stderr.write( "*** Warning: can't rename '%s' into '%s' (%s)\n" % ( collectedFile, renamedFile, err ) )

    # clean
    if clean:
        prg = "rm"
        cmd = prg
        cmd += " -rf batches formatdb.log %s_cut* %s.Nstretch.map" % ( sbjFileName, sbjFileName )
        pL.launch( prg, cmd )

    if verbose > 0:
        print( "%s finished successfully\n" % ( progName ) )
        sys.stdout.flush()

    return 0
| 192 | |
| 193 #---------------------------------------------------------------------------- | |
| 194 | |
if __name__ == '__main__':
    # the value returned by main() becomes the exit status of the script
    sys.exit( main() )
