annotate ctat_trinity_wrapper.py @ 0:045dadbbb0a2 draft default tip

Upload ctat tools.
author trinity_ctat
date Tue, 17 Jul 2018 11:50:42 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
1 #!/usr/bin/env python
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
2
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
3 '''
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
4 trinity_runner.py
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
5 This program is used as a wrapper for Trinity to allow an automatic rerun of failed jobs. It takes arguments for a typical Trinity run:
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
6 ~ Required args ~
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
7 Input files - single or paired (left and right)
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
8 File type (fasta, fastq)
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
9 Max memory - given directly, or derived at runtime as memory-per-CPU x CPU count (e.g. from Galaxy slots)
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
10
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
11 ~ Optional args ~
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
12 Output directory - this allows users to run the same job over in case it walltime'd out or failed for recoverable reasons.
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
13
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
14 --
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
15 Created Tuesday, 7 March 2017.
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
16 Carrie Ganote
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
17
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
18 Licensed to Indiana University under Creative Commons 3.0
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
19 '''
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
20 import subprocess32
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
21 import argparse
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
22 import logging as log
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
23 import sys
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
24 import os
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
25 import errno
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
26 from datetime import datetime
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
27
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
28 TRINITY_OUT_DIR = "trinity_out_dir"
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
29
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
def _ensure_dir(path):
    """Create directory *path* (with parents) unless it already exists.

    Raises:
        OSError: for any failure other than *path* already being a directory.
    """
    try:
        os.makedirs(path)
        print("Created dir  %s" % path)
    except OSError as exc:
        # A pre-existing directory is the expected situation on a rerun.
        if not (exc.errno == errno.EEXIST and os.path.isdir(path)):
            raise


def _max_memory_value(max_memory, mem_per_cpu, cpu):
    """Return the value to pass to Trinity's --max_memory option.

    Exactly one of *max_memory* and *mem_per_cpu* must be supplied.  With
    *mem_per_cpu* (GB per CPU) the total is mem_per_cpu x cpu, where the CPU
    count falls back to 2 when *cpu* is not given.

    Raises:
        Exception: when both or neither of the two memory options are given.
    """
    if mem_per_cpu and not max_memory:
        cpu_count = int(cpu) if cpu else 2
        return "%dG" % (cpu_count * int(mem_per_cpu))
    if max_memory and not mem_per_cpu:
        return max_memory
    raise Exception("Please pick Memory per cpu, or max mem, but not both.")


def main(*args):
    """Build a Trinity command line from sys.argv, run it (retrying once on
    failure) and exit with Trinity's last exit status.

    The positional *args* are accepted for compatibility with the
    ``main(*sys.argv)`` call but are not used; options are parsed from
    sys.argv by argparse.

    Side effects:
      * When --dir, --user and --fullpath are all given, the job runs inside
        <fullpath>/<user>/<sanitized dir> (the process chdir()s there) and,
        on success, Trinity.fasta is symlinked back into
        <original cwd>/trinity_out_dir.
      * Trinity's stdout/stderr and progress notes are written to --log.
      * A scratch file named "greplog" is written in the working directory.
      * On success the contents of --timing, if given and readable, are
        appended to the log.

    Raises:
        Exception: on missing reads, missing --seqType, missing --log, or an
            invalid memory option combination.
        OSError: if a required directory or the symlink cannot be created.
    """
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("-o","--output", help="Name of output directory")
    parser.add_argument("-q","--seqType", help="Type of reads; fa or fq")
    parser.add_argument("-m","--max_memory", help="How much memory to allocate? Or maybe how many cpus?")
    parser.add_argument("-p","--mem_per_cpu", help="Memory PER CPU, in GB, in case we want to multiply mem x cpu at runtime")
    parser.add_argument("-s","--single", help="Single read file input")
    parser.add_argument("-l","--left", help="Left read file from paired inputs")
    parser.add_argument("-r","--right", help="Right read file from paired inputs")
    parser.add_argument("-v","--verbose", help="Enable debugging messages to be displayed", action='store_true')
    parser.add_argument("-g","--log", help="Log file")
    parser.add_argument("-t","--timing", help="Timing file, if it exists", default=None)
    parser.add_argument("-d","--dir", help="if supplying a rerunnable job, this is the (hopefully unique) name of the directory to run it in.")
    parser.add_argument("-u","--user", help="Username to run job under")
    parser.add_argument("-f","--fullpath", help="if supplying a rerunnable job, this is the full path (except the user and dir names) to run the job in.")
    parser.add_argument("-c","--CPU", help="CPUs, either a hard coded numer or from Galaxy slots")
    opts = parser.parse_args()

    if opts.verbose:
        log.basicConfig(format='%(message)s', level=log.DEBUG)
    cmd = ["Trinity"]

    ### Add rerun ability ###########################################
    # copyback stays False for a normal run; for a rerunnable job it holds the
    # original working directory that results must be linked back into.
    copyback = False
    if opts.dir and opts.user and opts.fullpath:
        cleandir = opts.dir
        # Replace shell/markup-sensitive characters so the name is a safe
        # directory component.
        # NOTE(review): "/" is not in this set, so a --dir value containing
        # a slash still yields nested directories - confirm that is intended.
        for c in "\\`*_{}[]()>#+-.!$&;| ":
            cleandir = cleandir.replace(c, "_")
        rerun_path = "%s/%s/%s" % (opts.fullpath, opts.user, cleandir)
        print("Rerunpath is  %s" % rerun_path)
        _ensure_dir(rerun_path)
        copyback = os.getcwd()
        _ensure_dir(copyback + "/" + TRINITY_OUT_DIR)
        os.chdir(rerun_path)

    ### Add information for reads ###################################
    if opts.left and opts.right:
        cmd += ["--left", opts.left, "--right", opts.right]
    elif opts.single:
        cmd += ["--single", opts.single]
    else:
        raise Exception ("Need input files in order to run Trinity!")

    ### Add seqtype ##################################################
    if opts.seqType:
        cmd += ["--seqType", opts.seqType]
    else:
        raise Exception ("Please specify a file type for your reads!")

    ### Memory and CPU management ####################################
    cmd += ["--max_memory",
            _max_memory_value(opts.max_memory, opts.mem_per_cpu, opts.CPU)]
    if opts.CPU:
        cmd += ["--CPU", opts.CPU]

    ### Enough args, let's run it ####################################
    if not opts.log:
        # Fail with a clear message instead of a TypeError from open(None).
        raise Exception ("Please specify a log file with -g/--log!")
    print("About to write to %s" % opts.log)

    total_attempts = attempts = 2
    ec = 1
    finish = 1
    with open(opts.log, 'w') as out:
        out.write("Command is:\n%s\n" % (" ".join(cmd)))

        ### There is definitely some value in running the job more than once, especially if it dies for stupid reasons.. ###
        while ec != 0 and attempts > 0 and finish != 0:
            dtstr = datetime.now().strftime("%d/%m/%y %H:%M")
            out.write("Beginning attempt %d of Trinity job at %s\n" % (total_attempts - attempts + 1, dtstr))
            attempts -= 1
            # The child writes straight to the underlying file descriptor, so
            # push our buffered text out first to keep the log in order.
            out.flush()
            ec = subprocess32.call(cmd, shell=False, stdin=None, stdout=out, stderr=out, timeout=None)
            out.write("Trinity exited with status %d\n" % ec)
            out.flush()

            # grep exits 0 only if Trinity's success banner is in the log.
            with open("greplog", 'w') as greplog:
                finish = subprocess32.call(
                    ["grep", 'All commands completed successfully', opts.log],
                    shell=False, stdin=None, stdout=greplog, stderr=greplog, timeout=None)
            out.write("Finished and found the success command with grep code %d\n" % finish)

        if ec == 0:
            # Link the result back for every successful rerunnable job; this
            # must not depend on whether a timing file was requested.
            if copyback is not False:
                dest = copyback + "/" + TRINITY_OUT_DIR + "/Trinity.fasta"
                src = os.getcwd() + "/" + TRINITY_OUT_DIR + "/Trinity.fasta"
                print("copying trinity outputs from %s to %s" % (src, dest))
                os.symlink(src, dest)

            # Copy the timing file into the log (best effort).
            if opts.timing is not None:
                try:
                    with open(opts.timing, 'r') as handle:
                        for line in handle:
                            out.write(line)
                except (OSError, IOError) as e:
                    print("Oops, no timing file found?  %s" % e)

    sys.exit(ec)
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
159
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
if __name__ == "__main__":
    # Executed as a script: hand the raw command line to main(), which
    # terminates the process with Trinity's exit status.
    cli_argv = sys.argv
    main(*cli_argv)
045dadbbb0a2 Upload ctat tools.
trinity_ctat
parents:
diff changeset
162