comparison scripts/ReMatCh/modules/utils.py @ 0:965517909457 draft

planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author cstrittmatter
date Wed, 22 Jan 2020 08:41:44 -0500
parents
children 0cbed1c0a762
comparison
equal deleted inserted replaced
-1:000000000000 0:965517909457
1 import pickle
2 import traceback
3 import shlex
4 import subprocess
5 from threading import Timer
6 import shutil
7 import time
8 import functools
9 import os.path
10 import sys
11
12
def start_logger(workdir):
    """Route ``sys.stdout`` through a Logger that also writes into *workdir*.

    Returns a ``(logfile, time_str)`` tuple: the path of the log file that
    was opened and the timestamp string used to name it.
    """
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    tee = Logger(workdir, timestamp)
    # From here on every print is duplicated into the log file
    sys.stdout = tee
    return tee.getLogFile(), timestamp
18
19
class Logger(object):
    """File-like object that duplicates every write to a stream and a log file.

    The stream is whatever ``sys.stdout`` is when the instance is created;
    the log file is ``run.<time_str>.log`` inside *out_directory*.
    """

    def __init__(self, out_directory, time_str):
        """Open the log file for writing and remember the current ``sys.stdout``."""
        self.logfile = os.path.join(out_directory, str('run.' + time_str + '.log'))
        self.terminal = sys.stdout
        self.log = open(self.logfile, "w")

    def write(self, message):
        """Write *message* to the terminal stream and to the log file."""
        self.terminal.write(message)
        self.log.write(message)
        # Flush after every message so the log is up to date even if the run dies
        self.log.flush()

    def flush(self):
        """Flush both underlying streams.

        Forwarding the call (instead of ignoring it) lets an explicit
        ``sys.stdout.flush()`` actually reach the wrapped terminal stream.
        """
        self.terminal.flush()
        self.log.flush()

    def getLogFile(self):
        """Return the path of the log file."""
        return self.logfile
36
37
def get_cpu_information(outdir, time_str):
    """Record CPU and SLURM details of the current run inside *outdir*.

    Two files are written, both named ``cpu_information.<time_str>.*``:
    ``.cpu.txt`` receives the output of ``cat /proc/cpuinfo`` (left empty
    when that command fails) and ``.slurm.txt`` receives every ``SLURM_*``
    environment variable as a ``#NAME`` line followed by its value.
    """
    file_prefix = os.path.join(outdir, 'cpu_information.' + time_str)

    with open(file_prefix + '.cpu.txt', 'wt') as cpu_writer:
        succeeded, cpu_info, _ = runCommandPopenCommunicate(['cat', '/proc/cpuinfo'], False, None, False)
        if succeeded:
            cpu_writer.write(cpu_info)

    with open(file_prefix + '.slurm.txt', 'wt') as slurm_writer:
        slurm_writer.writelines(
            '#' + name + '\n' + os.environ[name] + '\n'
            for name in sorted(os.environ)
            if name.startswith('SLURM_')
        )
49
50
def setPATHvariable(doNotUseProvidedSoftware, script_path):
    """Optionally prepend the bundled tool folders to ``PATH``, then print it.

    Unless *doNotUseProvidedSoftware* is true, the bowtie2, samtools and
    bcftools folders found under ``<folder of script_path>/src`` are placed
    in front of the current ``PATH`` value.
    """
    original_path = os.environ['PATH']

    if not doNotUseProvidedSoftware:
        src_dir = os.path.join(os.path.dirname(script_path), 'src')
        bundled = [
            os.path.join(src_dir, 'bowtie2-2.2.9'),
            os.path.join(src_dir, 'samtools-1.3.1', 'bin'),
            os.path.join(src_dir, 'bcftools-1.3.1', 'bin'),
        ]
        # Bundled tools go first so they take precedence over system-wide ones
        os.environ['PATH'] = str(':'.join(bundled + [original_path]))

    print('\n' + 'PATH variable:')
    print(os.environ['PATH'])
65
66
def _version_is_at_least(found_version, required_version):
    """Return True when dotted *found_version* is >= dotted *required_version*."""
    found = found_version.split('.')
    required = required_version.split('.')
    # A shorter found version (e.g. "1.6" against "1.6.0") is padded with zeros
    found.extend(['0'] * (len(required) - len(found)))
    if len(required) == 3:
        # Drop a "_suffix" attached to the third component
        found[2] = found[2].split('_')[0]
    # Lists compare lexicographically, so 2.3.0 >= 2.2.9 holds even though 0 < 9
    return [int(part) for part in found[:len(required)]] >= [int(part) for part in required]


def checkPrograms(programs_version_dictionary):
    """Check that every required program is on PATH with an acceptable version.

    *programs_version_dictionary* maps a program name to a list
    ``[version_flag, comparison, required_version]``. When ``version_flag``
    is None the version is not checked. ``comparison`` equal to ``'>='``
    requires at least ``required_version``; any other value requires an exact
    match. For names ending in ``.jar`` the resolved path is appended to the
    program's list in the dictionary.

    Returns a list of human-readable problems (empty when all is fine).
    """
    print('\n' + 'Checking dependencies...')
    programs = programs_version_dictionary
    which_program = ['which', '']
    listMissings = []
    for program in programs:
        which_program[1] = program
        run_successfully, stdout, stderr = runCommandPopenCommunicate(which_program, False, None, False)
        if not run_successfully:
            listMissings.append(program + ' not found in PATH.')
            continue

        program_path = stdout.splitlines()[0]
        print(program_path)

        version_flag = programs[program][0]
        if version_flag is None:
            print(program + ' (impossible to determine programme version) found at: ' + program_path)
            continue

        if program.endswith('.jar'):
            check_version = ['java', '-jar', program_path, version_flag]
            programs[program].append(program_path)
        else:
            check_version = [program_path, version_flag]
        run_successfully, stdout, stderr = runCommandPopenCommunicate(check_version, False, None, False)
        # Some tools report their version on stderr
        if stdout == '':
            stdout = stderr

        first_line = stdout.splitlines()[0]
        if program == 'wget':
            # The version is taken from the third space-separated field
            version_line = first_line.split(' ', 3)[2]
        else:
            version_line = first_line.split(' ')[-1]
        for character in ('"', 'v', 'V', '+'):
            version_line = version_line.replace(character, '')
        print(program + ' (' + version_line + ') found')

        requirement = ('It is required ' + program + ' with version ' +
                       programs[program][1] + ' ' + programs[program][2])
        if programs[program][1] == '>=':
            version_ok = _version_is_at_least(version_line, programs[program][2])
        else:
            version_ok = version_line == programs[program][2]
        if not version_ok:
            # Reported once per program, however many components differ
            listMissings.append(requirement)
    return listMissings
113
114
def requiredPrograms(asperaKey, downloadCramBam):
    """Verify the external dependencies and exit with an error list if any fail.

    wget, bowtie2, samtools and bcftools are always required; ascp is added
    when *asperaKey* is not None and gzip when *downloadCramBam* is true.
    """
    required = {
        'wget': ['--version', '>=', '1.12'],
        'bowtie2': ['--version', '>=', '2.2.9'],
        'samtools': ['--version', '==', '1.3.1'],
        'bcftools': ['--version', '==', '1.3.1'],
    }
    if asperaKey is not None:
        required['ascp'] = ['--version', '>=', '3.6.1']
    if downloadCramBam:
        required['gzip'] = ['--version', '>=', '1.6']

    problems = checkPrograms(required)
    if problems:
        sys.exit('\n' + 'Errors:' + '\n' + '\n'.join(problems))
128
129
def general_information(logfile, version, outdir, time_str, doNotUseProvidedSoftware, asperaKey, downloadCramBam):
    """Print the run banner and environment details, then check dependencies.

    Prints, in order: start time, log file location, the command line, the
    current directory and the program version. It then records CPU
    information in *outdir*, sets up PATH and verifies the required programs.

    Returns the absolute path of the running script.
    """
    print('\n' + '==========> ReMatCh <==========')
    print('\n' + 'Program start: ' + time.ctime())

    # Where the log file is stored
    print('\n' + 'LOGFILE:')
    print(logfile)

    # The command line as it was invoked
    print('\n' + 'COMMAND:')
    script_path = os.path.abspath(sys.argv[0])
    print(' '.join([sys.executable, script_path, ' '.join(sys.argv[1:])]))

    # Directory from which the program was launched
    print('\n' + 'PRESENT DIRECTORY:')
    launch_directory = os.path.abspath(os.getcwd())
    print(launch_directory)

    # Program version (plus git details when available)
    print('\n' + 'VERSION:')
    scriptVersionGit(version, launch_directory, script_path)

    # CPU information
    get_cpu_information(outdir, time_str)

    # Set and print the PATH variable
    setPATHvariable(doNotUseProvidedSoftware, script_path)

    # Check the required programs
    requiredPrograms(asperaKey, downloadCramBam)

    return script_path
164
165
def scriptVersionGit(version, directory, script_path):
    """Print the program version and, when possible, its git details.

    The git commands run from the folder containing *script_path*; the
    working directory is switched back to *directory* afterwards, also when
    a git call fails. Any failure only produces a warning message.
    """
    print('Version ' + version)

    git_commands = (
        ['git', 'log', '-1', '--date=local', '--pretty=format:"%h (%H) - Commit by %cn, %cd) : %s"'],
        ['git', 'remote', 'show', 'origin'],
    )

    try:
        os.chdir(os.path.dirname(script_path))
        try:
            for command in git_commands:
                run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, 15, False)
                print(stdout)
        finally:
            # Always go back, otherwise later relative paths would resolve
            # against the script folder after a failed git call
            os.chdir(directory)
    except Exception:
        # Best effort only; Ctrl-C and SystemExit are no longer swallowed
        print('HARMLESS WARNING: git command possibly not found. The GitHub repository information will not be obtained.')
180
181
def runTime(start_time):
    """Print the time elapsed since *start_time* and return it in seconds.

    *start_time* is a ``time.time()`` value. The printed line splits the
    elapsed time into hours, minutes and seconds; the returned value is the
    total number of seconds rounded to two decimals.
    """
    elapsed = time.time() - start_time
    hours, remainder = divmod(elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    pieces = ['Runtime :', str(hours), 'h:', str(minutes), 'm:', str(round(seconds, 2)), 's']
    print(''.join(pieces))
    return round(elapsed, 2)
189
190
def timer(function, name):
    """Wrap *function* so its run is announced, timed and the time returned.

    The wrapper prints ``RUNNING <name>`` / ``END <name>`` around the call
    and returns a list whose first element is the elapsed time in seconds,
    followed by the items *function* returned (its result must be iterable).
    """
    @functools.wraps(function)
    def wrapper(*args, **kwargs):
        print('\n' + 'RUNNING {0}\n'.format(name))
        started = time.time()

        # list() lets the elapsed time be put in front of any iterable result
        outcome = list(function(*args, **kwargs))

        elapsed = runTime(started)
        print('END {0}'.format(name))

        return [elapsed] + outcome
    return wrapper
205
206
def removeDirectory(directory):
    """Delete *directory* and all its contents; do nothing if it is not a directory."""
    if not os.path.isdir(directory):
        return
    shutil.rmtree(directory)
210
211
def saveVariableToPickle(variableToStore, outdir, prefix):
    """Pickle *variableToStore* into ``<outdir>/<prefix>.pkl``."""
    target = os.path.join(outdir, str(prefix + '.pkl'))
    with open(target, 'wb') as handle:
        handle.write(pickle.dumps(variableToStore))
216
217
def extractVariableFromPickle(pickleFile):
    """Load and return the object stored in the pickle file *pickleFile*."""
    # NOTE(review): unpickling can execute arbitrary code; only use this on
    # files produced by a trusted run.
    with open(pickleFile, 'rb') as handle:
        return pickle.load(handle)
222
223
def trace_unhandled_exceptions(func):
    """Decorator that reports any exception raised by *func* instead of propagating it.

    On success the wrapper returns whatever *func* returned. When *func*
    raises, the function name and the traceback are printed and the wrapper
    returns None.
    """
    @functools.wraps(func)
    def wrapped_func(*args, **kwargs):
        try:
            # Hand the result back to the caller instead of dropping it
            return func(*args, **kwargs)
        except:  # noqa: E722 -- catch-all kept on purpose: report, never propagate
            print('Exception in ' + func.__name__)
            traceback.print_exc()
    return wrapped_func
233
234
def kill_subprocess_Popen(subprocess_Popen, command):
    """Announce that *command* ran out of time and kill its process.

    *subprocess_Popen* is the process object to kill; *command* is only used
    in the printed message.
    """
    message = 'Command run out of time: {0}'.format(str(command))
    print(message)
    subprocess_Popen.kill()
238
239
def runCommandPopenCommunicate(command, shell_True, timeout_sec_None, print_comand_True):
    """Run an external command and capture its output.

    Parameters:
        command: a command string or a sequence of string pieces. A sequence
            is joined with spaces and re-split with ``shlex.split``, so empty
            pieces disappear and pieces containing spaces become several
            arguments.
        shell_True: when true, the command runs through the shell.
        timeout_sec_None: seconds after which the process is killed, or None
            for no time limit.
        print_comand_True: when true, print the command before running it.

    Returns:
        ``(run_successfully, stdout, stderr)``, where ``run_successfully`` is
        True only when the exit code is 0. On failure the captured output is
        printed as well.
    """
    try:
        string_types = basestring  # Python 2: covers str and unicode
    except NameError:
        string_types = str  # keeps the helper usable where basestring is absent

    if not isinstance(command, string_types):
        command = ' '.join(command)
    command = shlex.split(command)

    if print_comand_True:
        print('Running: ' + ' '.join(command))

    if shell_True:
        # The shell expects a single command string
        command = ' '.join(command)
    proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=bool(shell_True))

    # Filled by the watchdog callback. An explicit marker is reliable, whereas
    # asking the timer thread whether it is still alive right after cancel()
    # depends on thread scheduling.
    timed_out = []

    def _kill_on_timeout():
        timed_out.append(True)
        kill_subprocess_Popen(proc, command)

    if timeout_sec_None is None:
        stdout, stderr = proc.communicate()
    else:
        watchdog = Timer(timeout_sec_None, _kill_on_timeout)
        watchdog.start()
        try:
            stdout, stderr = proc.communicate()
        finally:
            watchdog.cancel()
            # Wait for the watchdog thread so the marker is final
            watchdog.join()
    not_killed_by_timer = not timed_out

    run_successfully = proc.returncode == 0
    if not run_successfully:
        # The kill callback already printed the command on a timeout
        if not print_comand_True and not_killed_by_timer:
            print('Running: ' + str(command))
        if len(stdout) > 0:
            print('STDOUT')
            print(stdout.decode("utf-8"))
        if len(stderr) > 0:
            print('STDERR')
            print(stderr.decode("utf-8"))
    return run_successfully, stdout, stderr
277
278
def rchop(string, ending):
    """Return *string* without the trailing *ending*, if it ends with it.

    The string is returned unchanged when it does not end with *ending* or
    when *ending* is empty.
    """
    # An empty ending must be skipped: string[:-0] would yield '' rather than
    # the whole string.
    if ending and string.endswith(ending):
        return string[:-len(ending)]
    return string
283
284
def reverse_complement(seq):
    """Return the reverse complement of the nucleotide sequence *seq*.

    The input is upper-cased first, so the result is always upper case.
    Besides A, C, G, T and N, the IUPAC ambiguity codes are complemented
    too. Any other character raises KeyError.
    """
    complement = {
        'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N',
        # IUPAC ambiguity codes
        'R': 'Y', 'Y': 'R', 'K': 'M', 'M': 'K', 'S': 'S', 'W': 'W',
        'B': 'V', 'V': 'B', 'D': 'H', 'H': 'D',
    }
    # join() builds the result in one pass instead of repeated concatenation
    return ''.join(complement[base] for base in reversed(seq.upper()))