Mercurial > repos > peterjc > tmhmm_and_signalp
annotate tools/protein_analysis/seq_analysis_utils.py @ 22:e1afa4b0b682 draft
"This is v0.2.12 with black formating and Python 3 next fix etc"
author | peterjc |
---|---|
date | Thu, 17 Jun 2021 08:34:58 +0000 |
parents | 238eae32483c |
children | e1996f0f4e85 |
rev | line source |
---|---|
0
bca9bc7fdaef
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
1 """A few useful functions for working with FASTA files and running jobs. |
bca9bc7fdaef
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
2 |
bca9bc7fdaef
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
3 This module was originally written to hold common code used in both the TMHMM |
bca9bc7fdaef
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
4 and SignalP wrappers in Galaxy. |
bca9bc7fdaef
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
5 |
bca9bc7fdaef
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
6 Given Galaxy currently supports Python 2.4+ this cannot use the Python module |
bca9bc7fdaef
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
7 multiprocessing so the function run_jobs instead is a simple pool approach |
bca9bc7fdaef
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
8 using just the subprocess library. |
bca9bc7fdaef
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
9 """ |
20
a19b3ded8f33
v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
peterjc
parents:
19
diff
changeset
|
10 |
a19b3ded8f33
v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
peterjc
parents:
19
diff
changeset
|
11 from __future__ import print_function |
a19b3ded8f33
v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
peterjc
parents:
19
diff
changeset
|
12 |
0
bca9bc7fdaef
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
13 import os |
bca9bc7fdaef
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
14 import subprocess |
20
a19b3ded8f33
v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
peterjc
parents:
19
diff
changeset
|
15 import sys |
a19b3ded8f33
v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
peterjc
parents:
19
diff
changeset
|
16 |
0
bca9bc7fdaef
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
17 from time import sleep |
bca9bc7fdaef
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
18 |
21
238eae32483c
"Check this is up to date with all 2020 changes (black etc)"
peterjc
parents:
20
diff
changeset
|
# Python 2 only: bind the lazy xrange to the name range, so the loops in
# this module use the same spelling on both major versions.
if sys.version_info < (3,):
    range = xrange  # noqa: F821

__version__ = "0.0.4"
0
bca9bc7fdaef
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
23 |
try:
    from multiprocessing import cpu_count
except ImportError:
    # Must be under Python 2.5, this is copied from multiprocessing:
    def cpu_count():
        """Return the number of CPUs in the system.

        Raises NotImplementedError when no count of at least one can be
        obtained for the current platform.
        """
        platform = sys.platform
        if platform == "win32":
            try:
                count = int(os.environ["NUMBER_OF_PROCESSORS"])
            except (ValueError, KeyError):
                count = 0
        elif platform == "darwin" or "bsd" in platform:
            # Ask sysctl; the darwin branch uses the /usr/sbin copy.
            command = "/sbin/sysctl -n hw.ncpu"
            if platform == "darwin":
                command = "/usr" + command
            try:
                with os.popen(command) as pipe:
                    count = int(pipe.read())
            except ValueError:
                count = 0
        else:
            try:
                count = os.sysconf("SC_NPROCESSORS_ONLN")
            except (ValueError, OSError, AttributeError):
                count = 0

        if count < 1:
            raise NotImplementedError("cannot determine number of cpus")
        return count
9 | 54 |
55 | |
def thread_count(command_line_arg, default=1):
    """Determine number of threads to use from the command line args.

    Arguments:
     - command_line_arg: the requested thread count, normally the string
       taken from the command line.
     - default: count used when command_line_arg cannot be converted to
       an integer (for example it is not numeric, or is None).

    Returns the requested (or default) count, capped at the value given by
    cpu_count() when that is available. Calls sys.exit with an error
    message if the resulting count is less than one.
    """
    try:
        num = int(command_line_arg)
    except (TypeError, ValueError):
        # TypeError covers a missing argument passed in as None, which
        # int() rejects with TypeError rather than ValueError.
        num = default
    if num < 1:
        sys.exit("Threads argument %r is not a positive integer" % command_line_arg)
    # Cap this with the physical limit of the machine,
    try:
        num = min(num, cpu_count())
    except NotImplementedError:
        # CPU count unknown, so trust the requested value as given.
        pass
    return num
73 | |
74 | |
0
bca9bc7fdaef
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
def _finish_fasta_record(title, chunks, max_len, truncate):
    """Join, truncate and length-check one record; return (title, seq)."""
    seq = "".join(chunks)
    if truncate:
        seq = seq[:truncate]
    if max_len and len(seq) > max_len:
        raise ValueError(
            "Sequence %s is length %i, max length %i"
            % (title.split()[0], len(seq), max_len)
        )
    return title, seq


def fasta_iterator(filename, max_len=None, truncate=None):
    """Parse FASTA file yielding tuples of (name, sequence).

    Arguments:
     - filename: path of the FASTA file to read.
     - max_len: if set (non-zero), a record whose sequence is longer than
       this (after any truncation) raises ValueError.
     - truncate: if set (non-zero), each sequence is cut to at most this
       many letters before the length check.

    The name is the full title line without the leading ">" and without
    trailing whitespace. Before the first record, blank lines and lines
    starting with "#" are skipped; any other line there raises ValueError.
    Within a record every non-title line is stripped and appended to the
    sequence.
    """
    title = ""
    chunks = []
    # The with-block closes the file on normal completion, on a parsing
    # error, and when the caller abandons the generator early.
    with open(filename) as handle:
        for line in handle:
            if line.startswith(">"):
                if title:
                    yield _finish_fasta_record(title, chunks, max_len, truncate)
                title = line[1:].rstrip()
                chunks = []
            elif title:
                chunks.append(line.strip())
            elif line.strip() and not line.startswith("#"):
                # Text before any record that is neither blank nor comment.
                raise ValueError("Bad FASTA line %r" % line)
    if title:
        yield _finish_fasta_record(title, chunks, max_len, truncate)
    # Simply fall off the end: raising StopIteration inside a generator is
    # turned into RuntimeError by Python 3.7+ (PEP 479).
bca9bc7fdaef
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
112 |
19 | 113 |
21
238eae32483c
"Check this is up to date with all 2020 changes (black etc)"
peterjc
parents:
20
diff
changeset
|
def split_fasta(
    input_filename,
    output_filename_base,
    n=500,
    truncate=None,
    keep_descr=False,
    max_len=None,
):
    """Split FASTA file into sub-files each of at most n sequences.

    Returns a list of the filenames used (based on the input filename).
    Each sequence can also be truncated (since we only need the start for
    SignalP), and have its description discarded (since we don't usually
    care about it and some tools don't like very long title lines).

    If a max_len is given and any sequence exceeds it no temp files are
    created and an exception is raised.

    Arguments:
     - input_filename: FASTA file to read (parsed with fasta_iterator).
     - output_filename_base: prefix of the output names, which take the
       form "<base>.<index>.tmp" with index counting from zero.
     - n: maximum number of records per output file.
     - truncate, max_len: passed through to fasta_iterator.
     - keep_descr: if true write the full title line, otherwise only its
       first whitespace separated word.

    Sequences are written wrapped at 60 letters per line. If anything goes
    wrong while parsing or writing, every output file created so far is
    removed and the original exception is re-raised.
    """
    iterator = fasta_iterator(input_filename, max_len, truncate)
    files = []
    try:
        while True:
            records = []
            for _ in range(n):
                try:
                    records.append(next(iterator))
                except StopIteration:
                    break
            if not records:
                break
            new_filename = "%s.%i.tmp" % (output_filename_base, len(files))
            # Record the name before writing, so that a failure part way
            # through this file is also covered by the clean up below.
            files.append(new_filename)
            with open(new_filename, "w") as handle:
                for title, seq in records:
                    if not keep_descr:
                        title = title.split()[0]
                    handle.write(">%s\n" % title)
                    for start in range(0, len(seq), 60):
                        handle.write(seq[start : start + 60] + "\n")
    except Exception:
        # Typically a max length failure from the parser - clean up, then
        # let the caller see the original exception and traceback.
        for f in files:
            if os.path.isfile(f):
                os.remove(f)
        raise
    for f in files:
        assert os.path.isfile(f), "Missing split file %r (!??)" % f
    return files
bca9bc7fdaef
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
172 |
19 | 173 |
20
a19b3ded8f33
v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
peterjc
parents:
19
diff
changeset
|
def run_jobs(jobs, threads, pause=10, verbose=False, fast_fail=True):
    """Take list of cmd strings, return dict with error levels.

    Arguments:
     - jobs: list of command line strings; each is run via the shell
       (shell=True), so callers are responsible for any quoting.
     - threads: maximum number of commands to run at once; must be at
       least one.
     - pause: seconds to sleep between polls of the running commands
       (not used when threads is one).
     - verbose: if true, print progress messages (not used when threads
       is one).
     - fast_fail: if true, once any command returns a non-zero code no
       further pending commands are started (not applied when threads is
       one, where every command is run in turn).

    Returns a dict mapping each command string that was run to its return
    code. Commands skipped because of fast_fail have no entry.

    Raises ValueError if threads is less than one, since no command could
    ever be started.
    """
    if threads < 1:
        raise ValueError("Need at least one thread to run jobs, not %r" % threads)
    results = {}
    if threads == 1:
        # Special case this for speed, don't need the waits
        for cmd in jobs:
            results[cmd] = subprocess.call(cmd, shell=True)
        return results
    pending = list(jobs)
    running = []
    skipped = []
    failed = False
    while pending or running:
        # See if any have finished. Decide per process from its own poll
        # result, so a still running duplicate command is not dropped.
        still_running = []
        for cmd, process in running:
            return_code = process.poll()  # non-blocking
            if return_code is None:
                still_running.append((cmd, process))
                continue
            results[cmd] = return_code
            if return_code:
                failed = True
        running = still_running
        if verbose:
            print(
                "%i jobs pending, %i running, %i completed"
                % (len(pending), len(running), len(results))
            )
        # See if we can start any new threads
        if pending and failed and fast_fail:
            # Don't start any more jobs
            if verbose:
                print("Failed, will not start remaining %i jobs" % len(pending))
            skipped = pending
            pending = []
        while pending and len(running) < threads:
            cmd = pending.pop(0)
            if verbose:
                print(cmd)
            process = subprocess.Popen(cmd, shell=True)
            running.append((cmd, process))
        # Only wait if there is something left to wait for, otherwise the
        # caller pays one extra pause after the final job has finished.
        if running:
            sleep(pause)
    if verbose:
        print("%i jobs completed" % len(results))
    assert set(jobs) == set(results).union(skipped)
    return results