Mercurial > repos > peterjc > tmhmm_and_signalp
diff tools/protein_analysis/seq_analysis_utils.py @ 9:e52220a9ddad draft
Uploaded v0.1.2
Use the new <stdio> settings in the XML wrappers to catch errors.
Obeys SGE style XNSLOTS environment variable for thread count (otherwise default to 4).
author | peterjc |
---|---|
date | Fri, 25 Jan 2013 06:08:31 -0500 |
parents | 9b45a8743100 |
children | e6cc27d182a8 |
line wrap: on
line diff
--- a/tools/protein_analysis/seq_analysis_utils.py Mon Jul 30 12:56:54 2012 -0400
+++ b/tools/protein_analysis/seq_analysis_utils.py Fri Jan 25 06:08:31 2013 -0500
@@ -19,6 +19,56 @@
     sys.stderr.write("%s\n" % msg)
     sys.exit(error_level)
 
+try:
+    from multiprocessing import cpu_count
+except ImportError:
+    #Must be under Python 2.5, this is copied from multiprocessing:
+    def cpu_count():
+        """Returns the number of CPUs in the system."""
+        if sys.platform == 'win32':
+            try:
+                num = int(os.environ['NUMBER_OF_PROCESSORS'])
+            except (ValueError, KeyError):
+                num = 0
+        elif 'bsd' in sys.platform or sys.platform == 'darwin':
+            comm = '/sbin/sysctl -n hw.ncpu'
+            if sys.platform == 'darwin':
+                comm = '/usr' + comm
+            try:
+                with os.popen(comm) as p:
+                    num = int(p.read())
+            except ValueError:
+                num = 0
+        else:
+            try:
+                num = os.sysconf('SC_NPROCESSORS_ONLN')
+            except (ValueError, OSError, AttributeError):
+                num = 0
+
+        if num >= 1:
+            return num
+        else:
+            raise NotImplementedError('cannot determine number of cpus')
+
+
+def thread_count(command_line_arg, default=1):
+    try:
+        num = int(command_line_arg)
+    except:
+        num = default
+    if num < 1:
+        stop_err("Threads argument %r is not a positive integer" % command_line_arg)
+    #Cap this with the pysical limit of the machine,
+    try:
+        num = min(num, cpu_count())
+    except NotImplementedError:
+        pass
+    #For debugging,
+    #hostname = os.environ.get("HOSTNAME", "this machine")
+    #sys.stderr.write("Using %i cores on %s\n" % (num, hostname))
+    return num
+
+
 def fasta_iterator(filename, max_len=None, truncate=None):
     """Simple FASTA parser yielding tuples of (title, sequence) strings."""
     handle = open(filename)
@@ -109,6 +159,11 @@
     pending = jobs[:]
     running = []
     results = {}
+    if threads == 1:
+        #Special case this for speed, don't need the waits
+        for cmd in jobs:
+            results[cmd] = subprocess.call(cmd, shell=True)
+        return results
     while pending or running:
         #See if any have finished
         for (cmd, process) in running: