Mercurial > repos > peterjc > tmhmm_and_signalp
diff tools/protein_analysis/psortb.py @ 19:f3ecd80850e2 draft
v0.2.9 Python style improvements
author | peterjc |
---|---|
date | Wed, 01 Feb 2017 09:46:42 -0500 |
parents | eb6ac44d4b8e |
children | a19b3ded8f33 |
line wrap: on
line diff
--- a/tools/protein_analysis/psortb.py Tue Sep 01 09:56:36 2015 -0400 +++ b/tools/protein_analysis/psortb.py Wed Feb 01 09:46:42 2017 -0500 @@ -24,7 +24,7 @@ import sys import os import tempfile -from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count +from seq_analysis_utils import split_fasta, run_jobs, thread_count FASTA_CHUNK = 500 @@ -33,7 +33,7 @@ sys.exit(os.system("psort --version")) if len(sys.argv) != 8: - sys_exit("Require 7 arguments, number of threads (int), type (e.g. archaea), " + sys.exit("Require 7 arguments, number of threads (int), type (e.g. archaea), " "output (e.g. terse/normal/long), cutoff, divergent, input protein " "FASTA file & output tabular file") @@ -56,10 +56,10 @@ if out_type == "terse": header = ['SeqID', 'Localization', 'Score'] elif out_type == "normal": - sys_exit("Normal output not implemented yet, sorry.") + sys.exit("Normal output not implemented yet, sorry.") elif out_type == "long": if org_type == "-n": - #Gram negative bacteria + # Gram negative bacteria header = ['SeqID', 'CMSVM-_Localization', 'CMSVM-_Details', 'CytoSVM-_Localization', 'CytoSVM-_Details', 'ECSVM-_Localization', 'ECSVM-_Details', 'ModHMM-_Localization', 'ModHMM-_Details', 'Motif-_Localization', 'Motif-_Details', 'OMPMotif-_Localization', 'OMPMotif-_Details', @@ -71,7 +71,7 @@ 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score', 'Secondary_Localization', 'PSortb_Version'] elif org_type == "-p": - #Gram positive bacteria + # Gram positive bacteria header = ['SeqID', 'CMSVM+_Localization', 'CMSVM+_Details', 'CWSVM+_Localization', 'CWSVM+_Details', 'CytoSVM+_Localization', 'CytoSVM+_Details', 'ECSVM+_Localization', 'ECSVM+_Details', 'ModHMM+_Localization', 'ModHMM+_Details', 'Motif+_Localization', 'Motif+_Details', @@ -82,7 +82,7 @@ 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score', 'Secondary_Localization', 'PSortb_Version'] elif org_type == "-a": - #Archaea + # Archaea header = ['SeqID', 'CMSVM_a_Localization', 'CMSVM_a_Details', 'CWSVM_a_Localization', 'CWSVM_a_Details', 'CytoSVM_a_Localization', 'CytoSVM_a_Details', 'ECSVM_a_Localization', 'ECSVM_a_Details', 'ModHMM_a_Localization', 'ModHMM_a_Details', 'Motif_a_Localization', 'Motif_a_Details', @@ -93,27 +93,28 @@ 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score', 'Secondary_Localization', 'PSortb_Version'] else: - sys_exit("Expected -n, -p or -a for the organism type, not %r" % org_type) + sys.exit("Expected -n, -p or -a for the organism type, not %r" % org_type) else: - sys_exit("Expected terse, normal or long for the output type, not %r" % out_type) + sys.exit("Expected terse, normal or long for the output type, not %r" % out_type) tmp_dir = tempfile.mkdtemp() + def clean_tabular(raw_handle, out_handle): """Clean up tabular TMHMM output, returns output line count.""" global header count = 0 for line in raw_handle: if not line.strip() or line.startswith("#"): - #Ignore any blank lines or comment lines + # Ignore any blank lines or comment lines continue parts = [x.strip() for x in line.rstrip("\r\n").split("\t")] if parts == header: - #Ignore the header line + # Ignore the header line continue if not parts[-1] and len(parts) == len(header) + 1: - #Ignore dummy blank extra column, e.g. - #"...2.0\t\tPSORTb version 3.0\t\n" + # Ignore dummy blank extra column, e.g. + # "...2.0\t\tPSORTb version 3.0\t\n" parts = parts[:-1] assert len(parts) == len(header), \ "%i fields, not %i, in line:\n%r" % (len(line), len(header), line) @@ -121,24 +122,25 @@ count += 1 return count -#Note that if the input FASTA file contains no sequences, -#split_fasta returns an empty list (i.e. zero temp files). +# Note that if the input FASTA file contains no sequences, +# split_fasta returns an empty list (i.e. zero temp files). fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "tmhmm"), FASTA_CHUNK) -temp_files = [f+".out" for f in fasta_files] +temp_files = [f + ".out" for f in fasta_files] jobs = ["psort %s %s %s -o %s %s > %s" % (org_type, cutoff, divergent, out_type, fasta, temp) for fasta, temp in zip(fasta_files, temp_files)] + def clean_up(file_list): for f in file_list: if os.path.isfile(f): os.remove(f) try: os.rmdir(tmp_dir) - except: + except Exception: pass if len(jobs) > 1 and num_threads > 1: - #A small "info" message for Galaxy to show the user. + # A small "info" message for Galaxy to show the user. print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)) results = run_jobs(jobs, num_threads) for fasta, temp, cmd in zip(fasta_files, temp_files, jobs): @@ -149,7 +151,7 @@ except IOError: output = "" clean_up(fasta_files + temp_files) - sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), + sys.exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), error_level) del results del jobs @@ -163,7 +165,7 @@ data_handle.close() if not count: clean_up(fasta_files + temp_files) - sys_exit("No output from psortb") + sys.exit("No output from psortb") out_handle.close() print "%i records" % count