Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
diff orthologs/hmmsearch.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/orthologs/hmmsearch.py Tue Mar 11 12:19:13 2014 -0700 @@ -0,0 +1,116 @@ +import os +import optparse +import subprocess +from multiprocessing import Pool + +results_dir = "./data" +results = "results.data" +result_extension = ".out" +model_extension = ".hmm" +database = "" + + +def unescape(string): + mapped_chars = { + '>': '__gt__', + '<': '__lt__', + "'": '__sq__', + '"': '__dq__', + '[': '__ob__', + ']': '__cb__', + '{': '__oc__', + '}': '__cc__', + '@': '__at__', + '\n': '__cn__', + '\r': '__cr__', + '\t': '__tc__', + '#': '__pd__' + } + + for key, value in mapped_chars.iteritems(): + string = string.replace(value, key) + + return string + + +def unpackData(models): + with open(models) as f: + for line in f: + hmm = HMM(line) + with open(results_dir + os.sep + hmm.name + model_extension, "a") as p: + # print(hmm.model) + p.write(hmm.model) + + +class HMM: + def __init__(self, string): + lis = string.split('\t') + # print lis + self.model = self.restoreNewLines(lis[1]) + self.name = lis[0] + + def restoreNewLines(self, string): + return string.replace('\\n', '\n') + + +def toData(text): + # lis = text.split() + # name = lis[index_of_name_in_hmm] + # text = name + "\t" + text.replace("\n", "\\n") + # text = text.replace("\n", "\\n") + return text + + +def hmmsearch(input): + file_name = results_dir + os.sep + input + # print file_name + # return subprocess.Popen(['hmmbuild', "--informat", "afa", file_name + ".hmm", file_name], stdout=subprocess.PIPE).communicate()[0] # ./muscle + pop = subprocess.Popen(['hmmsearch', "-o", file_name + result_extension, file_name, database]) + pop.wait() + + +def main(): + usage = """%prog [options] +options (listed below) default to 'None' if omitted + """ + parser = optparse.OptionParser(usage=usage) + + parser.add_option( + '-i', '--hmm', + dest='hmm', + action='store', + type='string', + metavar="FILE", + help='Name of input hmm models.') + + parser.add_option( + '-d', '--database', + dest='database', + action='store', + type='string', + metavar="FILE", + help='Name of sequence database.') + + options, args = parser.parse_args() + + global database + models = unescape(options.hmm) + database = unescape(options.database) + + os.mkdir(results_dir) + + unpackData(models) + + list_of_files = [file for file in os.listdir(results_dir) if file.lower().endswith(model_extension)] + + pool = Pool() + pool.map(hmmsearch, list_of_files) + + result = [file for file in os.listdir(results_dir) if file.lower().endswith(result_extension)] + with open(results_dir + os.sep + results, "a") as f: + for file in result: + with open(results_dir + os.sep + file, "r") as r: + f.write(toData(r.read()) + "\n") + +if __name__ == '__main__': + main()