Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
view orthologs/hmmsearch.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
line wrap: on
line source
"""Run HMMER ``hmmsearch`` for every profile HMM packed in a tabular file.

The input models file holds one model per line in the form
``<name>\\t<model text>``, where newlines inside the model text are stored
as the two characters ``\\n``.  Each model is unpacked into
``results_dir/<name>.hmm``, ``hmmsearch`` is run on every unpacked model in
parallel against a single sequence database, and the individual reports are
concatenated into ``results_dir/results.data``.
"""
import optparse
import os
import subprocess
from multiprocessing import Pool

results_dir = "./data"
results = "results.data"
result_extension = ".out"
model_extension = ".hmm"
# Path of the sequence database; set by main() and, in worker processes, by
# _init_worker().
database = ""

# (escaped token, original character) pairs undone by unescape().
# NOTE(review): these look like the tokens Galaxy substitutes for special
# characters in tool parameters -- confirm against the tool wrapper.
_ESCAPED_TOKENS = (
    ('__gt__', '>'),
    ('__lt__', '<'),
    ('__sq__', "'"),
    ('__dq__', '"'),
    ('__ob__', '['),
    ('__cb__', ']'),
    ('__oc__', '{'),
    ('__cc__', '}'),
    ('__at__', '@'),
    ('__cn__', '\n'),
    ('__cr__', '\r'),
    ('__tc__', '\t'),
    ('__pd__', '#'),
)


def unescape(string):
    """Return *string* with every escaped token replaced by its character.

    For example ``'__gt__'`` becomes ``'>'`` and ``'__sq__'`` becomes ``"'"``.
    Text containing no tokens is returned unchanged.
    """
    # A tuple of pairs (rather than dict.iteritems()) works on both Python 2
    # and Python 3 and gives a deterministic replacement order.
    for token, char in _ESCAPED_TOKENS:
        string = string.replace(token, char)
    return string


def unpackData(models):
    """Unpack every model listed in the file *models* into ``results_dir``.

    Each non-blank line is parsed with :class:`HMM` and its model text is
    appended to ``results_dir/<name>.hmm``.

    Raises:
        IndexError: if a non-blank line contains no tab separator.
        IOError/OSError: if *models* or a target file cannot be opened.
    """
    with open(models) as packed:
        for line in packed:
            # Blank lines (e.g. a trailing newline) carry no model.
            if not line.strip():
                continue
            hmm = HMM(line)
            target = os.path.join(results_dir, hmm.name + model_extension)
            with open(target, "a") as unpacked:
                unpacked.write(hmm.model)


class HMM:
    """One packed model record: a name and the model text."""

    def __init__(self, string):
        """Parse a ``<name>\\t<model>`` record.

        Raises:
            IndexError: if *string* has no tab-separated second field.
        """
        fields = string.split('\t')
        self.model = self.restoreNewLines(fields[1])
        self.name = fields[0]

    def restoreNewLines(self, string):
        """Turn literal backslash-n sequences back into real newlines."""
        return string.replace('\\n', '\n')


def toData(text):
    """Return *text* unchanged.

    Kept as the single hook where a search report could be re-encoded before
    it is written to the combined results file.
    """
    return text


def hmmsearch(input):
    """Run ``hmmsearch`` for the model file *input* inside ``results_dir``.

    The report is written by ``hmmsearch -o`` to
    ``results_dir/<input>.out``; the search target is the module-level
    ``database`` path.

    Returns:
        The exit status of the ``hmmsearch`` process.
    """
    file_name = os.path.join(results_dir, input)
    command = ['hmmsearch', "-o", file_name + result_extension,
               file_name, database]
    return subprocess.call(command)


def _init_worker(database_path):
    """Pool initializer: publish the database path in a worker process.

    Needed because worker processes that do not inherit the parent's memory
    (the ``spawn`` start method) would otherwise see the default ``""``.
    """
    global database
    database = database_path


def main():
    """Parse the command line, unpack the models, search, merge the reports."""
    global database

    usage = """%prog [options]

options (listed below) default to 'None' if omitted
    """
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-i', '--hmm',
        dest='hmm',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of input hmm models.')

    parser.add_option(
        '-d', '--database',
        dest='database',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of sequence database.')

    options, args = parser.parse_args()

    # Both paths are mandatory; fail with a usage message instead of an
    # AttributeError from unescape(None).
    if options.hmm is None or options.database is None:
        parser.error("both --hmm and --database are required")

    models = unescape(options.hmm)
    database = unescape(options.database)

    # Deliberately strict: every file below is opened in append mode, so a
    # pre-existing directory would mix stale data into this run.
    os.mkdir(results_dir)

    unpackData(models)

    list_of_files = [name for name in os.listdir(results_dir)
                     if name.lower().endswith(model_extension)]

    pool = Pool(initializer=_init_worker, initargs=(database,))
    try:
        pool.map(hmmsearch, list_of_files)
    finally:
        pool.close()
        pool.join()

    # Sorted so the combined file is reproducible across runs/filesystems.
    result = sorted(name for name in os.listdir(results_dir)
                    if name.lower().endswith(result_extension))
    with open(os.path.join(results_dir, results), "a") as combined:
        for name in result:
            with open(os.path.join(results_dir, name), "r") as report:
                combined.write(toData(report.read()) + "\n")


if __name__ == '__main__':
    main()