Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
comparison phylogenies/phytab_clearcut.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
| author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
|---|---|
| date | Tue, 11 Mar 2014 12:19:13 -0700 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:5b9a38ec4a39 |
|---|---|
| 1 import os | |
| 2 import optparse | |
| 3 import subprocess | |
| 4 from multiprocessing import Pool | |
| 5 | |
# Working directory for the per-family FASTA files and the clearcut output.
# main() replaces this placeholder with "<path>/data" before any file is written.
directory = ""
# Name of the file inside ``directory`` to which main() appends every tree entry.
results = "results.data"
# Suffix of the FASTA files written by saveMulti()/saveSingle() and fed to clearcut.
extension = ".fs"
# Suffix appended to a FASTA file name to form the clearcut output file name.
aligned_extension = ".tre"
# NOTE(review): not read anywhere in the visible code; the data-type flag that
# clearcut() actually uses is the separate global ``indata`` set by main().
datatype = ""
| 11 | |
def unescape(string):
    """Replace every ``__xx__`` escape token in ``string`` with its character.

    The recognised tokens are listed in ``mapped_chars`` below (for example
    ``__gt__`` becomes ``>`` and ``__cn__`` becomes a newline).  Text that
    contains none of the tokens is returned unchanged.

    :param string: text that may contain escape tokens.
    :returns: the text with all known tokens replaced.
    """
    mapped_chars = {
        '>': '__gt__',
        '<': '__lt__',
        "'": '__sq__',
        '"': '__dq__',
        '[': '__ob__',
        ']': '__cb__',
        '{': '__oc__',
        '}': '__cc__',
        '@': '__at__',
        '\n': '__cn__',
        '\r': '__cr__',
        '\t': '__tc__',
        '#': '__pd__',
    }

    # items() is available on both Python 2 and Python 3, whereas the
    # previously used iteritems() raises AttributeError on Python 3.
    for key, value in mapped_chars.items():
        string = string.replace(value, key)

    return string
| 33 | |
| 34 | |
def isTabular(file):
    """Tell whether ``file`` lacks any line that begins with ``>``.

    :param file: path of the text file to inspect.
    :returns: ``False`` as soon as a line starting with ``>`` is found,
        ``True`` when no line (including the case of an empty file) does.
    """
    with open(file) as handle:
        # any() stops reading at the first line that starts with '>'.
        has_header_line = any(row.startswith('>') for row in handle)
    return not has_header_line
| 41 | |
def toData(text, name):
    """Build one ``<label>\\t<tree>`` entry for the results file.

    :param text: content of a tree file; every space character is removed.
    :param name: tree file name; every occurrence of ``fasta`` and of
        ``.fs.tre`` is removed to form the label.
    :returns: the label, a tab, and the space-free text.
    """
    # "fasta" is stripped first because saveSingle() names its file "fasta.fs".
    label = name.replace("fasta", "").replace(".fs.tre", "")
    tree = text.replace(" ", "")
    return "\t".join((label, tree))
| 46 | |
| 47 # | |
| 48 #def toData(text): | |
| 49 # text = text.split('\n') | |
| 50 # result = '' | |
| 51 # for line in text: | |
| 52 # if '>' in line: | |
| 53 # line = '\n' + line.replace('>', "") + '\t' | |
| 54 # line = line.replace(" ", "\t") | |
| 55 # result += line | |
| 56 # return result[1:] # Index past the first newline char | |
| 57 | |
def clearcut(input):
    """Run the external ``clearcut`` program on one file and wait for it.

    The input path is ``directory`` + separator + ``input``; the output path
    is that same path with ``aligned_extension`` appended.  The module-level
    ``indata`` value is passed through as the final command-line argument.
    The exit status of the program is not inspected.

    :param input: name of a file located inside ``directory``.
    """
    source_path = os.sep.join((directory, input))
    command = [
        'clearcut',
        "--in=" + source_path,
        "--out=" + source_path + aligned_extension,
        "--alignment",
        "-k",
        indata,
    ]
    # Block until the child process has finished.
    subprocess.Popen(command).wait()
| 62 | |
class Sequence:
    """A single record parsed from one whitespace-separated line.

    The first three fields are stored as ``species``, ``family`` and ``name``;
    the last field is the ``sequence`` and everything before it, re-joined
    with single spaces, is the ``header``.
    """

    def __init__(self, string):
        """Split ``string`` on whitespace and store the fields.

        :param string: the raw line; also kept verbatim in ``self.string``.
        :raises IndexError: if the line has fewer than three fields.
        """
        fields = string.split()
        self.species = fields[0]
        self.family = fields[1]
        self.name = fields[2]
        self.sequence = fields[-1]
        # Header is every field except the trailing sequence.
        self.header = ' '.join(fields[:-1])
        self.string = string

    def printFASTA(self):
        """Return the record as FASTA text: ``>header``, newline, sequence, newline."""
        return '>%s\n%s\n' % (self.header, self.sequence)
| 75 | |
def saveMulti(tabFile):
    """Split a tabular sequence file into one FASTA file per family.

    Every non-blank line of ``tabFile`` is parsed with ``Sequence`` and its
    FASTA form is appended to ``<directory>/<family><extension>``.

    :param tabFile: path of the tabular input file.
    :raises IndexError: propagated from ``Sequence`` for a non-blank line
        that has fewer than three whitespace-separated fields.
    """
    with open(tabFile) as f:
        for line in f:
            # Blank or whitespace-only lines (e.g. a trailing newline at the
            # end of the file) carry no record; parsing them would raise
            # IndexError inside Sequence, so skip them.
            if not line.strip():
                continue
            seq = Sequence(line)
            # Append mode: records of the same family accumulate in one file.
            with open(directory + os.sep + seq.family + extension, "a") as p:
                p.write(seq.printFASTA())
| 82 | |
def saveSingle(fastaFile):
    """Append the content of ``fastaFile`` to ``<directory>/fasta<extension>``.

    The input is copied line by line; the target file is opened in append
    mode for each line, so nothing is created when the input has no lines.

    :param fastaFile: path of the FASTA input file.
    """
    target_path = directory + os.sep + "fasta" + extension
    with open(fastaFile) as source:
        for fasta_line in source:
            with open(target_path, "a") as sink:
                sink.write(fasta_line)
| 88 | |
def _init_worker(work_dir, type_flag):
    """Copy the run settings into the module globals of a pool worker.

    Workers started by forking inherit these globals, but workers started as
    fresh interpreters do not; setting them explicitly covers both cases.
    """
    global directory
    global indata
    directory = work_dir
    indata = type_flag


def main():
    """Command-line entry point: build one clearcut tree per input family.

    Steps:
      1. Parse ``-d/--directory``, ``-i/--in`` and ``-t/--type``.
      2. Create ``<directory option>/data`` as the working directory.
      3. Write the input into FASTA files there (one per family for tabular
         input, a single ``fasta.fs`` otherwise).
      4. Run ``clearcut`` on every ``.fs`` file in a process pool.
      5. Append one ``toData`` entry per ``.tre`` file to ``results.data``.

    Exits with a usage error when ``-i`` or ``-t`` is missing.
    """
    global directory
    global indata

    usage = """%prog [options]
options (listed below) default to 'None' if omitted
    """
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-d', '--directory',
        metavar="PATH",
        dest='path',
        default='.',
        help='Path to working directory.')

    parser.add_option(
        '-i', '--in',
        dest='input',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of input data.')

    parser.add_option(
        '-t', '--type',
        dest='datatype',
        action='store',
        type='string',
        help='-P for protein. -D for DNA.')

    options, args = parser.parse_args()

    # Both options are needed below; without this check a missing option
    # surfaces as an AttributeError inside unescape(None).
    if options.input is None:
        parser.error("an input file is required (-i/--in)")
    if options.datatype is None:
        parser.error("a data type is required (-t/--type)")

    inputFile = unescape(options.input)
    directory = unescape(options.path) + os.sep + "data"
    indata = "-" + unescape(options.datatype)

    # Raises OSError if the directory already exists, which keeps files from
    # an earlier run out of this run's results.
    os.mkdir(directory)

    if isTabular(inputFile):
        saveMulti(inputFile)
    else:
        saveSingle(inputFile)

    list_of_files = [entry for entry in os.listdir(directory)
                     if entry.lower().endswith(extension)]

    # Hand the run settings to every worker explicitly and always release the
    # pool, even if one of the clearcut jobs raises.
    pool = Pool(initializer=_init_worker, initargs=(directory, indata))
    try:
        pool.map(clearcut, list_of_files)
    finally:
        pool.close()
        pool.join()

    result = [entry for entry in os.listdir(directory)
              if entry.lower().endswith(aligned_extension)]
    with open(directory + os.sep + results, "a") as f:
        for tree_file in result:
            with open(directory + os.sep + tree_file, "r") as r:
                f.write(toData(r.read(), tree_file))
| 141 | |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported (e.g. by multiprocessing workers).
if __name__ == '__main__':
    main()
