Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
view alignment/phytab_aliscorecut.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
line wrap: on
line source
# Trim PHYTAB alignments with Aliscore and ALICUT.
#
# Workflow (see main()):
#   1. Split the tab-separated input into one FASTA file per family inside
#      `results_dir`.
#   2. Run Aliscore.02.pl on every FASTA file, in parallel.
#   3. Copy ALICUT_V2.0_modified.pl into `results_dir` and run it there.
#   4. Convert every ALICUT_*.afa file back to tab-separated rows and append
#      them to `results_dir`/`results`.
import os
import optparse
import subprocess
from multiprocessing import Pool
import shutil

results_dir = "./data"
results = "results.data"
fasta_extension = ".afa"
alicut_prefix = "ALICUT_"
familyList = []
galaxy_tool_dir = "/home/galaxy/bin/"

# Characters that are replaced by placeholder tokens in FASTA headers before
# the files are handed to the Perl tools, and restored afterwards.
forbidden_chars = {
    '(': '__rb__',
    ')': '__lb__',
    ':': '__co__',
    ';': '__sc__',
    ',': '__cm__',
    '--': '__dd__',
    '*': '__st__',
    '|': '__pi__',
    ' ': '__sp__',
}


def unescape(string):
    """Return `string` with the `__xx__` placeholder tokens turned back
    into the characters they stand for (e.g. '__at__' -> '@')."""
    mapped_chars = {
        '>': '__gt__',
        '<': '__lt__',
        "'": '__sq__',
        '"': '__dq__',
        '[': '__ob__',
        ']': '__cb__',
        '{': '__oc__',
        '}': '__cc__',
        '@': '__at__',
        '\n': '__cn__',
        '\r': '__cr__',
        '\t': '__tc__',
        '#': '__pd__',
    }
    # .items() works on both Python 2 and Python 3 (iteritems() is 2-only).
    for key, value in mapped_chars.items():
        string = string.replace(value, key)
    return string


def unpackData(families):
    """Split the tab-separated file `families` into per-family FASTA files.

    Each non-blank line is parsed as a Sequence and appended to
    `results_dir`/<family><fasta_extension>.
    """
    with open(families) as f:
        for line in f:
            # A blank (e.g. trailing) line carries no record; skip it rather
            # than fail with an IndexError while parsing.
            if not line.strip():
                continue
            seq = Sequence(line)
            target = os.path.join(results_dir, seq.family + fasta_extension)
            with open(target, "a") as p:
                p.write(seq.printFASTA())


class Sequence(object):
    """One tab-separated input row: species, family, name, ..., sequence."""

    def __init__(self, string):
        """Parse one input line.

        The first three fields are species, family and name; the last field
        is the sequence. All fields but the last form the header.
        """
        # Drop the line terminator so it does not end up inside the sequence
        # (it used to produce a blank line, or a stray '\r', per record).
        lis = string.rstrip('\r\n').split('\t')
        self.species = lis[0]
        self.family = lis[1]
        self.name = lis[2]
        self.header = ' '.join(lis[:-1])
        self.sequence = lis[-1]
        self.string = string  # the raw, unmodified input line

    def escapedHeader(self):
        """Return the header with every forbidden character replaced by its
        placeholder token."""
        string = self.header
        for key, value in forbidden_chars.items():
            string = string.replace(key, value)
        return string

    def printFASTA(self):
        """Return the record as a two-line FASTA entry."""
        return '>' + self.escapedHeader() + '\n' + self.sequence + '\n'


def unescapeHeader(header):
    """Inverse of Sequence.escapedHeader: turn placeholder tokens in
    `header` back into the original characters."""
    string = header
    for key, value in forbidden_chars.items():
        string = string.replace(value, key)
    return string


def toData(text):
    """Convert FASTA `text` to tab-separated rows, one row per record.

    Header lines (those containing '>') start a new row: the '>' is removed,
    placeholder tokens are unescaped and a tab is appended. All following
    lines are concatenated onto that row. Spaces are turned into tabs, which
    restores the column separators of the header.
    """
    pieces = []
    for line in text.split('\n'):
        if '>' in line:
            line = '\n' + unescapeHeader(line.replace('>', "")) + '\t'
        # NOTE(review): applied to every line; only header lines are expected
        # to contain spaces.
        line = line.replace(" ", "\t")
        pieces.append(line)
    result = ''.join(pieces)
    # Drop the newline emitted in front of the first header, but only if it
    # is really there, so text without a leading header keeps its first char.
    if result.startswith('\n'):
        return result[1:]
    return result


def aliscore(input):
    """Run Aliscore.02.pl on the file named `input` inside `results_dir`
    and wait for it to finish. The exit status is not checked."""
    file_name = os.path.join(results_dir, input)
    subprocess.call(["perl", "-I", galaxy_tool_dir,
                     galaxy_tool_dir + "Aliscore.02.pl", "-i", file_name])


def main():
    """Command-line entry point; see the workflow described at the top."""
    usage = ("%prog [options] options \n"
             "(listed below) default to 'None' if omitted ")
    parser = optparse.OptionParser(usage=usage)
    parser.add_option(
        '-i', '--input',
        dest='families',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of input sequences.')
    options, args = parser.parse_args()

    # Without -i, options.families is None and unescape() would crash with
    # an AttributeError; report a usage error instead.
    if not options.families:
        parser.error("an input file is required (-i/--input)")
    families = unescape(options.families)

    # os.mkdir fails if the directory already exists. That is kept on
    # purpose: all files below are opened in append mode, so reusing a stale
    # directory would mix old and new data.
    os.mkdir(results_dir)
    unpackData(families)

    list_of_files = [name for name in os.listdir(results_dir)
                     if name.lower().endswith(fasta_extension)]

    pool = Pool()
    try:
        pool.map(aliscore, list_of_files)
    finally:
        # Always release the worker processes.
        pool.close()
        pool.join()

    alicut = "ALICUT_V2.0_modified.pl"
    shutil.copy(galaxy_tool_dir + alicut, os.path.join(results_dir, alicut))
    # Run ALICUT with results_dir as its working directory instead of
    # changing (and restoring) the working directory of this process.
    subprocess.call(["perl", "./" + alicut], cwd=results_dir)

    # The copied script itself also starts with alicut_prefix; the extension
    # check keeps it out of the result.
    trimmed = [name for name in os.listdir(results_dir)
               if name.startswith(alicut_prefix)
               and name.endswith(fasta_extension)]
    with open(os.path.join(results_dir, results), "a") as out:
        for name in trimmed:
            with open(os.path.join(results_dir, name), "r") as r:
                out.write(toData(r.read()) + "\n")


if __name__ == '__main__':
    main()