diff alignment/phytab_aliscorecut.py @ 0:5b9a38ec4a39 draft default tip

First commit of old repositories
author osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
date Tue, 11 Mar 2014 12:19:13 -0700
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/alignment/phytab_aliscorecut.py	Tue Mar 11 12:19:13 2014 -0700
@@ -0,0 +1,144 @@
+import os
+import optparse
+import subprocess
+from multiprocessing import Pool
+import shutil
+
+results_dir = "./data"
+results = "results.data"
+fasta_extension = ".afa"
+alicut_prefix = "ALICUT_"
+familyList = []
+galaxy_tool_dir = "/home/galaxy/bin/"
+forbidden_chars = {
+    '(': '__rb__',
+    ')': '__lb__',
+    ':': '__co__',
+    ';': '__sc__',
+    ',': '__cm__',
+    '--': '__dd__',
+    '*': '__st__',
+    '|': '__pi__',
+    ' ': '__sp__'
+}
+
+
+def unescape(string):
+    mapped_chars = {
+        '>': '__gt__',
+        '<': '__lt__',
+        "'": '__sq__',
+        '"': '__dq__',
+        '[': '__ob__',
+        ']': '__cb__',
+        '{': '__oc__',
+        '}': '__cc__',
+        '@': '__at__',
+        '\n': '__cn__',
+        '\r': '__cr__',
+        '\t': '__tc__',
+        '#': '__pd__'
+        }
+
+    for key, value in mapped_chars.iteritems():
+        string = string.replace(value, key)
+
+    return string
+
+
+def unpackData(families):
+    with open(families) as f:
+        for line in f:
+            seq = Sequence(line)
+            with open(results_dir + os.sep + seq.family + fasta_extension, "a") as p:
+                p.write(seq.printFASTA())
+
+
+class Sequence:
+    def __init__(self, string):
+        lis = string.split('\t')
+        self.species = lis[0]
+        self.family = lis[1]
+        self.name = lis[2]
+        self.header = ' '.join(lis[:-1])
+        self.sequence = lis[-1]
+        self.string = string
+
+    def escapedHeader(self):
+        string = self.header
+        for key, value in forbidden_chars.iteritems():
+            string = string.replace(key, value)
+        return string
+
+    def printFASTA(self):
+        return '>' + self.escapedHeader() + '\n' + self.sequence + '\n'
+
+
+def unescapeHeader(header):
+    string = header
+    for key, value in forbidden_chars.iteritems():
+        string = string.replace(value, key)
+    return string
+
+
+def toData(text):
+    text = text.split('\n')
+    result = ''
+    for line in text:
+        if '>' in line:
+            line = '\n' + unescapeHeader(line.replace('>', "")) + '\t'
+        line = line.replace(" ", "\t")
+        result += line
+    return result[1:]  # Index past the first newline char
+
+
+def aliscore(input):
+    file_name = results_dir + os.sep + input
+    # print file_name
+    pop = subprocess.Popen(["perl", "-I", galaxy_tool_dir, galaxy_tool_dir + "Aliscore.02.pl", "-i", file_name])
+    pop.wait()
+
+
+def main():
+    usage = """%prog [options]
+options (listed below) default to 'None' if omitted
+    """
+    parser = optparse.OptionParser(usage=usage)
+
+    parser.add_option(
+        '-i', '--input',
+        dest='families',
+        action='store',
+        type='string',
+        metavar="FILE",
+        help='Name of input sequences.')
+
+    options, args = parser.parse_args()
+
+    families = unescape(options.families)
+
+    os.mkdir(results_dir)
+
+    unpackData(families)
+
+    list_of_files = [file for file in os.listdir(results_dir) if file.lower().endswith(fasta_extension)]
+
+    pool = Pool()
+    pool.map(aliscore, list_of_files)
+
+    alicut = "ALICUT_V2.0_modified.pl"
+    shutil.copy(galaxy_tool_dir + alicut, results_dir + os.sep + alicut)
+    os.chdir(results_dir)
+    pop = subprocess.Popen(["perl", "./" + alicut])
+    pop.wait()
+    os.chdir("../")
+
+    result = [file for file in os.listdir(results_dir) if file.startswith(alicut_prefix)]
+    with open(results_dir + os.sep + results, "a") as f:
+        for file in result:
+            if file.endswith(fasta_extension):
+                with open(results_dir + os.sep + file, "r") as r:
+                    f.write(toData(r.read()) + "\n")
+
+if __name__ == '__main__':
+    main()