view alignment/phytab_mafft.py @ 0:5b9a38ec4a39 draft default tip

First commit of old repositories
author osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
date Tue, 11 Mar 2014 12:19:13 -0700
parents
children
line wrap: on
line source

#!/usr/bin/env python

import os
import optparse
import subprocess
from multiprocessing import Pool

directory = ""
results = "results.data"
extension = ".fs"
aligned_extension = ".afa"


def unescape(string):
    mapped_chars = {
        '>': '__gt__',
        '<': '__lt__',
        "'": '__sq__',
        '"': '__dq__',
        '[': '__ob__',
        ']': '__cb__',
        '{': '__oc__',
        '}': '__cc__',
        '@': '__at__',
        '\n': '__cn__',
        '\r': '__cr__',
        '\t': '__tc__',
        '#': '__pd__'
        }

    for key, value in mapped_chars.iteritems():
        string = string.replace(value, key)

    return string


def isTabular(file):
    with open(file) as f:
        for line in f:
            if line[0] == '>':
                return False
    return True


def toData(text):
	text = text.split('\n')
	result = ''
	for line in text:
	    if '>' in line:
	        line = '\n' + line.replace('> ', "") + '\t'
	    line = line.replace(" ", "\t")
	    result += line
	return result[1:]  # Index past the first newline char

def toDataSingle(text):
	text = text.split('\n')
	result = ''
	for line in text:
		line = line + '\n'
 		result += line
	return result[1:]  # Index past the first newline char

def mafftauto(input):
    file_name = directory + os.sep + input
    aln = file_name + aligned_extension
    call = subprocess.call(['mafft', '--auto', '--out', aln, file_name])  

def mafft1(input):
    file_name = directory + os.sep + input
    aln = file_name + aligned_extension
    call = subprocess.call(['mafft', '--retree', '1', '--out', aln, file_name])  

def mafft2(input):
    file_name = directory + os.sep + input
    aln = file_name + aligned_extension
    call = subprocess.call(['mafft', '--retree', '2', '--out', aln, file_name])  

def maffti(input):
    file_name = directory + os.sep + input
    aln = file_name + aligned_extension
    call = subprocess.call(['mafft-fftnsi', '--out', aln, file_name])  

def maffteinsi(input):
    file_name = directory + os.sep + input
    aln = file_name + aligned_extension
    call = subprocess.call(['mafft-einsi', '--out', aln, file_name])  

def mafftlinsi(input):
    file_name = directory + os.sep + input
    aln = file_name + aligned_extension
    call = subprocess.call(['mafft-linsi', '--out', aln, file_name])  

def mafftginsi(input):
    file_name = directory + os.sep + input
    aln = file_name + aligned_extension
    call = subprocess.call(['mafft-ginsi', '--out', aln, file_name])  

def mafftqinsi(input):
    file_name = directory + os.sep + input
    aln = file_name + aligned_extension
    call = subprocess.call(['mafft-qinsi', '--out', aln, file_name])  


class Sequence:
    def __init__(self, string):
        lis = string.split()
        self.species = lis[0]
        self.family = lis[1]
        self.name = lis[2]
        self.header = ' '.join(lis[:-1])
        self.sequence = lis[-1]
        self.string = string

    def printFASTA(self):
        return '> ' + self.header + '\n' + self.sequence + '\n'


def saveMulti(tabFile):
    with open(tabFile) as f:
        for line in f:
            seq = Sequence(line)
            with open(directory + os.sep + seq.family + extension, "a") as p:
                p.write(seq.printFASTA())


def saveSingle(fastaFile):
    with open(fastaFile) as f:
        for line in f:
            with open(directory + os.sep + "fasta" + extension, "a") as p:
                p.write(line)


def main():
    usage = """%prog [options]
options (listed below) default to 'None' if omitted
    """
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-d', '--directory',
        metavar="PATH",
        dest='path',
        default='.',
        help='Path to working directory.')

    parser.add_option(
        '-i', '--in',
        dest='input',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of input data.')
    
    parser.add_option(
        '-s', '--strat',
        dest='strategy',
        action='store',
        type='string',
        help='Alignement algorithm to use.')

    options, args = parser.parse_args()

    global directory
    inputFile = unescape(options.input)
    directory = unescape(options.path) + os.sep + "data"
    strategy = unescape(options.strategy)
    
    os.mkdir(directory)

    if isTabular(inputFile):
        saveMulti(inputFile)
    else:
        saveSingle(inputFile)

    pool = Pool()
    list_of_files = [file for file in os.listdir(directory) if file.lower().endswith(extension)]
    list_of_files = sorted(list_of_files)
    if strategy == 'Auto':
        pool.map(mafftauto, list_of_files)
        
    elif strategy == 'FFT-NS-1':
        pool.map(mafft1, list_of_files)
    
    elif strategy == 'FFT-NS-2':
        pool.map(mafft2, list_of_files)

    elif strategy == 'FFT-NS-i':
        pool.map(maffti, list_of_files)

    elif strategy == 'E-INS-i':
        pool.map(maffteinsi, list_of_files)

    elif strategy == 'L-INS-i':
        pool.map(mafftlinsi, list_of_files)

    elif strategy == 'G-INS-i':
        pool.map(mafftginsi, list_of_files)

    elif strategy == 'Q-INS-i':
        pool.map(mafftqinsi, list_of_files)
    
    result = [file for file in os.listdir(directory) if file.lower().endswith(aligned_extension)]
    if isTabular(inputFile):
	with open(directory + os.sep + results, "a") as f:
	    for file in result:
	        with open(directory + os.sep + file, "r") as r:
	            f.write(toData(r.read()) + "\n")
    else:
	with open(directory + os.sep + results, "a") as f:
	    for file in result:
	        with open(directory + os.sep + file, "r") as r:
	            f.write(toDataSingle(r.read()) + "\n")

if __name__ == '__main__':
    main()