view mytools/revcompl.py @ 11:92eac44fe40d

Uploaded
author xuebing
date Fri, 16 Mar 2012 15:27:31 -0400
parents 39217fa39ff2
children
line wrap: on
line source

import sys

compldna = {'A':'T',
        'C':'G',
        'G':'C',
        'T':'A',
        'U':'A',
        'M':'K',
        'K':'M',
        'W':'W',
        'S':'S',
        'R':'Y',
        'Y':'R',
        'N':'N'}
complrna = {'A':'U',
        'C':'G',
        'G':'C',
        'T':'A',
        'U':'A',
        'M':'K',
        'K':'M',
        'W':'W',
        'S':'S',
        'R':'Y',
        'Y':'R',
        'N':'N'}
def complement(seq,compl):  
    complseq = [compl[base] for base in seq]  
    return complseq
    
def reverse_complement(seq,compl):  
    seq = list(seq)  
    seq.reverse()  
    return ''.join(complement(seq,compl)) 
            
def readFastaFile(infile,outfile,compl):

    fin = open(infile)
    out = open(outfile,'w')
    
    currSeq=''
    currSeqname=None
    for line in fin:
        if '>' in line:
            if  currSeqname !=None:
                out.write(currSeqname+reverse_complement(currSeq,compl)+'\n')
                currSeqname=None
                currSeq=''
            currSeqname=line
        else:
            currSeq=currSeq+line.strip().upper()

    if currSeqname!=None:
        out.write(currSeqname+reverse_complement(currSeq,compl)+'\n')
    
    fin.close()
    out.close()

def readrawseq(infile,outfile,compl):
    '''
    each line is a sequence
    '''
    fin = open(infile)
    out = open(outfile,'w')
    for line in fin:
        out.write(reverse_complement(line.strip().upper(),compl)+'\n')
    fin.close()
    out.close()
    
def main():
    seqfile = sys.argv[1]
    outfile = sys.argv[2]
    fasta = sys.argv[3]
    rna = sys.argv[4]
    if rna == 'rna':
        compl = complrna
    else:
        compl = compldna
    if fasta == 'fasta':
        readFastaFile(seqfile,outfile,compl)
    else:
        readrawseq(seqfile,outfile,compl)

main()