diff mytools/revcompl.py @ 0:39217fa39ff2

Uploaded
author xuebing
date Tue, 13 Mar 2012 23:34:52 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mytools/revcompl.py	Tue Mar 13 23:34:52 2012 -0400
@@ -0,0 +1,84 @@
+import sys
+
+compldna = {'A':'T',
+        'C':'G',
+        'G':'C',
+        'T':'A',
+        'U':'A',
+        'M':'K',
+        'K':'M',
+        'W':'W',
+        'S':'S',
+        'R':'Y',
+        'Y':'R',
+        'N':'N'}
+complrna = {'A':'U',
+        'C':'G',
+        'G':'C',
+        'T':'A',
+        'U':'A',
+        'M':'K',
+        'K':'M',
+        'W':'W',
+        'S':'S',
+        'R':'Y',
+        'Y':'R',
+        'N':'N'}
+def complement(seq,compl):  
+    complseq = [compl[base] for base in seq]  
+    return complseq
+    
+def reverse_complement(seq,compl):  
+    seq = list(seq)  
+    seq.reverse()  
+    return ''.join(complement(seq,compl)) 
+            
+def readFastaFile(infile,outfile,compl):
+
+    fin = open(infile)
+    out = open(outfile,'w')
+    
+    currSeq=''
+    currSeqname=None
+    for line in fin:
+        if '>' in line:
+            if  currSeqname !=None:
+                out.write(currSeqname+reverse_complement(currSeq,compl)+'\n')
+                currSeqname=None
+                currSeq=''
+            currSeqname=line
+        else:
+            currSeq=currSeq+line.strip().upper()
+
+    if currSeqname!=None:
+        out.write(currSeqname+reverse_complement(currSeq,compl)+'\n')
+    
+    fin.close()
+    out.close()
+
+def readrawseq(infile,outfile,compl):
+    '''
+    each line is a sequence
+    '''
+    fin = open(infile)
+    out = open(outfile,'w')
+    for line in fin:
+        out.write(reverse_complement(line.strip().upper(),compl)+'\n')
+    fin.close()
+    out.close()
+    
+def main():
+    seqfile = sys.argv[1]
+    outfile = sys.argv[2]
+    fasta = sys.argv[3]
+    rna = sys.argv[4]
+    if rna == 'rna':
+        compl = complrna
+    else:
+        compl = compldna
+    if fasta == 'fasta':
+        readFastaFile(seqfile,outfile,compl)
+    else:
+        readrawseq(seqfile,outfile,compl)
+
+main()