comparison mytools/revcompl.py @ 0:39217fa39ff2

Uploaded
author xuebing
date Tue, 13 Mar 2012 23:34:52 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:39217fa39ff2
1 import sys
2
3 compldna = {'A':'T',
4 'C':'G',
5 'G':'C',
6 'T':'A',
7 'U':'A',
8 'M':'K',
9 'K':'M',
10 'W':'W',
11 'S':'S',
12 'R':'Y',
13 'Y':'R',
14 'N':'N'}
15 complrna = {'A':'U',
16 'C':'G',
17 'G':'C',
18 'T':'A',
19 'U':'A',
20 'M':'K',
21 'K':'M',
22 'W':'W',
23 'S':'S',
24 'R':'Y',
25 'Y':'R',
26 'N':'N'}
27 def complement(seq,compl):
28 complseq = [compl[base] for base in seq]
29 return complseq
30
31 def reverse_complement(seq,compl):
32 seq = list(seq)
33 seq.reverse()
34 return ''.join(complement(seq,compl))
35
36 def readFastaFile(infile,outfile,compl):
37
38 fin = open(infile)
39 out = open(outfile,'w')
40
41 currSeq=''
42 currSeqname=None
43 for line in fin:
44 if '>' in line:
45 if currSeqname !=None:
46 out.write(currSeqname+reverse_complement(currSeq,compl)+'\n')
47 currSeqname=None
48 currSeq=''
49 currSeqname=line
50 else:
51 currSeq=currSeq+line.strip().upper()
52
53 if currSeqname!=None:
54 out.write(currSeqname+reverse_complement(currSeq,compl)+'\n')
55
56 fin.close()
57 out.close()
58
59 def readrawseq(infile,outfile,compl):
60 '''
61 each line is a sequence
62 '''
63 fin = open(infile)
64 out = open(outfile,'w')
65 for line in fin:
66 out.write(reverse_complement(line.strip().upper(),compl)+'\n')
67 fin.close()
68 out.close()
69
70 def main():
71 seqfile = sys.argv[1]
72 outfile = sys.argv[2]
73 fasta = sys.argv[3]
74 rna = sys.argv[4]
75 if rna == 'rna':
76 compl = complrna
77 else:
78 compl = compldna
79 if fasta == 'fasta':
80 readFastaFile(seqfile,outfile,compl)
81 else:
82 readrawseq(seqfile,outfile,compl)
83
84 main()