annotate revcompl.py @ 12:2f4ea569f048

Uploaded
author xuebing
date Sat, 10 Mar 2012 08:10:44 -0500
parents b7f1d9f8f3bc
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
1 import sys
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
2
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
# Complement table used when the output should be DNA.
# Keys are upper-case IUPAC nucleotide codes; 'U' is accepted on input and
# complemented to 'A' just like 'T'.
compldna = {'A':'T',
            'C':'G',
            'G':'C',
            'T':'A',
            'U':'A',
            'M':'K',  # A/C  <-> G/T
            'K':'M',
            'W':'W',  # A/T is its own complement
            'S':'S',  # C/G is its own complement
            'R':'Y',  # A/G  <-> C/T
            'Y':'R',
            'B':'V',  # C/G/T (not A) <-> A/C/G (not T)
            'V':'B',
            'D':'H',  # A/G/T (not C) <-> A/C/T (not G)
            'H':'D',
            'N':'N'}
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
# Complement table used when the output should be RNA.
# Keys are upper-case IUPAC nucleotide codes; 'A' is complemented to 'U',
# and both 'T' and 'U' are accepted on input and complemented to 'A'.
complrna = {'A':'U',
            'C':'G',
            'G':'C',
            'T':'A',
            'U':'A',
            'M':'K',  # A/C  <-> G/U
            'K':'M',
            'W':'W',  # A/U is its own complement
            'S':'S',  # C/G is its own complement
            'R':'Y',  # A/G  <-> C/U
            'Y':'R',
            'B':'V',  # C/G/U (not A) <-> A/C/G (not U)
            'V':'B',
            'D':'H',  # A/G/U (not C) <-> A/C/U (not G)
            'H':'D',
            'N':'N'}
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
def complement(seq,compl):
    '''
    Return the complement of seq as a list of symbols, in the same order.

    seq   -- iterable of single-character symbols (a string or a list)
    compl -- dict mapping each symbol to its complement

    Symbols found in compl are translated directly. A symbol whose
    upper-case form is in compl (e.g. a soft-masked lower-case base) is
    complemented and returned in lower case. Any other symbol, such as an
    alignment gap '-', is passed through unchanged instead of raising
    KeyError.
    '''
    complseq = []
    for base in seq:
        if base in compl:
            complseq.append(compl[base])
        elif base.upper() in compl:
            # lower-case input: complement it but keep it lower-case
            complseq.append(compl[base.upper()].lower())
        else:
            # not a nucleotide code (gap, stop, ...): keep as-is
            complseq.append(base)
    return complseq
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
30
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
def reverse_complement(seq,compl):
    '''
    Return the reverse complement of seq as a single string.

    seq   -- iterable of single-character symbols (a string or a list)
    compl -- complement table handed on to complement()

    The input itself is not modified; a reversed copy is complemented.
    '''
    backwards = list(seq)[::-1]
    complemented = complement(backwards,compl)
    return ''.join(complemented)
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
35
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
def readFastaFile(infile,outfile,compl):
    '''
    Reverse-complement every record of a FASTA file.

    infile  -- path of the FASTA file to read
    outfile -- path of the file to write (overwritten if it exists)
    compl   -- complement table handed on to reverse_complement()

    Any line containing '>' starts a new record and is copied to the output
    verbatim, including its line ending. The sequence lines that follow are
    stripped, upper-cased and joined, and the reverse complement is written
    as one line after the header. Sequence lines that appear before the
    first header belong to no record and are ignored.
    '''
    currSeqname = None
    # Collect the pieces in a list and join once per record; repeated
    # string concatenation is quadratic for long sequences.
    currSeq = []

    # 'with' guarantees both files are closed even if a record fails.
    with open(infile) as fin, open(outfile,'w') as out:
        for line in fin:
            if '>' in line:
                if currSeqname is not None:
                    out.write(currSeqname+reverse_complement(''.join(currSeq),compl)+'\n')
                currSeqname = line
                currSeq = []
            else:
                currSeq.append(line.strip().upper())

        # the last record has no following header to trigger the write
        if currSeqname is not None:
            out.write(currSeqname+reverse_complement(''.join(currSeq),compl)+'\n')
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
58
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
def readrawseq(infile,outfile,compl):
    '''
    Reverse-complement a file in which each line is one sequence.

    infile  -- path of the file to read
    outfile -- path of the file to write (overwritten if it exists)
    compl   -- complement table handed on to reverse_complement()

    Each input line is stripped and upper-cased, and its reverse complement
    is written as one output line, so the output has one line per input line.
    '''
    # 'with' guarantees both files are closed even if a line fails.
    with open(infile) as fin, open(outfile,'w') as out:
        for line in fin:
            out.write(reverse_complement(line.strip().upper(),compl)+'\n')
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
69
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
def main():
    '''
    Command-line entry point.

    Expects four arguments: input file, output file, input format and
    molecule type. The format 'fasta' selects readFastaFile(); any other
    value selects readrawseq(). The molecule type 'rna' selects the
    complrna table; any other value selects compldna.

    Exits with a usage message if fewer than four arguments are given.
    '''
    if len(sys.argv) < 5:
        sys.exit('usage: %s INFILE OUTFILE FORMAT MOLECULE\n'
                 '  FORMAT:   "fasta", anything else means one sequence per line\n'
                 '  MOLECULE: "rna", anything else means DNA' % sys.argv[0])

    seqfile = sys.argv[1]
    outfile = sys.argv[2]
    fasta = sys.argv[3]
    rna = sys.argv[4]

    if rna == 'rna':
        compl = complrna
    else:
        compl = compldna

    if fasta == 'fasta':
        readFastaFile(seqfile,outfile,compl)
    else:
        readrawseq(seqfile,outfile,compl)
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
83
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
# Run the tool only when executed as a script, so that importing this
# module (e.g. to reuse reverse_complement) does not touch sys.argv.
if __name__ == '__main__':
    main()