annotate tools/regVariation/substitutions.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 #Guruprasad ANanda
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 Fetches substitutions from pairwise alignments.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 from galaxy import eggs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 from galaxy.tools.util import maf_utilities
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 import bx.align.maf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 import sys
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 import os, fileinput
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 def stop_err(msg):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 sys.stderr.write(msg)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 sys.exit()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 if len(sys.argv) < 3:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 stop_err("Incorrect number of arguments.")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 inp_file = sys.argv[1]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 out_file = sys.argv[2]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 fout = open(out_file, 'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 def fetchSubs(block):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 src1 = block.components[0].src
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 sequence1 = block.components[0].text
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 start1 = block.components[0].start
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 end1 = block.components[0].end
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 len1 = int(end1)-int(start1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 len1_withgap = len(sequence1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 for seq in range (1,len(block.components)):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 src2 = block.components[seq].src
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 sequence2 = block.components[seq].text
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 start2 = block.components[seq].start
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 end2 = block.components[seq].end
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 len2 = int(end2)-int(start2)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 sub_begin = None
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 sub_end = None
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 begin = False
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 for nt in range(len1_withgap):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 if sequence1[nt] not in '-#$^*?' and sequence2[nt] not in '-#$^*?': #Not a gap or masked character
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 if sequence1[nt].upper() != sequence2[nt].upper():
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 if not(begin):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 sub_begin = nt
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 begin = True
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 sub_end = nt
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 if begin:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 print >>fout, "%s\t%s\t%s" %(src1,start1+sub_begin-sequence1[0:sub_begin].count('-'),start1+sub_end-sequence1[0:sub_end].count('-'))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 print >>fout, "%s\t%s\t%s" %(src2,start2+sub_begin-sequence2[0:sub_begin].count('-'),start2+sub_end-sequence2[0:sub_end].count('-'))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 begin = False
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 if begin:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 print >>fout, "%s\t%s\t%s" %(src1,start1+sub_begin-sequence1[0:sub_begin].count('-'),end1+sub_end-sequence1[0:sub_end].count('-'))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 print >>fout, "%s\t%s\t%s" %(src2,start2+sub_begin-sequence2[0:sub_begin].count('-'),end2+sub_end-sequence2[0:sub_end].count('-'))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 begin = False
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 ended = False
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 def main():
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 skipped = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 not_pairwise = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 maf_reader = bx.align.maf.Reader( open(inp_file, 'r') )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 stop_err("Your MAF file appears to be malformed.")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 print >>fout, "#Chr\tStart\tEnd"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 for block in maf_reader:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 if len(block.components) != 2:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 not_pairwise += 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 continue
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 fetchSubs(block)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 skipped += 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 if not_pairwise:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 print "Skipped %d non-pairwise blocks" %(not_pairwise)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 if skipped:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 print "Skipped %d blocks" %(skipped)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 if __name__ == "__main__":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 main()