comparison tools/regVariation/substitutions.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 #!/usr/bin/env python
2 #Guruprasad ANanda
3 """
4 Fetches substitutions from pairwise alignments.
5 """
6
7 from galaxy import eggs
8
9 from galaxy.tools.util import maf_utilities
10
11 import bx.align.maf
12 import sys
13 import os, fileinput
def stop_err(msg):
    """Report a fatal error and terminate the script.

    ``msg`` is written verbatim to standard error (no newline is appended),
    then the interpreter exits with status 1 so that callers and wrapping
    tools can tell the run failed from the exit code as well as from stderr.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would report success (status 0) despite the error.
    sys.exit(1)
17
# Two positional arguments are required: the input MAF path and the output path.
if len(sys.argv) < 3:
    stop_err("Incorrect number of arguments.")

inp_file, out_file = sys.argv[1:3]
# Opened once at start-up; fetchSubs() and main() write to this shared handle.
fout = open(out_file, 'w')
24
def _write_substitution(out, src, start, text, first_col, last_col):
    """Write one ``src<TAB>start<TAB>end`` line for a run of mismatching columns.

    Alignment columns are turned into sequence positions by adding the
    component's ``start`` and subtracting the number of '-' characters that
    precede the column in ``text``.
    """
    run_start = start + first_col - text[0:first_col].count('-')
    run_end = start + last_col - text[0:last_col].count('-')
    out.write("%s\t%s\t%s\n" % (src, run_start, run_end))


def fetchSubs(block, out=None):
    """Write the substitution runs found in an alignment block.

    The first component of ``block`` is compared column by column against
    every other component.  A run is a maximal stretch of consecutive columns
    in which neither sequence holds one of the characters ``-#$^*?`` and the
    two bases differ (case-insensitively).  For every run two lines are
    written, one per sequence, each giving the source name, the position of
    the first mismatching column and the position of the last mismatching
    column.

    Parameters:
        block: object whose ``components`` sequence holds items exposing
            ``src``, ``text`` and ``start`` attributes.
        out: writable text handle; defaults to the module-level ``fout``.

    Raises:
        IndexError: if a later component's text is shorter than the first
            component's text.
    """
    if out is None:
        out = fout  # module-level handle opened at script start-up

    skip_chars = '-#$^*?'  # gap or masked characters
    first = block.components[0]
    src1 = first.src
    sequence1 = first.text
    start1 = first.start

    for other in block.components[1:]:
        src2 = other.src
        sequence2 = other.text
        start2 = other.start
        run_first = None  # column where the currently open run began
        run_last = None   # most recent mismatching column of that run

        for col, base1 in enumerate(sequence1):
            base2 = sequence2[col]
            comparable = base1 not in skip_chars and base2 not in skip_chars
            if comparable and base1.upper() != base2.upper():
                if run_first is None:
                    run_first = col
                run_last = col
            elif run_first is not None:
                # A matching column or a gap/masked column closes the run.
                # Both cases measure the end from the component start.
                _write_substitution(out, src1, start1, sequence1, run_first, run_last)
                _write_substitution(out, src2, start2, sequence2, run_first, run_last)
                run_first = None

        if run_first is not None:
            # The run extended to the final column, so nothing closed it
            # inside the loop; flush it here instead of dropping it.
            _write_substitution(out, src1, start1, sequence1, run_first, run_last)
            _write_substitution(out, src2, start2, sequence2, run_first, run_last)
63
64
def main():
    """Read the input MAF file and write substitutions for pairwise blocks.

    Writes a ``#Chr<TAB>Start<TAB>End`` header to the module-level ``fout``
    and calls ``fetchSubs`` on every block that has exactly two components.
    Blocks with any other number of components are counted as non-pairwise,
    and blocks for which ``fetchSubs`` raises are counted as skipped; both
    counts are reported on standard output when non-zero.  If the input
    cannot be opened or the MAF reader cannot be created, the script stops
    through ``stop_err``.
    """
    skipped = 0
    not_pairwise = 0
    try:
        maf_file = open(inp_file, 'r')
        maf_reader = bx.align.maf.Reader(maf_file)
    except Exception:
        stop_err("Your MAF file appears to be malformed.")

    try:
        fout.write("#Chr\tStart\tEnd\n")
        for block in maf_reader:
            if len(block.components) != 2:
                not_pairwise += 1
                continue
            try:
                fetchSubs(block)
            except Exception:
                # Best effort: a block that cannot be processed is counted
                # and skipped.  Catching Exception rather than everything
                # lets Ctrl-C and sys.exit() still stop the run.
                skipped += 1
    finally:
        # Flush and release both files even if reading fails part-way.
        maf_file.close()
        fout.close()

    if not_pairwise:
        print("Skipped %d non-pairwise blocks" % not_pairwise)
    if skipped:
        print("Skipped %d blocks" % skipped)


if __name__ == "__main__":
    main()