annotate substitutions.py @ 0:c54f5d0bbb58 draft

Imported from capsule None
author devteam
date Tue, 01 Apr 2014 10:50:41 -0400
parents
children aca54f2b2151
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
2 #Guruprasad Ananda
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
3 """
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
4 Fetches substitutions from pairwise alignments.
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
5 """
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
6
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
7 from galaxy import eggs
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
8
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
9 from galaxy.tools.util import maf_utilities
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
10
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
11 import bx.align.maf
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
12 import sys
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
13
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
def stop_err(msg):
    """Write *msg* to stderr and terminate the script with a failing status.

    The message is written exactly as given (no newline is appended).

    Args:
        msg: Text to emit on standard error.

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would report status 0 (success) even though this
    # function is only called on fatal errors; exit non-zero instead.
    sys.exit(1)
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
17
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
18
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
# Expected invocation: <script> <input MAF file> <output file>.
# Abort early when either positional argument is missing.
if len(sys.argv) < 3:
    stop_err("Incorrect number of arguments.")

# First argument is the input path, second the output path.
inp_file, out_file = sys.argv[1:3]

# Module-level output handle; the functions below write their results to it.
fout = open(out_file, 'w')
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
25
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
def _ungapped_offset(text, column):
    """Return the number of non-'-' characters in text[0:column]."""
    return column - text.count('-', 0, column)


def _write_substitution(out, comp, first_col, last_col):
    """Write one 'src<TAB>start<TAB>end' line for alignment columns first_col..last_col of comp."""
    out.write("%s\t%s\t%s\n" % (
        comp.src,
        comp.start + _ungapped_offset(comp.text, first_col),
        comp.start + _ungapped_offset(comp.text, last_col),
    ))


def fetchSubs(block, out=None):
    """Report runs of substituted columns between the first component and each other one.

    The first component of *block* is compared column by column with every
    other component.  A column counts as a substitution when neither
    character is one of '-#$^*?' and the two characters differ ignoring
    case.  Consecutive substitution columns form one run; for every run two
    tab-separated lines are written, one per component:

        <src> <start + ungapped offset of first column> <start + ungapped offset of last column>

    Only '-' characters are discounted when converting an alignment column
    to a sequence offset.

    NOTE(review): the end value is the offset of the last substituted base
    itself (a one-base run has start == end), and component strand is not
    consulted -- confirm this matches what downstream consumers expect.

    Args:
        block: Object with a ``components`` sequence whose items expose
            ``src``, ``text`` and ``start``.
        out: Writable file-like object.  Defaults to the module-level
            ``fout`` handle.

    Raises:
        IndexError: if another component's text is shorter than the first
            component's text.  Lines for earlier runs may already have been
            written when this happens.
    """
    if out is None:
        out = fout

    skip_chars = '-#$^*?'  # gap or masked characters
    ref = block.components[0]
    ref_text = ref.text

    for other in block.components[1:]:
        other_text = other.text
        run_first = None  # column where the current run started, None if no run is open
        run_last = None

        for col in range(len(ref_text)):
            ref_char = ref_text[col]
            other_char = other_text[col]
            is_substitution = (
                ref_char not in skip_chars
                and other_char not in skip_chars
                and ref_char.upper() != other_char.upper()
            )
            if is_substitution:
                if run_first is None:
                    run_first = col
                run_last = col
            elif run_first is not None:
                # A matching, gap or masked column closes the run.  Both
                # coordinates are offsets from the component start; the
                # gap/masked case previously added the offset to the
                # component *end*, producing positions outside the block.
                _write_substitution(out, ref, run_first, run_last)
                _write_substitution(out, other, run_first, run_last)
                run_first = None

        # A run that reaches the last alignment column has no closing
        # column, so it must be flushed here or it would be lost.
        if run_first is not None:
            _write_substitution(out, ref, run_first, run_last)
            _write_substitution(out, other, run_first, run_last)
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
59
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
60
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
def main():
    """Scan the input MAF file and write substitution intervals to the output file.

    Opens ``inp_file`` with ``bx.align.maf.Reader``, writes the
    ``#Chr<TAB>Start<TAB>End`` header to ``fout`` and calls ``fetchSubs`` on
    every block that has exactly two components.  Blocks with any other
    number of components, and blocks for which ``fetchSubs`` raises, are
    counted and summarised on standard output.

    Both the input handle and ``fout`` are closed before returning, including
    when the script aborts through ``stop_err``.
    """
    skipped = 0
    not_pairwise = 0
    maf_handle = None
    try:
        try:
            maf_handle = open(inp_file, 'r')
            maf_reader = bx.align.maf.Reader(maf_handle)
        except Exception:
            # Exception (not a bare except) so Ctrl-C and SystemExit propagate.
            stop_err("Your MAF file appears to be malformed.")

        fout.write("#Chr\tStart\tEnd\n")

        for block in maf_reader:
            if len(block.components) != 2:
                not_pairwise += 1
                continue
            try:
                fetchSubs(block)
            except Exception:
                # Best effort: a block that cannot be processed is counted
                # and skipped rather than aborting the whole run.
                skipped += 1
    finally:
        if maf_handle is not None:
            maf_handle.close()
        # Close explicitly so buffered results are flushed to disk.
        fout.close()

    if not_pairwise:
        print("Skipped %d non-pairwise blocks" % not_pairwise)
    if skipped:
        print("Skipped %d blocks" % skipped)
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
82
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
83
c54f5d0bbb58 Imported from capsule None
devteam
parents:
diff changeset
# Run the tool only when executed as a script, not when imported.
if __name__ == '__main__':
    main()