Mercurial > repos > xuebing > sharplabtool
comparison tools/regVariation/getIndels.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 Estimate INDELs for pair-wise alignments. | |
5 | |
6 usage: %prog maf_input out_file1 out_file2 | |
7 """ | |
8 | |
9 from __future__ import division | |
10 from galaxy import eggs | |
11 import pkg_resources | |
12 pkg_resources.require( "bx-python" ) | |
13 try: | |
14 pkg_resources.require("numpy") | |
15 except: | |
16 pass | |
17 import psyco_full | |
18 import sys | |
19 from bx.cookbook import doc_optparse | |
20 from galaxy.tools.exception_handling import * | |
21 import bx.align.maf | |
22 | |
23 assert sys.version_info[:2] >= ( 2, 4 ) | |
24 | |
25 def main(): | |
26 # Parsing Command Line here | |
27 options, args = doc_optparse.parse( __doc__ ) | |
28 | |
29 try: | |
30 inp_file, out_file1 = args | |
31 except: | |
32 print >> sys.stderr, "Tool initialization error." | |
33 sys.exit() | |
34 | |
35 try: | |
36 fin = open(inp_file,'r') | |
37 except: | |
38 print >> sys.stderr, "Unable to open input file" | |
39 sys.exit() | |
40 try: | |
41 fout1 = open(out_file1,'w') | |
42 #fout2 = open(out_file2,'w') | |
43 except: | |
44 print >> sys.stderr, "Unable to open output file" | |
45 sys.exit() | |
46 | |
47 try: | |
48 maf_reader = bx.align.maf.Reader( open(inp_file, 'r') ) | |
49 except: | |
50 print >>sys.stderr, "Your MAF file appears to be malformed." | |
51 sys.exit() | |
52 maf_count = 0 | |
53 | |
54 print >>fout1, "#Block\tSource\tSeq1_Start\tSeq1_End\tSeq2_Start\tSeq2_End\tIndel_length" | |
55 for block_ind, block in enumerate(maf_reader): | |
56 if len(block.components) < 2: | |
57 continue | |
58 seq1 = block.components[0].text | |
59 src1 = block.components[0].src | |
60 start1 = block.components[0].start | |
61 if len(block.components) == 2: | |
62 seq2 = block.components[1].text | |
63 src2 = block.components[1].src | |
64 start2 = block.components[1].start | |
65 #for pos in range(len(seq1)): | |
66 nt_pos1 = start1-1 #position of the nucleotide (without counting gaps) | |
67 nt_pos2 = start2-1 | |
68 pos = 0 #character column position | |
69 gaplen1 = 0 | |
70 gaplen2 = 0 | |
71 prev_pos_gap1 = 0 | |
72 prev_pos_gap2 = 0 | |
73 while pos < len(seq1): | |
74 if prev_pos_gap1 == 0: | |
75 gaplen1 = 0 | |
76 if prev_pos_gap2 == 0: | |
77 gaplen2 = 0 | |
78 | |
79 if seq1[pos] == '-': | |
80 if seq2[pos] != '-': | |
81 nt_pos2 += 1 | |
82 gaplen1 += 1 | |
83 prev_pos_gap1 = 1 | |
84 #write 2 | |
85 if prev_pos_gap2 == 1: | |
86 prev_pos_gap2 = 0 | |
87 print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1,nt_pos1+1,nt_pos2-1,nt_pos2-1+gaplen2,gaplen2) | |
88 if pos == len(seq1)-1: | |
89 print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1,nt_pos1+1,nt_pos2+1-gaplen1,nt_pos2+1,gaplen1) | |
90 else: | |
91 prev_pos_gap1 = 0 | |
92 prev_pos_gap2 = 0 | |
93 """ | |
94 if prev_pos_gap1 == 1: | |
95 prev_pos_gap1 = 0 | |
96 print >>fout1,"%d\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,gaplen1) | |
97 elif prev_pos_gap2 == 1: | |
98 prev_pos_gap2 = 0 | |
99 print >>fout1,"%d\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos2-1,nt_pos2,gaplen2) | |
100 """ | |
101 else: | |
102 nt_pos1 += 1 | |
103 if seq2[pos] != '-': | |
104 nt_pos2 += 1 | |
105 #write both | |
106 if prev_pos_gap1 == 1: | |
107 prev_pos_gap1 = 0 | |
108 print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,nt_pos2-gaplen1,nt_pos2,gaplen1) | |
109 elif prev_pos_gap2 == 1: | |
110 prev_pos_gap2 = 0 | |
111 print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1-gaplen2,nt_pos1,nt_pos2-1,nt_pos2,gaplen2) | |
112 else: | |
113 gaplen2 += 1 | |
114 prev_pos_gap2 = 1 | |
115 #write 1 | |
116 if prev_pos_gap1 == 1: | |
117 prev_pos_gap1 = 0 | |
118 print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,nt_pos2,nt_pos2+gaplen1,gaplen1) | |
119 if pos == len(seq1)-1: | |
120 print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1+1-gaplen2,nt_pos1+1,nt_pos2,nt_pos2+1,gaplen2) | |
121 pos += 1 | |
122 if __name__ == "__main__": | |
123 main() |