Mercurial > repos > devteam > getindels_2way
comparison getIndels.py @ 0:91655316fcf0
Imported from capsule None
| author | devteam |
|---|---|
| date | Tue, 01 Apr 2014 10:52:09 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:91655316fcf0 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 """ | |
| 4 Estimate INDELs for pair-wise alignments. | |
| 5 | |
| 6 usage: %prog maf_input out_file1 out_file2 | |
| 7 """ | |
| 8 | |
| 9 from __future__ import division | |
| 10 import sys | |
| 11 from bx.cookbook import doc_optparse | |
| 12 import bx.align.maf | |
| 13 | |
| 14 assert sys.version_info[:2] >= ( 2, 4 ) | |
| 15 | |
| 16 def main(): | |
| 17 # Parsing Command Line here | |
| 18 options, args = doc_optparse.parse( __doc__ ) | |
| 19 | |
| 20 try: | |
| 21 inp_file, out_file1 = args | |
| 22 except: | |
| 23 print >> sys.stderr, "Tool initialization error." | |
| 24 sys.exit() | |
| 25 | |
| 26 try: | |
| 27 open(inp_file, 'r') | |
| 28 except: | |
| 29 print >> sys.stderr, "Unable to open input file" | |
| 30 sys.exit() | |
| 31 try: | |
| 32 fout1 = open(out_file1, 'w') | |
| 33 #fout2 = open(out_file2, 'w') | |
| 34 except: | |
| 35 print >> sys.stderr, "Unable to open output file" | |
| 36 sys.exit() | |
| 37 | |
| 38 try: | |
| 39 maf_reader = bx.align.maf.Reader( open(inp_file, 'r') ) | |
| 40 except: | |
| 41 print >> sys.stderr, "Your MAF file appears to be malformed." | |
| 42 sys.exit() | |
| 43 | |
| 44 print >> fout1, "#Block\tSource\tSeq1_Start\tSeq1_End\tSeq2_Start\tSeq2_End\tIndel_length" | |
| 45 for block_ind, block in enumerate(maf_reader): | |
| 46 if len(block.components) < 2: | |
| 47 continue | |
| 48 seq1 = block.components[0].text | |
| 49 src1 = block.components[0].src | |
| 50 start1 = block.components[0].start | |
| 51 if len(block.components) == 2: | |
| 52 seq2 = block.components[1].text | |
| 53 src2 = block.components[1].src | |
| 54 start2 = block.components[1].start | |
| 55 #for pos in range(len(seq1)): | |
| 56 nt_pos1 = start1-1 #position of the nucleotide (without counting gaps) | |
| 57 nt_pos2 = start2-1 | |
| 58 pos = 0 #character column position | |
| 59 gaplen1 = 0 | |
| 60 gaplen2 = 0 | |
| 61 prev_pos_gap1 = 0 | |
| 62 prev_pos_gap2 = 0 | |
| 63 while pos < len(seq1): | |
| 64 if prev_pos_gap1 == 0: | |
| 65 gaplen1 = 0 | |
| 66 if prev_pos_gap2 == 0: | |
| 67 gaplen2 = 0 | |
| 68 | |
| 69 if seq1[pos] == '-': | |
| 70 if seq2[pos] != '-': | |
| 71 nt_pos2 += 1 | |
| 72 gaplen1 += 1 | |
| 73 prev_pos_gap1 = 1 | |
| 74 #write 2 | |
| 75 if prev_pos_gap2 == 1: | |
| 76 prev_pos_gap2 = 0 | |
| 77 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1, nt_pos1+1, nt_pos2-1, nt_pos2-1+gaplen2, gaplen2 ) | |
| 78 if pos == len(seq1)-1: | |
| 79 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1, nt_pos1+1, nt_pos2+1-gaplen1, nt_pos2+1, gaplen1 ) | |
| 80 else: | |
| 81 prev_pos_gap1 = 0 | |
| 82 prev_pos_gap2 = 0 | |
| 83 """ | |
| 84 if prev_pos_gap1 == 1: | |
| 85 prev_pos_gap1 = 0 | |
| 86 print >> fout1, "%d\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, gaplen1 ) | |
| 87 elif prev_pos_gap2 == 1: | |
| 88 prev_pos_gap2 = 0 | |
| 89 print >> fout1, "%d\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos2-1, nt_pos2, gaplen2 ) | |
| 90 """ | |
| 91 else: | |
| 92 nt_pos1 += 1 | |
| 93 if seq2[pos] != '-': | |
| 94 nt_pos2 += 1 | |
| 95 #write both | |
| 96 if prev_pos_gap1 == 1: | |
| 97 prev_pos_gap1 = 0 | |
| 98 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, nt_pos2-gaplen1, nt_pos2, gaplen1 ) | |
| 99 elif prev_pos_gap2 == 1: | |
| 100 prev_pos_gap2 = 0 | |
| 101 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1-gaplen2, nt_pos1, nt_pos2-1, nt_pos2, gaplen2 ) | |
| 102 else: | |
| 103 gaplen2 += 1 | |
| 104 prev_pos_gap2 = 1 | |
| 105 #write 1 | |
| 106 if prev_pos_gap1 == 1: | |
| 107 prev_pos_gap1 = 0 | |
| 108 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, nt_pos2, nt_pos2+gaplen1, gaplen1 ) | |
| 109 if pos == len(seq1)-1: | |
| 110 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1+1-gaplen2, nt_pos1+1, nt_pos2, nt_pos2+1, gaplen2 ) | |
| 111 pos += 1 | |
| 112 | |
| 113 | |
| 114 if __name__ == "__main__": | |
| 115 main() |
