Mercurial > repos > devteam > getindelrates_3way
comparison getIndelRates_3way.py @ 0:d427e5acb9ee draft default tip
Imported from capsule None
| author | devteam |
|---|---|
| date | Tue, 01 Apr 2014 10:52:01 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d427e5acb9ee |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 #Guruprasad Ananda | |
| 3 | |
| 4 import sys, os, tempfile | |
| 5 import fileinput | |
| 6 from warnings import warn | |
| 7 | |
| 8 from galaxy.tools.util.galaxyops import * | |
| 9 from bx.intervals.io import * | |
| 10 | |
| 11 from bx.intervals.operations import quicksect | |
| 12 | |
| 13 def stop_err(msg): | |
| 14 sys.stderr.write(msg) | |
| 15 sys.exit() | |
| 16 | |
| 17 | |
| 18 def counter(node, start, end, sort_col): | |
| 19 global full, blk_len, blk_list | |
| 20 if node.start < start: | |
| 21 if node.right: | |
| 22 counter(node.right, start, end, sort_col) | |
| 23 elif start <= node.start <= end and start <= node.end <= end: | |
| 24 full += 1 | |
| 25 if node.other[0] not in blk_list: | |
| 26 blk_list.append(node.other[0]) | |
| 27 blk_len += int(node.other[sort_col+2]) | |
| 28 if node.left and node.left.maxend > start: | |
| 29 counter(node.left, start, end, sort_col) | |
| 30 if node.right: | |
| 31 counter(node.right, start, end, sort_col) | |
| 32 elif node.start > end: | |
| 33 if node.left: | |
| 34 counter(node.left, start, end, sort_col) | |
| 35 | |
| 36 | |
| 37 infile = sys.argv[1] | |
| 38 fout = open(sys.argv[2],'w') | |
| 39 int_file = sys.argv[3] | |
| 40 if int_file != "None": #User has specified an interval file | |
| 41 try: | |
| 42 fint = open(int_file, 'r') | |
| 43 dbkey_i = sys.argv[4] | |
| 44 chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[5] ) | |
| 45 except: | |
| 46 stop_err("Unable to open input Interval file") | |
| 47 | |
| 48 | |
| 49 def main(): | |
| 50 for i, line in enumerate( file ( infile )): | |
| 51 line = line.rstrip('\r\n') | |
| 52 if len( line )>0 and not line.startswith( '#' ): | |
| 53 elems = line.split( '\t' ) | |
| 54 break | |
| 55 if i == 30: | |
| 56 break # Hopefully we'll never get here... | |
| 57 | |
| 58 if len( elems ) != 18: | |
| 59 stop_err( "This tool only works on tabular data output by 'Fetch Indels from 3-way alignments' tool. The data in your input dataset is either missing or not formatted properly." ) | |
| 60 | |
| 61 for i, line in enumerate( file ( infile )): | |
| 62 line = line.rstrip('\r\n') | |
| 63 elems = line.split('\t') | |
| 64 try: | |
| 65 assert int(elems[0]) | |
| 66 assert len(elems) == 18 | |
| 67 if int_file != "None": | |
| 68 if dbkey_i not in elems[3] and dbkey_i not in elems[8] and dbkey_i not in elems[13]: | |
| 69 stop_err("The species build corresponding to your interval file is not present in the Indel file.") | |
| 70 if dbkey_i in elems[3]: | |
| 71 sort_col = 4 | |
| 72 elif dbkey_i in elems[8]: | |
| 73 sort_col = 9 | |
| 74 elif dbkey_i in elems[13]: | |
| 75 sort_col = 14 | |
| 76 else: | |
| 77 species = [] | |
| 78 species.append( elems[3].split('.')[0] ) | |
| 79 species.append( elems[8].split('.')[0] ) | |
| 80 species.append( elems[13].split('.')[0] ) | |
| 81 sort_col = 0 #Based on block numbers | |
| 82 break | |
| 83 except: | |
| 84 continue | |
| 85 | |
| 86 fin = open(infile, 'r') | |
| 87 skipped = 0 | |
| 88 | |
| 89 if int_file == "None": | |
| 90 sorted_infile = tempfile.NamedTemporaryFile() | |
| 91 cmdline = "sort -n -k"+str(1)+" -o "+sorted_infile.name+" "+infile | |
| 92 try: | |
| 93 os.system(cmdline) | |
| 94 except: | |
| 95 stop_err("Encountered error while sorting the input file.") | |
| 96 print >> fout, "#Block\t%s_InsRate\t%s_InsRate\t%s_InsRate\t%s_DelRate\t%s_DelRate\t%s_DelRate" % ( species[0], species[1], species[2], species[0], species[1], species[2] ) | |
| 97 prev_bnum = -1 | |
| 98 sorted_infile.seek(0) | |
| 99 for line in sorted_infile.readlines(): | |
| 100 line = line.rstrip('\r\n') | |
| 101 elems = line.split('\t') | |
| 102 try: | |
| 103 assert int(elems[0]) | |
| 104 assert len(elems) == 18 | |
| 105 new_bnum = int(elems[0]) | |
| 106 if new_bnum != prev_bnum: | |
| 107 if prev_bnum != -1: | |
| 108 irate = [] | |
| 109 drate = [] | |
| 110 for i, elem in enumerate(inserts): | |
| 111 try: | |
| 112 irate.append(str("%.2e" % (inserts[i]/blen[i]))) | |
| 113 except: | |
| 114 irate.append('0') | |
| 115 try: | |
| 116 drate.append(str("%.2e" % (deletes[i]/blen[i]))) | |
| 117 except: | |
| 118 drate.append('0') | |
| 119 print >> fout, "%s\t%s\t%s" % ( prev_bnum, '\t'.join(irate) , '\t'.join(drate) ) | |
| 120 inserts = [0.0, 0.0, 0.0] | |
| 121 deletes = [0.0, 0.0, 0.0] | |
| 122 blen = [] | |
| 123 blen.append( int(elems[6]) ) | |
| 124 blen.append( int(elems[11]) ) | |
| 125 blen.append( int(elems[16]) ) | |
| 126 line_sp = elems[1].split('.')[0] | |
| 127 sp_ind = species.index(line_sp) | |
| 128 if elems[1].endswith('insert'): | |
| 129 inserts[sp_ind] += 1 | |
| 130 elif elems[1].endswith('delete'): | |
| 131 deletes[sp_ind] += 1 | |
| 132 prev_bnum = new_bnum | |
| 133 except Exception, ei: | |
| 134 #print >>sys.stderr, ei | |
| 135 continue | |
| 136 irate = [] | |
| 137 drate = [] | |
| 138 for i, elem in enumerate(inserts): | |
| 139 try: | |
| 140 irate.append(str("%.2e" % (inserts[i]/blen[i]))) | |
| 141 except: | |
| 142 irate.append('0') | |
| 143 try: | |
| 144 drate.append(str("%.2e" % (deletes[i]/blen[i]))) | |
| 145 except: | |
| 146 drate.append('0') | |
| 147 print >> fout, "%s\t%s\t%s" % ( prev_bnum, '\t'.join(irate) , '\t'.join(drate) ) | |
| 148 sys.exit() | |
| 149 | |
| 150 inf = open(infile, 'r') | |
| 151 start_met = False | |
| 152 end_met = False | |
| 153 sp_file = tempfile.NamedTemporaryFile() | |
| 154 for n, line in enumerate(inf): | |
| 155 line = line.rstrip('\r\n') | |
| 156 elems = line.split('\t') | |
| 157 try: | |
| 158 assert int(elems[0]) | |
| 159 assert len(elems) == 18 | |
| 160 if dbkey_i not in elems[1]: | |
| 161 if not(start_met): | |
| 162 continue | |
| 163 else: | |
| 164 sp_end = n | |
| 165 break | |
| 166 else: | |
| 167 print >> sp_file, line | |
| 168 if not(start_met): | |
| 169 start_met = True | |
| 170 sp_start = n | |
| 171 except: | |
| 172 continue | |
| 173 | |
| 174 try: | |
| 175 assert sp_end | |
| 176 except: | |
| 177 sp_end = n+1 | |
| 178 | |
| 179 sp_file.seek(0) | |
| 180 win = NiceReaderWrapper( fileinput.FileInput( int_file ), | |
| 181 chrom_col=chr_col_i, | |
| 182 start_col=start_col_i, | |
| 183 end_col=end_col_i, | |
| 184 strand_col=strand_col_i, | |
| 185 fix_strand=True) | |
| 186 | |
| 187 indel = NiceReaderWrapper( fileinput.FileInput( sp_file.name ), | |
| 188 chrom_col=1, | |
| 189 start_col=sort_col, | |
| 190 end_col=sort_col+1, | |
| 191 strand_col=-1, | |
| 192 fix_strand=True) | |
| 193 | |
| 194 indelTree = quicksect.IntervalTree() | |
| 195 for item in indel: | |
| 196 if type( item ) is GenomicInterval: | |
| 197 indelTree.insert( item, indel.linenum, item.fields ) | |
| 198 result = [] | |
| 199 | |
| 200 global full, blk_len, blk_list | |
| 201 for interval in win: | |
| 202 if type( interval ) is Header: | |
| 203 pass | |
| 204 if type( interval ) is Comment: | |
| 205 pass | |
| 206 elif type( interval ) == GenomicInterval: | |
| 207 chrom = interval.chrom | |
| 208 start = int(interval.start) | |
| 209 end = int(interval.end) | |
| 210 if start > end: | |
| 211 warn( "Interval start after end!" ) | |
| 212 ins_chr = "%s.%s_insert" % ( dbkey_i, chrom ) | |
| 213 del_chr = "%s.%s_delete" % ( dbkey_i, chrom ) | |
| 214 irate = 0 | |
| 215 drate = 0 | |
| 216 if ins_chr not in indelTree.chroms and del_chr not in indelTree.chroms: | |
| 217 pass | |
| 218 else: | |
| 219 if ins_chr in indelTree.chroms: | |
| 220 full = 0.0 | |
| 221 blk_len = 0 | |
| 222 blk_list = [] | |
| 223 root = indelTree.chroms[ins_chr] #root node for the chrom insertion tree | |
| 224 counter(root, start, end, sort_col) | |
| 225 if blk_len: | |
| 226 irate = full/blk_len | |
| 227 | |
| 228 if del_chr in indelTree.chroms: | |
| 229 full = 0.0 | |
| 230 blk_len = 0 | |
| 231 blk_list = [] | |
| 232 root = indelTree.chroms[del_chr] #root node for the chrom insertion tree | |
| 233 counter(root, start, end, sort_col) | |
| 234 if blk_len: | |
| 235 drate = full/blk_len | |
| 236 | |
| 237 interval.fields.append(str("%.2e" %irate)) | |
| 238 interval.fields.append(str("%.2e" %drate)) | |
| 239 print >> fout, "\t".join(interval.fields) | |
| 240 fout.flush() | |
| 241 | |
| 242 | |
| 243 if __name__ == "__main__": | |
| 244 main() |
