Mercurial > repos > rreumerman > snptools
comparison trams.py @ 4:bd5692103d5b draft
Uploaded
| author | rreumerman |
|---|---|
| date | Fri, 05 Apr 2013 05:00:40 -0400 |
| parents | |
| children | 8de0ffc2166f |
comparison
equal
deleted
inserted
replaced
| 3:a808647d7312 | 4:bd5692103d5b |
|---|---|
| 1 ################# | |
| 2 transl_table = 11 | |
| 3 intro_message = ''' +------------------------------------------------------------------+ | |
| 4 | Tool for Rapid Annotation of Microbial SNPs (TRAMS): a simple | | |
| 5 | program for rapid annotation of genomic variation in prokaryotes | | |
| 6 | | | |
| 7 | Developed by: Richard A. Reumerman, Paul R. Herron, | | |
| 8 | Paul A. Hoskisson and Vartul Sangal | | |
| 9 +------------------------------------------------------------------+\n''' | |
| 10 ################# | |
| 11 | |
| 12 import sys | |
| 13 import time | |
| 14 start = time.clock() | |
| 15 | |
| 16 # Command line files: SNP REF REF-TYPE ANNOT OVERL SUM; | |
| 17 if len(sys.argv) < 7: | |
| 18 exit("\nNot enough arguments given.\nUsage: TRAMS_Galaxy.py [SNP.] [REF.] [ANNOT.] [OVERL.] [SUM.]") | |
| 19 try: | |
| 20 file_snps = open(sys.argv[1], "rU") | |
| 21 except IOError as e: | |
| 22 exit("Error trying to open '"+sys.argv[1]+"': {1}".format(e.errno, e.strerror)) | |
| 23 try: | |
| 24 file_ref = open(sys.argv[2], "rU") | |
| 25 except IOError as e: | |
| 26 exit("Error trying to open '"+sys.argv[2]+"': {1}".format(e.errno, e.strerror)) | |
| 27 | |
| 28 filetype_reference = sys.argv[3] | |
| 29 | |
| 30 try: | |
| 31 file_out = open(sys.argv[4], "w") | |
| 32 except IOError as e: | |
| 33 exit("Error trying to open '"+sys.argv[4]+"': {1}".format(e.errno, e.strerror)) | |
| 34 try: | |
| 35 file_overlap = open(sys.argv[5], "w") | |
| 36 except IOError as e: | |
| 37 exit("Error trying to open '"+sys.argv[5]+"': {1}".format(e.errno, e.strerror)) | |
| 38 try: | |
| 39 file_summary = open(sys.argv[6], "w") | |
| 40 except IOError as e: | |
| 41 exit("Error trying to open '"+sys.argv[6]+"': {1}".format(e.errno, e.strerror)) | |
| 42 | |
| 43 import Bio | |
| 44 from Bio import SeqIO, SeqFeature | |
| 45 from Bio.SeqRecord import SeqRecord | |
| 46 from Bio.Seq import Seq | |
| 47 from Bio.Alphabet import generic_dna, IUPAC | |
| 48 from Bio.Data import CodonTable | |
| 49 | |
| 50 modules_loaded = time.clock() | |
| 51 | |
| 52 def non_coding_calc(gene, pos = 0): | |
| 53 '''This function takes a pseudogene and returns the number of bases | |
| 54 located in between the sub-features before 'pos'. Returns 0 if 'pseudo' = False. | |
| 55 Input: {start, subfeats, pseudo}, pos (default = 0)''' | |
| 56 if not gene['pseudo']: return 0 | |
| 57 | |
| 58 non_coding_bases = 0 | |
| 59 prev_subfeat_end = gene['start'] | |
| 60 if gene['strand'] == -1: | |
| 61 for subfeature in gene['subfeats']: | |
| 62 if subfeature.location._start.position < pos: | |
| 63 prev_subfeat_end = subfeature.location._end.position | |
| 64 continue | |
| 65 non_coding_bases += (subfeature.location._start.position - prev_subfeat_end) | |
| 66 prev_subfeat_end = subfeature.location._end.position | |
| 67 else: | |
| 68 for subfeature in gene['subfeats']: | |
| 69 non_coding_bases += (subfeature.location._start.position - prev_subfeat_end) | |
| 70 prev_subfeat_end = subfeature.location._end.position | |
| 71 if prev_subfeat_end >= pos and pos != 0: break | |
| 72 | |
| 73 return non_coding_bases | |
| 74 | |
| 75 | |
| 76 def region_calc(bounds,length): | |
| 77 regions = [] | |
| 78 lastend=i=0 | |
| 79 while i < len(bounds): | |
| 80 if bounds[i]['start'] > lastend:# Intergenic region present; | |
| 81 regions.append([lastend,bounds[i]['start'],-1]) | |
| 82 lastend = bounds[i]['start'] | |
| 83 else: | |
| 84 regions.append([bounds[i]['start'],bounds[i]['end'],i]) | |
| 85 if bounds[i]['end'] > lastend: | |
| 86 lastend = bounds[i]['end'] | |
| 87 i += 1 | |
| 88 | |
| 89 if regions[-1][1] < length:# Final tail of genome; | |
| 90 regions.append([lastend,length,-1]) | |
| 91 | |
| 92 return regions | |
| 93 | |
| 94 | |
| 95 def overlap_calc(bounds): | |
| 96 '''This function takes an array of feature starts and ends and | |
| 97 returns an array of starts and ends of all overlapping regions. | |
| 98 Input: [{start,end}]''' | |
| 99 i = 0 | |
| 100 overlaps = [] | |
| 101 while i < len(bounds) - 1: | |
| 102 for downstr in bounds[i+1:]: | |
| 103 if downstr[0] < bounds[i][1]:# Features overlap; | |
| 104 if downstr[1] < bounds[i][1]:# Complete overlap; | |
| 105 overlaps.append([downstr[0],downstr[1],bounds[i][2],downstr[2],[0,0]]) | |
| 106 else:# Partial overlap; | |
| 107 overlaps.append([downstr[0],bounds[i][1],bounds[i][2],downstr[2],[0,0]]) | |
| 108 else:# No use looking further; | |
| 109 break | |
| 110 | |
| 111 i += 1 | |
| 112 | |
| 113 return overlaps | |
| 114 | |
| 115 | |
| 116 def match_feature(bounds,pos,prev=0): | |
| 117 '''This function checks if a position is located inside a feature and | |
| 118 returns the feature's number if found or -1 if none is found. | |
| 119 Input: {start,end},pos,prev_feat (default = 0)''' | |
| 120 for i in range(prev, len(bounds)): | |
| 121 if (pos >= bounds[i]['start']) and (pos < bounds[i]['end']): | |
| 122 return i | |
| 123 elif pos < bounds[i]['start']:# No use looking further | |
| 124 return -1 | |
| 125 | |
| 126 return -1 | |
| 127 | |
| 128 | |
| 129 def write_output(line,target=file_out): | |
| 130 '''This function takes the 2 dimensional array containing all the SNP | |
| 131 data. It contains an array of information on the feature and an array | |
| 132 for each strain for which SNPs are given. | |
| 133 Input: [[pos],[ref],[cells],[cells],etc]''' | |
| 134 target.write('\n'+str(line[0][0])) | |
| 135 for cell in line[1]: | |
| 136 target.write('\t'+str(cell)) | |
| 137 for strain in line[2:]: | |
| 138 target.write('\t') | |
| 139 for cell in strain: | |
| 140 target.write('\t'+str(cell)) | |
| 141 | |
| 142 target.flush() | |
| 143 | |
| 144 | |
| 145 def new_codon_calc(ref_codon, new_base, pos_in_cod): | |
| 146 return str(ref_codon[0:pos_in_cod-1]+new_base+ref_codon[pos_in_cod:len(ref_codon)]) | |
| 147 | |
| 148 | |
| 149 def mut_type_check(ref_res, ref_codon, pos_in_gene, new_base, new_codon): | |
| 150 if str(new_codon).lower() == str(ref_codon).lower(): | |
| 151 return ['','','',''] | |
| 152 new_residue = Seq(new_codon).translate(table=transl_table) | |
| 153 if str(new_residue) == str(ref_res): | |
| 154 mut_type = 'synonymous' | |
| 155 elif (pos_in_gene / 3) < 1 and str(ref_codon).upper() in CodonTable.unambiguous_dna_by_id[transl_table].start_codons:# position 0,1 or 2 and SNP is in start codon; | |
| 156 if str(new_codon).upper() in CodonTable.unambiguous_dna_by_id[transl_table].start_codons: mut_type = 'nonsynonymous' | |
| 157 else: mut_type = 'nonstart' | |
| 158 elif str(new_residue) == '*': mut_type = 'nonsense' | |
| 159 elif str(ref_res) == '*': mut_type = 'nonstop' | |
| 160 else: mut_type = 'nonsynonymous' | |
| 161 | |
| 162 return [mut_type,new_base,new_codon,new_residue] | |
| 163 | |
| 164 | |
| 165 def codon_process(codon): | |
| 166 '''This function processes a codon. It loops through it 3 times, | |
| 167 once to determine which is the highest position mutated, once to | |
| 168 fill in the cells for the output file and once to output all lines. | |
| 169 Input: [empty,start_pos,[line1],[line2],[line3],strand] | |
| 170 It also uses global variable strain_nr''' | |
| 171 lastposition = [-1]*(strain_nr-1) | |
| 172 new_codons = ['']*(strain_nr-1) | |
| 173 if codon[-1] == -1:# Change codon position order for -1 features; | |
| 174 temp = codon [1:-1] | |
| 175 temp.reverse() | |
| 176 codon[1:-1] = temp | |
| 177 for i,line in enumerate(codon[1:-1],1): | |
| 178 if line == '': continue | |
| 179 for j,strain in enumerate(line[2:]): | |
| 180 if strain[1] in ['a','g','c','t']: | |
| 181 lastposition[j] = i | |
| 182 new_codons[j] = codon[i][1][8] | |
| 183 | |
| 184 for i,line in enumerate(codon[1:-1],1): | |
| 185 if codon[-1] == -1: pos_in_cod = 4-i | |
| 186 else: pos_in_cod = i | |
| 187 | |
| 188 if line == '': continue | |
| 189 for j,strain in enumerate(line[2:]): | |
| 190 if i == lastposition[j]: # Check for synonymous etc.; | |
| 191 new_codons[j] = new_codon_calc(new_codons[j],strain[1],pos_in_cod) | |
| 192 codon[i][j+2] = mut_type_check(line[1][9],line[1][8],codon[0],strain[1],new_codons[j]) | |
| 193 straininfo[j][codon[i][j+2][0]] += 1# Counting; | |
| 194 elif strain[1] in ['a','g','c','t']: | |
| 195 codon[i][j+2] = ['MNP',strain[1],'',''] | |
| 196 straininfo[j]['mnps'] += 1 | |
| 197 new_codons[j] = new_codon_calc(new_codons[j],strain[1],pos_in_cod) | |
| 198 elif strain[0] == 'Allele missing': codon[i][j+2] = strain | |
| 199 else: codon[i][j+2] = ['']*4 | |
| 200 | |
| 201 for line in codon[1:-1]: | |
| 202 if line != '': write_output(line) | |
| 203 | |
| 204 def feature_props(feature): | |
| 205 properties = {'type':feature.type,'strand':feature.location._strand, | |
| 206 'sequence':feature.extract(seq_record.seq),'pseudo': False, | |
| 207 'locus_tag':'','gene_name':'','product':'', | |
| 208 'start':int(feature.location._start.position), | |
| 209 'end':int(feature.location._end.position)} | |
| 210 if 'pseudo' in feature.qualifiers: | |
| 211 properties['pseudo'] = True | |
| 212 properties['type'] = 'pseudogene' | |
| 213 properties['pure_seq'] = properties['sequence'] | |
| 214 if properties['strand'] == -1: | |
| 215 properties['sequence'] = seq_record.seq[feature.location._start.position:feature.location._end.position].reverse_complement() | |
| 216 else: | |
| 217 properties['sequence'] = seq_record.seq[feature.location._start.position:feature.location._end.position] | |
| 218 if feature.sub_features: properties['subfeats'] = feature.sub_features | |
| 219 if 'locus_tag' in feature.qualifiers: properties['locus_tag'] = feature.qualifiers['locus_tag'][0] | |
| 220 if 'gene' in feature.qualifiers: properties['gene_name']= feature.qualifiers['gene'][0] | |
| 221 if feature.type in ['tRNA','rRNA','CDS']: properties['product'] = feature.qualifiers['product'][0] | |
| 222 | |
| 223 return properties | |
| 224 | |
| 225 # Read embl/genbank file for information on sequence features; | |
| 226 try: | |
| 227 seq_record = SeqIO.parse(file_ref, filetype_reference).next() | |
| 228 except: | |
| 229 file_ref.close() | |
| 230 quit("Error reading "+sys.argv[2]+", please check file for errors.") | |
| 231 file_ref.close() | |
| 232 | |
| 233 # Loop through genome features and save relevant properties; | |
| 234 feats = []# Dictionary of properties; | |
| 235 | |
| 236 feature_types = {'intergenic':0,'gene':0,'pseudogene':0} | |
| 237 feat_temp_store = '' | |
| 238 for feature in seq_record.features: | |
| 239 # Check if gene is defined as other feature (e.g. CDS). Else, save info from 'gene'; | |
| 240 if feat_temp_store != '': | |
| 241 if (feature.location._start.position == feat_temp_store.location._start.position and | |
| 242 feature.location._end.position == feat_temp_store.location._end.position):# Gene also defined as other feature; | |
| 243 feat_temp_store = '' | |
| 244 else:# Gene not also defined as CDS; | |
| 245 feats.append(feature_props(feat_temp_store)) | |
| 246 feat_temp_store = '' | |
| 247 elif feature.type == 'gene': | |
| 248 feat_temp_store = feature | |
| 249 | |
| 250 if not feature.type in ['source','gene','misc_feature']: | |
| 251 if not feature.type in feature_types and feature.type != 'CDS': feature_types[feature.type] = 0 | |
| 252 feats.append(feature_props(feature)) | |
| 253 | |
| 254 | |
| 255 feat_props = sorted(feats, key=lambda cells:int(cells['start'])) | |
| 256 feat_boundaries = [{'start':item['start'],'end':item['end']} for item in feat_props] | |
| 257 regions = region_calc(feat_boundaries,len(seq_record.seq)) | |
| 258 feat_overlap = overlap_calc(regions) | |
| 259 | |
| 260 reference_loaded = time.clock() | |
| 261 | |
| 262 # Create array of SNPs from input file for processing; | |
| 263 lines = [line.split('\t') for line in file_snps if line.strip()] | |
| 264 file_snps.close() | |
| 265 # First line contains headers, extract number of strains etc; | |
| 266 headers = lines[0] | |
| 267 snp_table = sorted(lines[1:], key=lambda cells:int(cells[0])) | |
| 268 | |
| 269 snps_loaded = time.clock() | |
| 270 | |
| 271 # Print output file headers; | |
| 272 headers[-1] = headers[-1].rstrip()# Remove newline character; | |
| 273 strain_nr = len(headers)-1 | |
| 274 strains_found = 'Found '+str(strain_nr)+' strains: '+headers[1]+' (reference)' | |
| 275 first_line = '\t'+headers[1]+'\t'*9 | |
| 276 second_line = 'Position\tFeature\tLocus tag\tGene\tProduct\tStart\tEnd\tStrand\tRef. base\tRef. codon\tRef. res.' | |
| 277 straininfo = [0]*(len(headers[2:])) | |
| 278 for i,strain in enumerate(headers[2:]): | |
| 279 straininfo[i] = {'snps':0,'mnps':0,'synonymous':0,'nonsynonymous':0,'nonstart':0,'nonstop':0,'nonsense':0} | |
| 280 straininfo[i].update(feature_types) | |
| 281 strains_found += ', '+strain | |
| 282 first_line += '\t\t'+strain+'\t'*3 | |
| 283 second_line += '\t\tSNP type\tNew base\tNew codon\tNew res.' | |
| 284 | |
| 285 file_out.write(first_line+'\n'+second_line) | |
| 286 file_out.flush() | |
| 287 | |
| 288 # Loop through SNPs from array and process them; | |
| 289 props = {}# Properties of a feature; | |
| 290 prev_snp = ''# Position of previous SNP; | |
| 291 to_write = []# Information of current SNP; | |
| 292 compl_bases = {'a':'t','t':'a','g':'c','c':'g'} | |
| 293 firstsnp = True# First snp of region, or of codon in cases of 3 positions in codon mutated; | |
| 294 prev_start=j=k=0 | |
| 295 overlap_snps = [] | |
| 296 codon = ['']*5# Array of codon positions. First item is position of first base of codon in the gene; | |
| 297 | |
| 298 for region in regions: | |
| 299 firstsnp = True | |
| 300 i = prev_start | |
| 301 while i < len(snp_table):# Loop through SNPs | |
| 302 snp_entry = snp_table[i] | |
| 303 if not str(snp_entry[0]).isdigit():# Not a valid line, skip; | |
| 304 i += 1 | |
| 305 continue | |
| 306 | |
| 307 pos = int(snp_entry[0])-1 | |
| 308 if pos < region[0]:# Not inside region yet; | |
| 309 i += 1 | |
| 310 continue | |
| 311 elif firstsnp and pos < region[1]: | |
| 312 prev_start = i | |
| 313 elif pos >= region[1]:# End of region, process and next; | |
| 314 if not firstsnp and codon != ['','','','','']: | |
| 315 codon_process(codon) | |
| 316 break | |
| 317 | |
| 318 # Documentation of SNPs in feature overlaps; | |
| 319 while j < len(feat_overlap)-1 and pos > feat_overlap[j][1]: j += 1 | |
| 320 k = j | |
| 321 while k < len(feat_overlap) and pos >= feat_overlap[k][0]: | |
| 322 if pos < feat_overlap[k][1]: | |
| 323 if feat_overlap[k][4][0] == 0: | |
| 324 feat_overlap[k][4][0] = pos | |
| 325 feat_overlap[k][4][1] = pos | |
| 326 k += 1 | |
| 327 | |
| 328 | |
| 329 snp_entry[-1] = snp_entry[-1].rstrip()# Remove newline character at end of line; | |
| 330 | |
| 331 # Prevent crash upon encounter nonstandard base character; | |
| 332 if snp_entry[1] not in ['a','g','c','t']:# Ref base | |
| 333 i += 1 | |
| 334 continue | |
| 335 for m,base in enumerate(snp_entry[2:],2): | |
| 336 if base.lower() not in ['a','g','c','t']: | |
| 337 snp_entry[m] = '' | |
| 338 # Crash prevented | |
| 339 | |
| 340 mnp=in_feat=False | |
| 341 snp_feat = region[2] | |
| 342 ref_base = snp_entry[1] | |
| 343 | |
| 344 to_write = [[pos+1]] | |
| 345 | |
| 346 # Output feature properties and reference situation; | |
| 347 if snp_feat == -1: | |
| 348 codon = ['']*5 | |
| 349 to_write.append(['intergenic','','','','','','',ref_base.upper(),'','']) | |
| 350 elif feat_props[snp_feat]['type'] not in ['CDS','gene','pseudogene']:# In feature, but non-coding; | |
| 351 codon = ['']*5 | |
| 352 props = feat_props[snp_feat] | |
| 353 if props['strand'] == -1: ref_base = (compl_bases[snp_entry[1].lower()]) | |
| 354 else: ref_base = snp_entry[1] | |
| 355 to_write.append([props['type'],props['locus_tag'],props['gene_name'], | |
| 356 props['product'],props['start']+1,props['end'], | |
| 357 '',ref_base.upper(),'','']) | |
| 358 else:# in CDS/gene feature, check codon etc; | |
| 359 props = feat_props[snp_feat] | |
| 360 sequence = props['sequence'] | |
| 361 if props['strand'] == -1: | |
| 362 pos_in_gene = props['end'] - pos - 1# Python counting | |
| 363 ref_base = (compl_bases[snp_entry[1].lower()]) | |
| 364 else: | |
| 365 pos_in_gene = pos - props['start']# Python counting | |
| 366 ref_base = snp_entry[1] | |
| 367 | |
| 368 in_feat = True | |
| 369 if props['pseudo'] and 'subfeats' in props:# Pseudogene that needs special attention; | |
| 370 in_feat = False | |
| 371 subfeat_boundaries = [{'start':item.location._start.position,'end':item.location._end.position} | |
| 372 for item in props['subfeats']] | |
| 373 snp_subfeat = match_feature(subfeat_boundaries,pos) | |
| 374 if snp_subfeat != -1: | |
| 375 in_feat = True | |
| 376 pos_in_gene -= non_coding_calc({'start':props['start'],'subfeats':props['subfeats'], | |
| 377 'pseudo':True,'strand':props['strand']},pos) | |
| 378 sequence = props['pure_seq'] | |
| 379 | |
| 380 if not in_feat:# In pseudogene non-coding region; | |
| 381 codon = ['']*5 | |
| 382 to_write.append(['non coding',props['locus_tag'],props['gene_name'],props['product'], | |
| 383 props['start']+1,props['end'],props['strand'],ref_base.upper(), | |
| 384 '','']) | |
| 385 else:# In coding region; | |
| 386 pos_in_cod = (pos_in_gene+1)%3 | |
| 387 if pos_in_cod == 0: pos_in_cod = 3# Remainder of division 0 means 3rd place in codon; | |
| 388 | |
| 389 old_codon = sequence[pos_in_gene-pos_in_cod+1:pos_in_gene-pos_in_cod+4].upper() | |
| 390 old_residue = old_codon.translate(table=transl_table) | |
| 391 to_write.append([props['type'],props['locus_tag'],props['gene_name'],props['product'], | |
| 392 props['start']+1,props['end'],props['strand'],ref_base.upper(), | |
| 393 old_codon,old_residue]) | |
| 394 | |
| 395 if in_feat and not firstsnp and (pos >= prev_snp):# Check if snp is in same codon as previous snp. Position check for overlapping features; | |
| 396 if props['strand'] == 1 and (pos - prev_snp + 1) < pos_in_cod:# Same codon (Positive strand); | |
| 397 mnp = True | |
| 398 elif props['strand'] == -1 and (pos - prev_snp + 1) <= (3 - pos_in_cod):# Same codon (negative strand); | |
| 399 mnp = True | |
| 400 | |
| 401 # Process previous codon if not MNP; | |
| 402 if in_feat and not mnp: | |
| 403 if not firstsnp: | |
| 404 codon_process(codon) | |
| 405 codon = [pos_in_gene-pos_in_cod+1,'','','',props['strand']] | |
| 406 | |
| 407 | |
| 408 for l, snp in enumerate(snp_entry[2:]):# Loop through SNPs/strains; | |
| 409 | |
| 410 snp = snp.lower() | |
| 411 if snp == '':# Empty cell; | |
| 412 to_write.append(['','','','']) | |
| 413 continue | |
| 414 | |
| 415 if snp == '-': # Feature not present in this strain; | |
| 416 to_write.append(['Allele missing','','','']) | |
| 417 continue | |
| 418 | |
| 419 if snp_feat == -1:# Intergenic; | |
| 420 if snp == ref_base.lower(): | |
| 421 to_write.append(['']*4) | |
| 422 else: | |
| 423 to_write.append(['',snp,'','']) | |
| 424 straininfo[l]['intergenic'] += 1 | |
| 425 straininfo[l]['snps'] += 1 | |
| 426 continue | |
| 427 | |
| 428 if props['strand'] == -1: | |
| 429 snp = compl_bases[snp] | |
| 430 | |
| 431 if snp == ref_base.lower(): | |
| 432 to_write.append(['']*4) | |
| 433 else: | |
| 434 to_write.append(['',snp,'','']) | |
| 435 straininfo[l]['snps'] += 1 | |
| 436 if props['type'] != 'CDS': | |
| 437 straininfo[l][props['type']] += 1 | |
| 438 | |
| 439 | |
| 440 | |
| 441 if props['type'] in ['CDS','gene','pseudogene'] and in_feat: | |
| 442 codon[pos_in_cod] = to_write | |
| 443 else: | |
| 444 write_output(to_write) | |
| 445 | |
| 446 if firstsnp: firstsnp = False | |
| 447 prev_snp = pos+1 | |
| 448 i += 1 | |
| 449 | |
| 450 | |
| 451 if codon != ['','','','','']: codon_process(codon) | |
| 452 | |
| 453 file_out.close() | |
| 454 | |
| 455 end = time.clock() | |
| 456 | |
| 457 file_summary.write("\n") | |
| 458 file_summary.write(intro_message) | |
| 459 file_summary.write('\n'+strains_found+'.\n') | |
| 460 | |
| 461 file_summary.write("\nFinished annotation. Total time: %s s\n\n" % round(end-start,1)) | |
| 462 | |
| 463 | |
| 464 file_overlap.write('SNP start\tSNP end\tFeature 1\tLocus tag\tProduct\t\tFeature 2\tLocus tag\tProduct') | |
| 465 for overlap in feat_overlap: | |
| 466 if overlap[4] != [0,0]: | |
| 467 overlap[4][0]+=1 | |
| 468 overlap[4][1]+=1 | |
| 469 if overlap[4][0] == overlap[4][1]: overlap[4][1] = '' | |
| 470 write_output([[str(overlap[4][0])],[str(overlap[4][1]),feat_props[overlap[2]]['type'],feat_props[overlap[2]]['locus_tag'],feat_props[overlap[2]]['product']], | |
| 471 [feat_props[overlap[3]]['type'],feat_props[overlap[3]]['locus_tag'],feat_props[overlap[3]]['product']]], | |
| 472 file_overlap) | |
| 473 | |
| 474 | |
| 475 for i,strain in enumerate(headers[2:]): | |
| 476 file_summary.write("\n") | |
| 477 info = straininfo[i] | |
| 478 file_summary.write("+ Strain %s:\n" % strain) | |
| 479 file_summary.write(" %s SNPs found\n" % info['snps']) | |
| 480 file_summary.write(" Number of SNPs found CDS features: %s\n" % (info['mnps']+info['nonstart']+info['nonstop']+info['nonsense']+ | |
| 481 info['synonymous']+info['nonsynonymous'])) | |
| 482 file_summary.write(" (of which in pseudogenes: %s)\n" % info['pseudogene']) | |
| 483 file_summary.write(" - MNPs: %s\n" % info['mnps']) | |
| 484 file_summary.write(" - Synonymous: %s\n" % info['synonymous']) | |
| 485 file_summary.write(" - Nonsynonymous: %s\n" % info['nonsynonymous']) | |
| 486 file_summary.write(" - Nonsense: %s\n" % info['nonsense']) | |
| 487 file_summary.write(" - Nonstart: %s\n" % info['nonstart']) | |
| 488 file_summary.write(" - Nonstop: %s\n" % info['nonstop']) | |
| 489 file_summary.write(" Intergenic: %s\n" % info['intergenic']) | |
| 490 | |
| 491 for typ in feature_types: | |
| 492 if typ not in ['intergenic','pseudogene'] and info[typ] != 0: | |
| 493 file_summary.write(" %s: %s\n" % (typ,info[typ])) | |
| 494 file_summary.flush() | |
| 495 | |
| 496 file_overlap.close() | |
| 497 file_summary.close() |
