9
|
1 # A translation of calc_fitness.pl into python! For analysis of Tn-Seq.
|
|
2 # This script requires BioPython, which in turn has a good number of dependencies (some optional but very helpful).
|
|
3 # How to install BioPython and a list of its dependencies can be found here: http://biopython.org/DIST/docs/install/Installation.html
|
|
4 # K. McCoy
|
|
5
|
|
6
|
|
7
|
|
8
|
|
9
|
|
10
|
|
11
|
|
12
|
|
13
|
|
14 ##### ARGUMENTS #####
|
|
15
|
|
16 def print_usage():
|
|
17 print "\n" + "You are missing one or more required flags. A complete list of flags accepted by calc_fitness is as follows:" + "\n\n"
|
|
18 print "\033[1m" + "Required" + "\033[0m" + "\n"
|
|
19 print "-ref" + "\t\t" + "The name of the reference genome file, in GenBank format." + "\n"
|
|
20 print "-t1" + "\t\t" + "The name of the bowtie mapfile from time 1." + "\n"
|
|
21 print "-t2" + "\t\t" + "The name of the bowtie mapfile from time 2." + "\n"
|
|
22 print "-out" + "\t\t" + "Name of a file to enter the .csv output." + "\n"
|
|
23 print "\n"
|
|
24 print "\033[1m" + "Optional" + "\033[0m" + "\n"
|
|
25 print "-expansion" + "\t\t" + "Expansion factor (default: 250)" + "\n"
|
|
26 print "-reads1" + "\t\t" + "The number of reads to be used to calculate the correction factor for time 0." + "\n\t\t" + "(default counted from bowtie output)" + "\n"
|
|
27 print "-reads2" + "\t\t" + "The number of reads to be used to calculate the correction factor for time 6." + "\n\t\t" + "(default counted from bowtie output)" + "\n"
|
|
28 print "-cutoff" + "\t\t" + "Discard any positions where the average of counted transcripts at time 0 and time 1 is below this number (default 0)" + "\n"
|
|
29 print "-cutoff2" + "\t\t" + "Discard any positions within the normalization genes where the average of counted transcripts at time 0 and time 1 is below this number (default 10)" + "\n"
|
|
30 print "-strand" + "\t\t" + "Use only the specified strand (+ or -) when counting transcripts (default: both)" + "\n"
|
|
31 print "-normalize" + "\t" + "A file that contains a list of genes that should have a fitness of 1 - used for normalization and bottleneck calculations." + "\n"
|
|
32 print "-b" + "\t" + "Calculate bottleneck value (the percentage of insertions randomly lost) from all genes (rather than only normalization genes)" + "\n"
|
|
33 print "-maxweight" + "\t" + "The maximum weight a transposon gene can have in normalization calculations" + "\n"
|
|
34 print "-multiply" + "\t" + "Multiply all fitness scores by a certain value (e.g., the fitness of a knockout). You should normalize the data." + "\n"
|
|
35 print "-ef" + "\t\t" + "Exclude insertions that occur in the first N amount (%) of gene--becuase may not affect gene function." + "\n"
|
|
36 print "-el" + "\t\t" + "Exclude insertions in the last N amount (%) of the gene--considering truncation may not affect gene function." + "\n"
|
|
37 print "-wig" + "\t\t" + "Create a wiggle file for viewing in a genome browser. Provide a filename." + "\n"
|
|
38 print "-uncol" + "\t\t" + "Use if reads were uncollapsed when mapped." + "\n"
|
|
39 print "\n"
|
|
40
|
|
41 import argparse
|
|
42 parser = argparse.ArgumentParser()
|
|
43 parser.add_argument("-ref", action="store", dest="ref_genome")
|
|
44 parser.add_argument("-t1", action="store", dest="mapfile1")
|
|
45 parser.add_argument("-t2", action="store", dest="mapfile2")
|
|
46 parser.add_argument("-out", action="store", dest="outfile")
|
|
47 parser.add_argument("-out2", action="store", dest="outfile2")
|
|
48 parser.add_argument("-expansion", action="store", dest="expansion_factor")
|
|
49 parser.add_argument("-reads1", action="store", dest="reads1")
|
|
50 parser.add_argument("-reads2", action="store", dest="reads2")
|
|
51 parser.add_argument("-cutoff", action="store", dest="cutoff")
|
|
52 parser.add_argument("-cutoff2", action="store", dest="cutoff2")
|
|
53 parser.add_argument("-strand", action="store", dest="usestrand")
|
|
54 parser.add_argument("-normalize", action="store", dest="normalize")
|
|
55 parser.add_argument("-b", action="store", dest="bottleall")
|
|
56 parser.add_argument("-maxweight", action="store", dest="max_weight")
|
|
57 parser.add_argument("-multiply", action="store", dest="multiply")
|
|
58 parser.add_argument("-ef", action="store", dest="exclude_first")
|
|
59 parser.add_argument("-el", action="store", dest="exclude_last")
|
|
60 parser.add_argument("-wig", action="store", dest="wig")
|
|
61 parser.add_argument("-uncol", action="store", dest="uncol")
|
|
62 arguments = parser.parse_args()
|
|
63
|
|
64 if (not arguments.ref_genome or not arguments.mapfile1 or not arguments.mapfile2 or not arguments.outfile):
|
|
65 print_usage()
|
|
66 quit()
|
|
67
|
|
68 # Sets the default value of the expansion factor to 250, which is a trivial placeholder number.
|
|
69
|
|
70 if (not arguments.expansion_factor):
|
|
71 arguments.expansion_factor = 250
|
|
72
|
|
73 # 75 is similarly trivial
|
|
74
|
|
75 if (not arguments.max_weight):
|
|
76 arguments.max_weight = 75
|
|
77
|
|
78 # Sets the default value of cutoff to 0; cutoff exists to discard positions with a low number of counted transcripts, because fitnesses calculated from them may not be very accurate, by the same reasoning that studies with low sample sizes are innacurate.
|
|
79
|
|
80 if (not arguments.cutoff):
|
|
81 arguments.cutoff = 0
|
|
82
|
|
83 # Sets the default value of cutoff2 to 10; cutoff2 exists to discard positions within normalization genes with a low number of counted transcripts, because fitnesses calculated from them similarly may not be very accurate.
|
|
84 # This only has an effect if it's larger than cutoff, since the normalization step references a list of insertions already affected by cutoff.
|
|
85
|
|
86 if (not arguments.cutoff2):
|
|
87 arguments.cutoff2 = 10
|
|
88
|
|
89 if (not arguments.usestrand):
|
|
90 arguments.usestrand = "both"
|
|
91
|
|
92
|
|
93
|
|
94
|
|
95
|
|
96
|
|
97 ##### PARSING THE REFERENCE GENOME #####
|
|
98
|
|
99 def get_time():
|
|
100 import datetime
|
|
101 return datetime.datetime.now().time()
|
|
102 print "\n" + "Starting: " + str(get_time()) + "\n"
|
|
103
|
|
104 from Bio import SeqIO
|
|
105 import os.path
|
|
106 handle = open(arguments.ref_genome, "rU")
|
|
107 for record in SeqIO.parse(handle, "genbank"):
|
|
108 refname = record.id
|
|
109 features = record.features
|
|
110 handle.close()
|
|
111
|
|
112 # Makes a dictionary out of each feature that's a gene - with its gene name, start location, end location, and strand as keys to their values. Then makes a list out of all those dictionaries for ease of accessing later on.
|
|
113
|
|
114 feature_list = []
|
|
115 for feature in features:
|
|
116 if feature.type == "gene":
|
|
117 gene = feature.qualifiers["locus_tag"]
|
|
118 strand = feature.location.strand
|
|
119 start = float(feature.location.start)
|
|
120 end = float(feature.location.end)
|
|
121
|
|
122 # Exclude_first and exclude_last are used here to exclude whatever percentage of the genes you like from calculations; e.g. a value of 0.1 for exclude_last would exclude the last 10% of all genes!
|
|
123 # This can be useful because insertions at the very start or end of genes often don't actually break its function.
|
|
124
|
|
125 if (arguments.exclude_first):
|
|
126 start += (end - start) * float(arguments.exclude_first)
|
|
127 if (arguments.exclude_last):
|
|
128 end -= (end - start) * float(arguments.exclude_last)
|
|
129 feature_dictionary = {"gene": gene, "start": start, "end": end, "strand": strand}
|
|
130 feature_list.append(feature_dictionary)
|
|
131
|
|
132 print "Done generating feature lookup: " + str(get_time()) + "\n"
|
|
133
|
|
134
|
|
135
|
|
136
|
|
137
|
|
138
|
|
139
|
|
140
|
|
141
|
|
142
|
|
143 ##### PARSING THE MAPFILES #####
|
|
144
|
|
145 with open(arguments.mapfile1) as file:
|
|
146 r1 = file.readlines()
|
|
147 with open(arguments.mapfile2) as file:
|
|
148 r2 = file.readlines()
|
|
149
|
|
150 # When called, goes through each line of the mapfile to find the strand (+/Watson or -/Crick), count, and position of the read. It may be helpful to look at how the mapfiles are formatted to understand how this code finds them.
|
|
151
|
|
152 def read_mapfile(reads):
|
|
153 plus_total = 0
|
|
154 minus_total = 0
|
|
155 plus_counts = {"total": 0, "sites": 0}
|
|
156 minus_counts = {"total": 0, "sites": 0}
|
|
157 for read in reads:
|
|
158 if (arguments.uncol):
|
|
159 strand = read.split()[2]
|
|
160 count = 1
|
|
161 position = float(read.split()[4])
|
|
162 if arguments.usestrand != "both" and strand != arguments.usestrand:
|
|
163 continue
|
|
164 if (strand == "+"):
|
|
165 sequence_length = len(read.split()[5])
|
|
166 position += (sequence_length - 2)
|
|
167 plus_counts["total"] += count
|
|
168 plus_counts["sites"] += 1
|
|
169 if position in plus_counts:
|
|
170 plus_counts[position] += count
|
|
171 else:
|
|
172 plus_counts[position] = count
|
|
173 else:
|
|
174 minus_counts["total"] += count
|
|
175 minus_counts["sites"] += 1
|
|
176 if position in minus_counts:
|
|
177 minus_counts[position] += count
|
|
178 else:
|
|
179 minus_counts[position] = count
|
|
180 else:
|
|
181 if "-" in read.split()[0]:
|
|
182 strand = read.split()[1]
|
|
183 count = float(read.split()[0].split("-")[1])
|
|
184 position = float(read.split()[3])
|
|
185 else:
|
|
186 continue
|
|
187
|
|
188 # If for some reason you want to skip all reads from one of the strands - for example, if you wanted to compare the two strands - that's done here.
|
|
189
|
|
190 if arguments.usestrand != "both" and strand != arguments.usestrand:
|
|
191 continue
|
|
192
|
|
193 # Makes dictionaries for the + & - strands, with each insert position as a key and the number of insertions there as its corresponding value.
|
|
194
|
|
195 if (strand == "+"):
|
|
196 sequence_length = len(read.split()[4])
|
|
197
|
|
198 # The -2 in "(sequence_length -2)" comes from a fake "TA" in the read; see how the libraries are constructed for further on this
|
|
199
|
|
200 position += (sequence_length - 2)
|
|
201 plus_counts["total"] += count
|
|
202 plus_counts["sites"] += 1
|
|
203 if position in plus_counts:
|
|
204 plus_counts[position] += count
|
|
205 else:
|
|
206 plus_counts[position] = count
|
|
207 else:
|
|
208 minus_counts["total"] += count
|
|
209 minus_counts["sites"] += 1
|
|
210 if position in minus_counts:
|
|
211 minus_counts[position] += count
|
|
212 else:
|
|
213 minus_counts[position] = count
|
|
214 return (plus_counts, minus_counts)
|
|
215
|
|
216 # Calls read_mapfile(reads) to parse arguments.reads1 and arguments.reads2 (your reads from t1 and t2).
|
|
217
|
|
218
|
|
219
|
|
220
|
|
221
|
|
222 (plus_ref_1, minus_ref_1) = read_mapfile(r1)
|
|
223 print "Read first file: " + str(get_time()) + "\n"
|
|
224 (plus_ref_2, minus_ref_2) = read_mapfile(r2)
|
|
225 print "Read second file: " + str(get_time()) + "\n"
|
|
226
|
|
227 # The lines below are just printed for reference. The number of sites is the length of a given dictionary of sites - 1 because its last key, "total", isn't actually a site.
|
|
228
|
|
229 print "Reads:" + "\n"
|
|
230 print "1: + " + str(plus_ref_1["total"]) + " - " + str(minus_ref_1["total"]) + "\n"
|
|
231 print "2: + " + str(plus_ref_2["total"]) + " - " + str(minus_ref_2["total"]) + "\n"
|
|
232 print "Sites:" + "\n"
|
|
233 print "1: + " + str(plus_ref_1["sites"]) + " - " + str(minus_ref_1["sites"]) + "\n"
|
|
234 print "2: + " + str(plus_ref_2["sites"]) + " - " + str(minus_ref_2["sites"]) + "\n"
|
|
235
|
|
236
|
|
237
|
|
238
|
|
239
|
|
240
|
|
241
|
|
242
|
|
243
|
|
244
|
|
245 ##### FITNESS CALCULATIONS #####
|
|
246
|
|
247 # If reads1 and reads2 weren't specified in the command line, sets them as the total number of reads (found in read_mapfile())
|
|
248
|
|
249 if not arguments.reads1:
|
|
250 arguments.reads1 = plus_ref_1["total"] + minus_ref_1["total"]
|
|
251 if not arguments.reads2:
|
|
252 arguments.reads2 = plus_ref_2["total"] + minus_ref_2["total"]
|
|
253
|
|
254 # Calculates the correction factors for reads from t1 and t2; cfactor1 and cfactor2 are the number of reads from t1 and t2 respectively divided by total, which is the average number of reads between the two.
|
|
255 # This is used later on to correct for pipetting errors, or any other error that would cause unequal amounts of DNA from t1 and t2 to be sequenced so that an unequal amount of reads is produced
|
|
256
|
|
257 total = (float(arguments.reads1) + float(arguments.reads2))/2
|
|
258 cfactor1 = float(arguments.reads1)/total
|
|
259 cfactor2 = float(arguments.reads2)/total
|
|
260 print "Cfactor 1: " + str(cfactor1) + "\n"
|
|
261 print "Cfactor 2: " + str(cfactor2) + "\n"
|
|
262 import math
|
|
263 import csv
|
|
264 results = [["position", "strand", "count_1", "count_2", "ratio", "mt_freq_t1", "mt_freq_t2", "pop_freq_t1", "pop_freq_t2", "gene", "D", "W", "nW"]]
|
|
265 genic = 0
|
|
266 total_inserts = 0
|
|
267 with open(arguments.ref_genome, "r") as file:
|
|
268 firstline = file.readline()
|
|
269 genomelength = firstline.split()[2]
|
|
270 i = 0
|
|
271 while i < float(genomelength):
|
|
272
|
|
273 # At each possible location for an insertion in the genome, counts the number of actual insertions at t1 and which strand(s) the corresponding reads came from.
|
|
274
|
|
275 c1 = 0
|
|
276 if i in plus_ref_1:
|
|
277 c1 = float(plus_ref_1[i])
|
|
278 strand = "+/"
|
|
279 if i in minus_ref_1:
|
|
280 c1 += float(minus_ref_1[i])
|
|
281 strand = "b/"
|
|
282 elif i in minus_ref_1:
|
|
283 c1 = float(minus_ref_1[i])
|
|
284 strand = "-/"
|
|
285
|
|
286 # If there were no insertions at a certain location at t1 just continues to the next location; there can't be any comparison to make between t1 and t2 if there are no t1 insertions!
|
|
287
|
|
288 else:
|
|
289 i += 1
|
|
290 continue
|
|
291
|
|
292 # At each location where there was an insertion at t1, counts the number of insertions at t2 and which strand(s) the corresponding reads came from.
|
|
293
|
|
294 c2 = 0
|
|
295 if i in plus_ref_2:
|
|
296 c2 = float(plus_ref_2[i])
|
|
297 if i in minus_ref_2:
|
|
298 c2 += float(minus_ref_2[i])
|
|
299 strand += "b"
|
|
300 else:
|
|
301 strand += "+"
|
|
302 elif i in minus_ref_2:
|
|
303 c2 = float(minus_ref_2[i])
|
|
304 strand += "-"
|
|
305
|
|
306 # Corrects with cfactor1 and cfactor2
|
|
307
|
|
308 c1 /= cfactor1
|
|
309 if c2 != 0:
|
|
310 c2 /= cfactor2
|
|
311 ratio = c2/c1
|
|
312 else:
|
|
313 c2 = 0
|
|
314 ratio = 0
|
|
315
|
|
316 # Passes by all insertions with a number of reads smaller than the cutoff, as they may lead to inaccurate fitness calculations.
|
|
317
|
|
318 if (c1 + c2)/2 < float(arguments.cutoff):
|
|
319 i+= 1
|
|
320 continue
|
|
321
|
|
322 # Calculates each insertion's frequency within the populations at t1 and t2.
|
|
323
|
|
324 mt_freq_t1 = c1/total
|
|
325 mt_freq_t2 = c2/total
|
|
326 pop_freq_t1 = 1 - mt_freq_t1
|
|
327 pop_freq_t2 = 1 - mt_freq_t2
|
|
328
|
|
329 # Calculates each insertion's fitness! This is from the fitness equation log((frequency of mutation @ time 2 / frequency of mutation @ time 1)*expansion factor)/log((frequency of population without the mutation @ time 2 / frequency of population without the mutation @ time 1)*expansion factor)
|
|
330
|
|
331 w = 0
|
|
332 if mt_freq_t2 != 0:
|
|
333 top_w = math.log(mt_freq_t2*(float(arguments.expansion_factor)/mt_freq_t1))
|
|
334 bot_w = math.log(pop_freq_t2*(float(arguments.expansion_factor)/pop_freq_t1))
|
|
335 w = top_w/bot_w
|
|
336
|
|
337 # Checks which gene locus the insertion falls within, and records that.
|
|
338
|
|
339 gene = ''
|
|
340 for feature_dictionary in feature_list:
|
|
341 if feature_dictionary["start"] <= i and feature_dictionary["end"] >= i:
|
|
342 gene = "".join(feature_dictionary["gene"])
|
|
343 genic += 1
|
|
344 break
|
|
345 total_inserts += 1
|
|
346
|
|
347 # Writes all relevant information on each insertion and its fitness to a cvs file: the location of the insertion, its strand, c1, c2, etc. (the variable names are self-explanatiory)
|
|
348 # w is written twice, because the second w will be normalized if normalization is called for, thus becoming nW.
|
|
349
|
|
350 row = [i, strand, c1, c2, ratio, mt_freq_t1, mt_freq_t2, pop_freq_t1, pop_freq_t2, gene, arguments.expansion_factor, w, w]
|
|
351 results.append(row)
|
|
352 i += 1
|
|
353 with open(arguments.outfile, "wb") as csvfile:
|
|
354 writer = csv.writer(csvfile)
|
|
355 writer.writerows(results)
|
|
356
|
|
357 print "Done comparing mapfiles " + str(get_time()) + "\n"
|
|
358 print "Genic: " + str(genic) + "\n"
|
|
359 print "Total: " + str(total_inserts) + "\n"
|
|
360
|
|
361
|
|
362
|
|
363
|
|
364
|
|
365
|
|
366
|
|
367
|
|
368
|
|
369
|
|
370 ##### BOTTLENECK VALUE CALCULATION #####
|
|
371
|
|
372 #the bottleneck value is calculated here if done from all genes - otherwise it's done in the normalization section if only taken from normalization genes
|
|
373
|
|
374 if (arguments.bottleall):
|
|
375 overall_blank_count = 0
|
|
376 for list in results:
|
|
377 if (list[2] != 0 and list[3] == 0):
|
|
378 overall_blank_count += 1
|
|
379 overall_original_count = len(results)
|
|
380
|
|
381 pc_blank_normals = float(overall_blank_count) / float(overall_original_count)
|
|
382
|
|
383 with open(arguments.outfile2, "w") as f:
|
|
384 f.write("bottleneck_value: " + str(pc_blank_normals) + "\n" + "total: " + str(total) + "\n" + "refname: " + refname)
|
|
385
|
|
386
|
|
387
|
|
388
|
|
389
|
|
390
|
|
391
|
|
392
|
|
393
|
|
394
|
|
395 ##### NORMALIZATION #####
|
|
396
|
|
397 # If making a WIG file is requested in the arguments, starts a string to be added to and then written to the WIG file with a typical WIG file header.
|
|
398 # The header is just in a typical WIG file format; if you'd like to look into this more UCSC has notes on formatting WIG files on their site.
|
|
399
|
|
400 if (arguments.wig):
|
|
401 wigstring = "track type=wiggle_0 name=" + arguments.wig + "\n" + "variableStep chrom=" + refname + "\n"
|
|
402
|
|
403 # Takes normalization genes (which should all be predicted or known to have fitness values of exactly 1.0, like transposons for example) and uses them to normalize the fitnesses of all insertion locations
|
|
404
|
|
405 if (arguments.normalize):
|
|
406 with open(arguments.normalize) as file:
|
|
407 transposon_genes = file.read().splitlines()
|
|
408 print "Normalize genes loaded" + "\n"
|
|
409 blank_ws = 0
|
|
410 sum = 0
|
|
411 count = 0
|
|
412 weights = []
|
|
413 scores = []
|
|
414 for list in results:
|
|
415 if list[9] != '' and list[9] in transposon_genes:
|
|
416 c1 = list[2]
|
|
417 c2 = list[3]
|
|
418 score = list[11]
|
|
419 avg = (c1 + c2)/2
|
|
420
|
|
421 # Skips over those insertion locations with too few insertions - their fitness values are less accurate because they're based on such small insertion numbers.
|
|
422
|
|
423 if float(c1) >= float(arguments.cutoff2):
|
|
424
|
|
425 # Sets a max weight, to prevent insertion location scores with huge weights from unbalancing the normalization.
|
|
426
|
|
427 if (avg >= float(arguments.max_weight)):
|
|
428 avg = float(arguments.max_weight)
|
|
429
|
|
430 # Tallies how many w values are 0 within the blank_ws value; you might get many transposon genes with a w value of 0 if a bottleneck occurs, for example, which is especially common with in vivo experiments. This is used later by aggregate.py
|
|
431 # For example, when studying a nasal infection in a mouse model, what bacteria "sticks" and is able to survive and what bacteria is swallowed and killed or otherwise flushed out tends to be a matter of chance not fitness; all mutants with an insertion in a specific transposon gene could be flushed out by chance!
|
|
432
|
|
433 if score == 0:
|
|
434 blank_ws += 1
|
|
435
|
|
436 sum += score
|
|
437 count += 1
|
|
438 weights.append(avg)
|
|
439 scores.append(score)
|
|
440
|
|
441 print str(list[9]) + " " + str(score) + " " + str(c1)
|
|
442
|
|
443 # Counts and removes all "blank" fitness values of normalization genes - those that = 0 - because they most likely don't really have a fitness value of 0, and you just happened to not get any reads from that location at t2.
|
|
444
|
|
445 blank_count = 0
|
|
446 original_count = len(scores)
|
|
447 curr_count = original_count
|
|
448 i = 0
|
|
449 while i < curr_count:
|
|
450 w_value = scores[i]
|
|
451 if w_value == 0:
|
|
452 blank_count += 1
|
|
453 weights.pop(i)
|
|
454 scores.pop(i)
|
|
455 i -= 1
|
|
456 curr_count = len(scores)
|
|
457 i += 1
|
|
458
|
|
459 # If no normalization genes can pass the cutoff, normalization cannot occur, so this ends the script advises the user to try again and lower cutoff and/or cutoff2.
|
|
460
|
|
461 if len(scores) == 0:
|
|
462 print 'ERROR: The normalization genes do not have enough reads to pass cutoff and/or cutoff2; please lower one or both of those arguments.' + "\n"
|
|
463 quit()
|
|
464
|
|
465 pc_blank_normals = float(blank_count) / float(original_count)
|
|
466 print "# blank out of " + str(original_count) + ": " + str(pc_blank_normals) + "\n"
|
|
467 if (not arguments.bottleall):
|
|
468 with open(arguments.outfile2, "w") as f:
|
|
469 f.write("bottleneck_value: " + str(pc_blank_normals) + "\n" + "total: " + str(total) + "\n" + "refname: " + refname)
|
|
470
|
|
471 average = sum / count
|
|
472 i = 0
|
|
473 weighted_sum = 0
|
|
474 weight_sum = 0
|
|
475 while i < len(weights):
|
|
476 weighted_sum += weights[i]*scores[i]
|
|
477 weight_sum += weights[i]
|
|
478 i += 1
|
|
479 weighted_average = weighted_sum/weight_sum
|
|
480
|
|
481 print "Normalization step:" + "\n"
|
|
482 print "Regular average: " + str(average) + "\n"
|
|
483 print "Weighted Average: " + str(weighted_average) + "\n"
|
|
484 print "Total Insertions: " + str(count) + "\n"
|
|
485
|
|
486 old_ws = 0
|
|
487 new_ws = 0
|
|
488 wcount = 0
|
|
489
|
|
490 for list in results:
|
|
491 if list[11] == 'W':
|
|
492 continue
|
|
493 new_w = float(list[11])/weighted_average
|
|
494
|
|
495 # Sometimes you want to multiply all the fitness values by a constant; this does that.
|
|
496 # For example you might multiply all the values by a constant for a genetic interaction screen - where Tn-Seq is performed as usual except there's one background knockout all the mutants share.
|
|
497
|
|
498 if arguments.multiply:
|
|
499 new_w *= float(arguments.multiply)
|
|
500
|
|
501 if float(list[11]) > 0:
|
|
502 old_ws += float(list[11])
|
|
503 new_ws += new_w
|
|
504 wcount += 1
|
|
505
|
|
506 list[12] = new_w
|
|
507
|
|
508 if (arguments.wig):
|
|
509 wigstring += str(list[0]) + " " + str(new_w) + "\n"
|
|
510
|
|
511 old_w_mean = old_ws / wcount
|
|
512 new_w_mean = new_ws / wcount
|
|
513 print "Old W Average: " + str(old_w_mean) + "\n"
|
|
514 print "New W Average: " + str(new_w_mean) + "\n"
|
|
515
|
|
516 with open(arguments.outfile, "wb") as csvfile:
|
|
517 writer = csv.writer(csvfile)
|
|
518 writer.writerows(results)
|
|
519
|
|
520 if (arguments.wig):
|
|
521 if (arguments.normalize):
|
|
522 with open(arguments.wig, "wb") as wigfile:
|
|
523 wigfile.write(wigstring)
|
|
524 else:
|
|
525 for list in results:
|
|
526 wigstring += str(list[0]) + " " + str(list[11]) + "\n"
|
|
527 with open(arguments.wig, "wb") as wigfile:
|
|
528 wigfile.write(wigstring)
|
|
529
|
|
530
|
|
531
|
|
532
|
|
533
|
|
534
|
|
535
|
|
536
|
|
537
|
|
538
|
|
539
|
|
540
|
|
541
|
|
542
|
|
543
|
|
544
|
|
545
|
|
546
|
|
547
|
|
548
|
|
549
|
|
550
|
|
551
|
|
552
|
|
553
|
|
554
|
|
555
|
|
556
|
|
557
|
|
558
|
|
559
|
|
560
|
|
561
|
|
562
|
|
563
|
|
564
|
|
565
|
|
566
|
|
567
|
|
568
|
|
569
|
|
570
|
|
571
|
|
572
|
|
573
|
|
574
|
|
575
|
|
576
|
|
577
|
|
578
|
|
579
|
|
580
|
|
581
|
|
582
|
|
583
|
|
584
|
|
585
|
|
586
|
|
587
|
|
588
|
|
589
|
|
590
|
|
591
|
|
592
|
|
593
|
|
594
|
|
595
|
|
596
|
|
597
|
|
598
|
|
599
|
|
600
|
|
601
|
|
602
|
|
603
|
|
604
|
|
605
|
|
606
|
|
607
|
|
608
|
|
609
|
|
610
|
|
611
|
|
612
|
|
613
|
|
614
|
|
615
|
|
616
|
|
617
|
|
618
|
|
619
|
|
620
|
|
621
|
|
622
|
|
623
|
|
624
|
|
625
|
|
626
|
|
627
|
|
628
|
|
629
|
|
630 # `````````````
|
|
631 # `````````````
|
|
632 # ``@@@@@@@@@``
|
|
633 # ``@@@@@@@@@```
|
|
634 # ``@@@@@@@@@``
|
|
635 # ``@@@@@@@@@``
|
|
636 # ``@@@@@@@@@``
|
|
637 # ``@@@@@@@@@``
|
|
638 # ```@@@@@@@@#``
|
|
639 # ```@@@@@@@@#``
|
|
640 # ```@@@@@@@@+``
|
|
641 # ```@@@@@@@@'``
|
|
642 # ```@@@@@@@@;``
|
|
643 # ```@@@@@@@@;``
|
|
644 # ```@@@@@@@@:``
|
|
645 # ```@@@@@@@@,``
|
|
646 # ``.@@@@@@@@.``
|
|
647 # ``.@@@@@@@@```
|
|
648 # ``.@@@@@@@@```
|
|
649 # ``.@@@@@@@@```
|
|
650 # ``.@@@@@@@@``
|
|
651 # ``,@@@@@@@@``
|
|
652 # ``,@@@@@@@@``
|
|
653 # ``.@@@@@@@@``
|
|
654 # ```@@@@@@@@``
|
|
655 # ``:@@@@@@@@``
|
|
656 # ``:@@@@@@@@``
|
|
657 # ``:@@@@@@@@``
|
|
658 # ``:@@@@@@@@``
|
|
659 # ``'@@@@@@@@``
|
|
660 # ``;@@@@@@@@``
|
|
661 # ``:@@@@@@@@``
|
|
662 # ``:@@@@@@@@``
|
|
663 # ``:@@@@@@@@``
|
|
664 # ``;@@@@@@@#``
|
|
665 # ````+@@@@@@@#`````
|
|
666 # ```````#@@@@@@@#``````
|
|
667 # `````.,@@@@@@@@@...````
|
|
668 # ``@@@@@@@@@@@@@@@@@@;``
|
|
669 # ``@@@@@@@@@@@@@@@@@@;``
|
|
670 # ```````````````````````
|
|
671 # `````````````````````
|
|
672 # ``````.```````
|
|
673 # ````@.''```
|
|
674 # ```# `;```
|
|
675 # ``.+ @```
|
|
676 # ```@ ````,+```
|
|
677 # ```;;````` @```
|
|
678 # ```@ ``````,@```
|
|
679 # ```,+```..```@```
|
|
680 # ```@ ``....```@```
|
|
681 # ```+' ``....```#'``
|
|
682 # ```@```......`` @```
|
|
683 # ```'+```......```'@```
|
|
684 # ```@ ``........```@```
|
|
685 # ```'#```........````@```
|
|
686 # ```@ ``..........```#,``
|
|
687 # ```'#```...........`` @```
|
|
688 # ```@``.............```.+```
|
|
689 # ```:#```.............`` #```
|
|
690 # ``````` ```@ ```.......#......``.@```
|
|
691 # `````````` ```:@```#`......@......```@```
|
|
692 # ``````#@@@`` ```@ `.`:.......@.......`` @```
|
|
693 # ```.#@###@`` ```:@``..`+`....`@.......```@,``
|
|
694 # ```'@####@``` ```@````..@@@@@@@@#,`..#```` @```
|
|
695 # ```#####@@``` ``;@ ,`.,@@. `@@..#..```''``
|
|
696 # ``:####@#```` ```@``@`@@ @@:...`` @```
|
|
697 # ```@#####```` ``,@``.@, ,@`...``:@```
|
|
698 # ``.####@``` ```@.` @` @....``@```
|
|
699 # ``####@``` ``,@ @.` @`.````@```
|
|
700 # ``@##@```` ```@, @: ;# `@..```@.``
|
|
701 # ```@##```` ``.@`,@ @@, #...`` @```
|
|
702 # ```@#@``` ```@, # `@@@ @`.```;'``
|
|
703 # ```##:`` ``.@ +, .@@@ ,'..`` @```
|
|
704 # ``.##``` ```@, @ `@@@ @`.```,+```
|
|
705 # `````@##``` ```@`'. @@@ :...```@``` ``````````
|
|
706 # ````````````````````````````````````````##@``` ```@:`@ @@@ #...`` #``` `````````````````
|
|
707 # ```````````````````````````````````````.###@``` ```@ `, .@@@++'++#@@'` #`..```#``` ````````````'@@@@@.````
|
|
708 # `````+@####################@@@@@@@@@@@@#####@``` ```#;`,. `@#...,.,,,,,,..;@, @....`` @``````````````+@@@########@.``
|
|
709 # `+@##########################################,```` ```````````````@```@ +@,.,,,,,,,,,,,,,,,,,@ @....```#`````````'@@@##############@```
|
|
710 # `@###########################################@``````````````````````````````````````+'``.,'.#.,,,,,,,,,,,,,,,,,,,,.++@......`` @````+@@@#######@+``````'###'``
|
|
711 # ``:@@########@@@@@@@@@@@@@@@@@@@@@@#@@@@@@@##@``````````````````````````......,`,,.,@ ```.##.,.,,,,,,,,,,,,,,,,,,,,.##......`` :.+@@#######@@:``````````###@```
|
|
712 # ````````````````````````````````,#########@###@@@@#################################@'```...@.,,,,,,,,,,,,,,,,,,,,,,,#.........`'@######@@+```````````````@##```
|
|
713 # ```````````````````````````````@#########@#########################################```.....@:,,,,,,,,,,,,,,,,,,..;@..........`@####@@:```````` ```@##@``
|
|
714 # `````@@####@@@##########@@@@@@@@@@@@@@@@@@@@@@@@@@@@#+@+```......@#.,,,,,,,,,,,,,,,,.##..........`` #@#````````` ```##@```
|
|
715 # ``.#@######@####:```````````````````````````````````@ ``.......@:#,,,,,,,,,,,,,,,;@@`............`` @`````` ```@##:``
|
|
716 # ``:########@###@```````````````````````````````````#;```......+..`##,.,,,,,,,,.#@#..'............`` @```` ``;##@```
|
|
717 # ```@@####@@##@'```` ````@ ``.......'.....@@#+;:;'#@@;`...#`............`` @``` ```@##```
|
|
718 # ```````````````` ```@,```.............'..:''':.@`......:............```@.`` ``@###```
|
|
719 # `````````````` ``.@```..............#........'`....................```@``` ``.##@``````
|
|
720 # ```@.``..............`#........,.....................```@.`` ```@#+,``````
|
|
721 # ``.@``.,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,.................```@``` ````+#####@````
|
|
722 # ```@````......,........................,....,,.,.,,,,,..` @,`` ```;@@######````
|
|
723 # ``.@```.......,......................`......,...........```@``` ```+#########@````
|
|
724 # ```@```...........`@@+...............+@@`....,...........```@:`` ``;@#########@'```
|
|
725 # ``.@ ``............@@@@@`.........,@@@@@.....,............```@``` ``@#########@#@@```
|
|
726 # ```@.```............@@@@@@@`.....'@@@@@@@.....,............```#'`` ``@###@###@#@#@@@``
|
|
727 # ``.@```.............@@@@@@@@@..+#@@@@@@@@.....,.............`` #``` ``@#@@@@##@#@#@@@``
|
|
728 # ```@````.............@@@@@@@@@@@@@@@@@@@@@.....,.............```'#``` ``'#@@@@##@#@@@@@,`
|
|
729 # ``.@ ``.........,....@@@@@@@@@',##@@@@@@@@`....,..............```@``` ```@@@@@##@@'#@@@@`
|
|
730 # ```@.```.........,....@@@@@@@#`...`#@@@@@@@`....,..............```.@``` ``#@@@@##;```@@@@`
|
|
731 # ``.@ ``.....,,,,,,,.,.@@@@@#.,,,,,,,.#@@@@@,,,,,,,,,,,,:,,,,,,,,.``@``` ``#@@@@#.````@@@@`
|
|
732 # ```@. ``...............@@@;......,......#@@@`...........,.........```@``` ``#@@@;``````@@@@`
|
|
733 # ```@```................@,........,........+#`...........,.........```@.`` ``#@@@;``````@@@@`
|
|
734 # ```@.``...........................,.........`............,..........```@``` ``#@@@'`` ``#@@;`
|
|
735 # ``.@ ``................,..........,......................,..........```#:`` ``#@@@'`` ```#@``
|
|
736 # ```@,``............................,......................,...........`` @``` ``+@@@'`` `````
|
|
737 # ``.@```............................,......................,...........```#+`` ``;@@@+`` ``
|
|
738 # ```@,``.............................,......................,............```@``` ``'@@@+``
|
|
739 # ``.@```.........,...................,......................,............```'#``` ``;@@@+``
|
|
740 # ```@:```.........,...................,......................,.............`` @``` ``:@@@+``
|
|
741 # ```@`..,,,,,,,,,,,,,,,,,,..,.........,......................,.............```'@``` ``;@@@#``
|
|
742 # ``+'```...,.................,....,,,,,,,,,,,,,,,,,,,,,,,,,,,,,..........,...``@``` ``;@@@#``
|
|
743 # ```@ ``....,.................,.......................................,.......``;@``` ``:@@@#``
|
|
744 # ``'#```....,.................,.......................................,.......```@``` ``:@@@@``
|
|
745 # ```@```.....,.................,.......................................,........``;#`` ``:@@@@``
|
|
746 # ```@ ``.....,.................,.......................................,........`` @``` ``:@@@@``
|
|
747 # ``@````...............................................................,........`` @.`` ``;@@@@``
|
|
748 # ``@ ```..............................................................,........`` .#`` ``'@@@@``
|
|
749 # ``# ``````.`.```````````````..````````````````````..`````````````````.`````````` @`` ``'@@@@``
|
|
750 # ``. `````````````````````````````````````````````````````````````````````````` .;`` ``'@@@@``
|
|
751 # ``@;` `` ` ` ` ` ```` ` ````` ` ` `,+@``` ``+@@@@``
|
|
752 # `````:;'++##@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@+.```` ``+@@@@``
|
|
753 # ```````````````````````````+##@``````````````````@#@``````````````````````````````` ``+@@@@``
|
|
754 # `````````````````````````@##@``````````````````@##;```````````````````````````` ``+@@@@``
|
|
755 # ````###,```` ````+##@``` ``+@@@@``
|
|
756 # ``,###``` ``.@##``` ``'@@@@``
|
|
757 # ``###@`` ```@##``` ``'@@@@``
|
|
758 # ```@##@`` ``@##+`` ``'@@@@``
|
|
759 # ```###.`` ``:##@`` ``'@@@@``
|
|
760 # ``:###``` ```##@``` ``'@@@@``
|
|
761 # ``@##@`` ```@##``` ``'@@@@``
|
|
762 # ```@##'`` ``@###`` ``'@@@@``
|
|
763 # ```@##``` ```##@`` ``'@@@@``
|
|
764 # ``,###``` ```@#@``` ``'@@@@``
|
|
765 # ``####`` ``@##.`` ``'@@@@``
|
|
766 # ``@##@`` ``;##@`` ``'@@@@``
|
|
767 # `````````@##@`` ```##@```` ``;@@@@``
|
|
768 # ``````````````@##;`` ```###`````````````` ``;@@@@``
|
|
769 # `````````.,;.```###``` ``@##:`````````````` ``;@@@@``
|
|
770 # `````#@#########@@##``` ``###@@@@@@###@#@'``` ``;@@@@``
|
|
771 # ```@@###############@`` ``,################`` ``;@@@@``
|
|
772 # ``'@################+`` ```###############+`` ``;@@@@``
|
|
773 # `````````````````````` ``###########@#,```` ``.@@@@``
|
|
774 # ````````````````````` ``````````````````` ```@@@.`
|
|
775 # ```````````````` ```````
|
|
776 #
|
|
777 # |