2
|
1 import string
|
|
2 from syngenic import *
|
|
3 from Bio.Seq import Seq
|
|
4 from Bio.SeqFeature import SeqFeature, FeatureLocation
|
|
5 from pprint import pprint
|
|
6
|
|
7 from itertools import izip
|
|
8
|
|
9 import numpy as np
|
|
10 import pandas as pd
|
|
11
|
|
def all_patterns(input_=None):
    """Collect every pattern and its reverse complement, as text and in planned form.

    Parameters
    ----------
    input_ : iterable, optional
        Pattern objects exposing ``reverse_complement()``.  The commented-out
        code in the original hints at Biopython ``Seq`` objects -- TODO
        confirm against callers.  ``None`` (the default) is treated as an
        empty collection.

    Returns
    -------
    tuple of (list, list)
        ``patts`` holds ``str(p)`` followed by ``str(p.reverse_complement())``
        for each input pattern ``p``; ``n_patts`` holds
        ``pattern(x).plan_ambiguity()`` for the same objects in the same
        order, so both lists have two entries per input pattern.
    """
    # A ``None`` sentinel replaces the former shared mutable default ``[]``.
    if input_ is None:
        input_ = ()

    patts = []
    n_patts = []

    for patt in input_:
        # Forward strand first, then its reverse complement.
        for strand_variant in (patt, patt.reverse_complement()):
            patts.append(str(strand_variant))
            n_patts.append(pattern(strand_variant).plan_ambiguity())

    return patts, n_patts
|
|
29
|
|
def fake_from_real(path = None, id_ = None, name = None):
    """Build a shortened stand-in plasmid from a GenBank file.

    The result is assembled from the first 10 bases of the real sequence,
    then, for every feature of type ``"CDS"``, the first 9 and last 9 bases
    of that feature, and finally the last 10 bases of the real sequence.
    Each CDS stub is annotated with a new ``"gene"`` feature that carries the
    strand of the CDS it came from.

    Parameters
    ----------
    path : str
        Path of the GenBank file; it must contain exactly one record because
        it is parsed with ``SeqIO.read``.
    id_ : object
        Value stored in the ``id`` attribute of the result.
    name : object
        Value stored in the ``name`` attribute of the result.

    Returns
    -------
    The assembled object ``f_p``.  It starts as a slice of ``plasmid_seq.seq``
    and has record slices added to it; presumably it is a ``SeqRecord`` once
    at least one CDS was found -- TODO confirm, and confirm what callers
    expect for a file without any CDS.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(path, "r") as handle:
        plasmid_seq = SeqIO.read(handle, "genbank")

    f_p = plasmid_seq.seq[:10]
    f_CDS = []
    for f in plasmid_seq.features:
        if f.type == "CDS":
            tmp_start = len(f_p)
            # Slicing the record (not ``.seq``) yields record slices here.
            tmp_cds = plasmid_seq[f.location.start:f.location.start+9] + plasmid_seq[f.location.end-9:f.location.end]
            tmp_end = tmp_start + len(tmp_cds)
            f_p += tmp_cds
            # The strand lives on the location object; passing it there
            # avoids the separate ``strand`` keyword of ``SeqFeature``.
            f_CDS.append(SeqFeature(FeatureLocation(tmp_start, tmp_end, strand=f.location.strand), type="gene"))

    # NOTE(review): in the flattened source this statement directly follows
    # the loop body; it is placed after the loop here (one tail per plasmid).
    # Confirm against the original indentation.
    f_p += plasmid_seq.seq[-10:]

    for feat in f_CDS:
        f_p.features.append(feat)
    f_p.id = id_
    f_p.name = name

    return f_p
|
|
54
|
|
def punctuate_targets(f_patts, n_pl):
    """Map every position covered by a target hit to the pattern symbols seen there.

    Parameters
    ----------
    f_patts : dict
        Maps a pattern (an indexable string of symbols) to a list of hits.
        Only ``hit[1]`` (start) and ``hit[2]`` (exclusive end) of each hit
        are read.
    n_pl : sized object
        Only ``len(n_pl)`` is used: it is the position at which a hit whose
        end lies before its start wraps around to position 0.

    Returns
    -------
    dict
        ``{position: set_of_symbols}`` where the symbols are the characters
        of the pattern (the dictionary key) aligned with that position.

    Raises
    ------
    IndexError
        If a hit covers more positions than its pattern has symbols.
    """
    max_len = len(n_pl)
    n_poss = {}

    for key, hits in f_patts.items():
        for el in hits:
            start, end = el[1], el[2]
            if end >= start:
                positions = list(range(start, end))
            else:
                # The hit runs over the end of the sequence and continues at 0.
                # ``list(...)`` keeps the concatenation valid on Python 3,
                # where ``range`` objects cannot be added.
                positions = list(range(start, max_len)) + list(range(0, end))

            for offset, position in enumerate(positions):
                n_poss.setdefault(position, set()).add(key[offset])

    return n_poss
|
|
82
|
|
83
|
|
def print_seq(n_pl, ind_range = None):
    """Print a sequence in rows of nine symbols with each index underneath.

    Parameters
    ----------
    n_pl : str
        Sequence text.  It is split with the regular expression
        ``(\\w{1})``, so every word character becomes its own column.
    ind_range : sequence of two ints, optional
        ``[start, end]`` slice to print; indices are reported in the
        coordinates of the full sequence.  ``None`` prints everything.

    Returns
    -------
    None
        The rows are written to standard output.  Each row is preceded by
        ``print("\\n")`` and each symbol is right-aligned with the last digit
        of its index.
    """
    # Local import: ``re`` is not among the imports visible in this module.
    import re

    if ind_range is None:
        shown = n_pl
        index = range(len(n_pl))
    else:
        shown = n_pl[ind_range[0]:ind_range[1]]
        index = range(ind_range[0], ind_range[1])

    # A real list (not a lazy ``filter`` object) so it works on Python 3 too.
    data = [token for token in re.split(r'(\w{1})', shown) if token]

    seq_row = ""
    ind_row = ""

    for i, token in enumerate(data):
        # Flush the finished row before starting every tenth column.
        if i % 9 == 0 and i > 0:
            print("\n")
            print(seq_row)
            print(ind_row)
            seq_row = ""
            ind_row = ""

        label = str(index[i])
        # One separating blank plus padding to the width of the index label.
        seq_row += " " * len(label) + token
        ind_row += " " + label

    print("\n")
    print(seq_row)
    print(ind_row)

    return None
|
|
156
|
|
157
|
|
def generalization(n_poss, n_pl, synonims_tables, reduced=False):
    """Turn the plasmid sequence into a regular expression of allowed variants.

    For every position listed in ``n_poss``:

    * inside a feature of type ``"CDS"`` or ``"gene"`` (and unless the
      position's symbol set is exactly ``{"N"}``), the codon containing the
      position is looked up in ``synonims_tables["synonims"]`` (strand +1) or
      ``synonims_tables["anti_synonims"]`` (strand -1); if the listed codons
      differ at that codon position, the position becomes a character class
      of the alternatives;
    * outside any such feature, if the symbol set contains A, T, C or G, the
      position becomes the two-letter class from the local ``transversions``
      table (A<->T, C<->G).

    All other positions keep their original base.

    Parameters
    ----------
    n_poss : dict
        ``{position: set_of_symbols}`` for the positions covered by targets.
    n_pl : object
        Plasmid-like object; ``features`` and ``seq`` are read.
    synonims_tables : dict
        Holds the ``"synonims"`` and ``"anti_synonims"`` tables, each indexed
        by a three-base slice of ``n_pl.seq``.
    reduced : bool, optional
        Accepted for compatibility; it currently has no effect because both
        settings apply the same substitution.

    Returns
    -------
    ``sre_yield.AllStrings`` built from the generalized sequence pattern.

    Raises
    ------
    KeyError
        If a codon is missing from the selected table, or if the symbol
        picked for a non-coding position is not one of A, T, C, G.
    """
    # Local import: ``re`` is not among the imports visible in this module.
    import re

    transversions = {"A": "[AT]",
                     "T": "[TA]",
                     "C": "[CG]",
                     "G": "[GC]"}

    new_poss = {}

    for pos in n_poss:
        symbols = n_poss[pos]
        in_cds = False

        for feat in n_pl.features:
            inside = (pos >= feat.location.start) and (pos < feat.location.end)
            if not (inside and feat.type in ["CDS", "gene"]):
                continue
            in_cds = True

            if symbols == {"N"}:
                continue

            # 0, 1 or 2: which base of its codon ``pos`` is, counted from the
            # feature start; the codon itself starts ``frame`` bases earlier.
            frame = (pos - feat.location.start) % 3
            codon_start = pos - frame
            tmp_codon = n_pl.seq[codon_start:codon_start + 3]

            if feat.strand == +1:
                table = synonims_tables["synonims"]
            elif feat.strand == -1:
                table = synonims_tables["anti_synonims"]
            else:
                # No strand information: nothing to look up.
                continue

            bases = set(codon[frame] for codon in table[tmp_codon])
            if len(bases) > 1:
                new_poss[pos] = "[" + "".join(bases) + "]"

        if (not in_cds) and set.intersection(symbols, {"A", "T", "C", "G"}) != set():
            new_poss[pos] = transversions[set.difference(symbols, {"N"}).pop()]

    # A real list (not a lazy ``filter`` object) so it works on Python 3 too.
    n_seq = [token for token in re.split(r'(\w{1})', str(n_pl.seq)) if token]

    new_plasmid_generalized = "".join(
        new_poss.get(pos, token) for pos, token in enumerate(n_seq)
    )

    return sre_yield.AllStrings(new_plasmid_generalized)
|
|
307
|
|
308
|
|
def evaluate_plasmids(plasmids = None,
                      original_plasmid = None,
                      codon_usage_table = None,
                      n_patts = None,
                      f_patts = None):
    """Keep the candidate plasmids that are target-free and protein-preserving.

    A candidate is kept when it differs from ``original_plasmid.seq``, every
    ``gene``/``cds`` feature translates to the same protein as in the
    original, and ``plasmid(candidate).findpatterns(n_patts, f_patts)``
    reports an empty hit list for every pattern.

    Parameters
    ----------
    plasmids : sized iterable of str
        Candidate sequences; ``len(plasmids)`` is used for progress output.
    original_plasmid : object
        Reference plasmid; ``seq``, ``features`` and ``extract`` are used and
        it is iterated base by base.
    codon_usage_table : object
        Indexed as ``codon_usage_table.loc[codon]["Proportion"]``
        (presumably a pandas DataFrame indexed by codon -- TODO confirm).
    n_patts, f_patts :
        Passed unchanged to ``findpatterns``.

    Returns
    -------
    dict
        ``"Plasmid_<i>"`` (``i`` is the candidate's position in ``plasmids``)
        maps to a dict with ``modified_positions``, ``codon_usage``,
        ``number_of_modification``, ``sequence``, ``mean_codon_usage`` and
        ``std_codon_usage``.  The extra key ``"original_plasmids"`` holds
        ``original_plasmid``.
    """
    from syngenic import plasmid
    from Bio.Seq import Seq
    import numpy as np

    useful = {}

    # NOTE(review): the counter is advanced once per candidate (kept or not);
    # the flattened source does not show the nesting of ``i += 1`` -- confirm.
    for i, tmp_pl in enumerate(plasmids):

        if not (tmp_pl != original_plasmid.seq):
            continue

        identical_proteic_sequence = all(
            [Seq(plasmid(tmp_pl).extract(feat)).translate() == Seq(original_plasmid.extract(feat)).translate()
             for feat in original_plasmid.features
             if feat.type.lower() in ["gene", "cds"]]
        )

        # Every pattern must report an empty hit list, and at least one
        # pattern must have been reported at all.
        hits = list(plasmid(tmp_pl).findpatterns(n_patts, f_patts).values())
        target_free = bool(hits) and all(el == [] for el in hits)

        if not (identical_proteic_sequence and target_free):
            continue

        print("\t" + str(i) + "/" + str(len(plasmids)))

        # Built-in ``zip`` replaces the Python-2-only ``itertools.izip``.
        tmp = [j for j, (a1, a2) in enumerate(zip(tmp_pl, original_plasmid)) if a1 != a2]

        entry = {}
        useful["Plasmid_" + str(i)] = entry
        entry["modified_positions"] = tmp
        entry["codon_usage"] = []
        entry["number_of_modification"] = len(tmp)
        entry["sequence"] = tmp_pl

        for modified_position in tmp:
            for feat in original_plasmid.features:
                if feat.type not in ["CDS", "gene"]:
                    continue
                if not ((modified_position >= feat.location.start) and (modified_position < feat.location.end)):
                    continue

                # Codon containing the modified base, counted from the
                # feature start, always read from the candidate sequence.
                frame = (modified_position - feat.location.start) % 3
                codon_start = modified_position - frame
                tmp_codon = tmp_pl[codon_start:codon_start + 3]
                if feat.strand != +1:
                    tmp_codon = str(Seq(tmp_codon).reverse_complement())

                entry["codon_usage"].append(codon_usage_table.loc[tmp_codon]["Proportion"])

        entry["mean_codon_usage"] = np.mean(entry["codon_usage"])
        entry["std_codon_usage"] = np.std(entry["codon_usage"])

    useful["original_plasmids"] = original_plasmid

    return useful
|
|
383
|
|
384
|
|
385
|
|
def rank_plasmids(original_useful_plasmids = None):
    """Rank candidate plasmids by codon usage and number of modifications.

    Parameters
    ----------
    original_useful_plasmids : dict
        Maps a plasmid key to a dict that provides at least
        ``mean_codon_usage``, ``std_codon_usage`` and
        ``number_of_modification``.  The key ``"original_plasmids"`` is
        ignored.

    Returns
    -------
    pandas.DataFrame
        One row per plasmid and one column per metric, ordered by highest
        mean codon usage, then lowest standard deviation, then fewest
        modifications.  An empty frame is returned when there is no
        candidate to rank.
    """
    metrics = ['mean_codon_usage', 'std_codon_usage', 'number_of_modification']

    tmp_useful_plasmids = {}
    for key, record in original_useful_plasmids.items():
        if key == "original_plasmids":
            continue
        tmp_useful_plasmids[key] = {metric: record[metric] for metric in metrics}

    # Plasmid keys become the row index after transposing.
    dat_plasmids = pd.DataFrame(tmp_useful_plasmids).T

    if dat_plasmids.empty:
        # Without rows there are no metric columns to sort by.
        return dat_plasmids

    # ``sort_values`` returns a new frame; return it so the ranking is kept.
    return dat_plasmids.sort_values(metrics, ascending=[False, True, True])
|
|
414
|
|
415
|
|
def print_color_seq(original = None,
                    others = None,
                    annotation_information = None,
                    tot = None,
                    ind_range = None,
                    patterns = None,
                    f_patterns = None,
                    patts = None,
                    max_row = 18):

    """
    Print an aligned, colour-coded text comparison of the original plasmid
    and the selected variants to standard output.

    For each block of columns the following rows are printed: one row per
    target that has hits, the aggregate target positions, the feature
    annotation, the CDS orientation, the original sequence, one row per
    variant, and the position index.

    Example call given by the original author:

        original = plasmids["original_plasmid"],
        others = def_pls,
        annotation_information = useful_plasmids,
        tot = plasmids,
        ind_range = None

    Parameters read by the code:
        original: object providing ``seq``, ``features`` and ``len()``.
        others: iterable of keys into ``tot`` and ``annotation_information``.
        annotation_information: ``annotation_information[s]["modified_positions"]``
            lists the positions to highlight in variant ``s``.
        tot: ``tot[s]["sequence"]`` is the sequence text of variant ``s``.
        ind_range: ``[start, end]`` or None (whole plasmid).
        patterns: mapping position -> sized collection; a position hit by
            more than one entry is marked "+", otherwise "T".
        f_patterns: mapping pattern -> list of hits; ``hit[0]`` is the text
            written into the target row, ``hit[1]``/``hit[2]`` are start and
            exclusive end.
        patts: not used in this function.
        max_row: slice width used when cutting the rows into blocks.

    Returns None.

    NOTE(review): this function relies on Python 2 behaviour (``keys()`` and
    ``filter()`` returning lists) and on ``re`` being in scope.  The block
    nesting below was reconstructed from a copy of the file without
    indentation -- verify it against the original before relying on it.
    """

    ################################################################
    # Single Targets
    ################################################################

    # Give every pattern that has at least one hit a "Target<n>" identifier.
    targets = {}

    t_keys = f_patterns.keys()

    for l in range(len(t_keys)):
        if f_patterns[t_keys[l]] != []:
            targets["Target" + str(l)] = t_keys[l]

    tars = {}

    for tar in targets.keys():
        # One "|" placeholder per plasmid position, overwritten where hit.
        tars[tar] = ["|" for i in range(len(original.seq))]

        for tar1 in f_patterns[targets[tar]]:
            if tar1[1] < tar1[2]:
                for l in range(tar1[1], tar1[2]):
                    tars[tar][l] = tar1[0][l-tar1[1]]
            else:
                # Hit wraps around: fill up to the end, then from position 0.
                for l in range(tar1[1], len(original.seq)):
                    tars[tar][l] = tar1[0][l-tar1[1]]
                for l in range(tar1[2]):
                    tars[tar][l] = tar1[0][-tar1[2]:][l]

    # Rows are ordered by the (string-sorted) target identifier.
    kkk = tars.keys()
    kkk.sort()
    target_lists = [[key]+tars[key] for key in kkk]

    ################################################################
    # Aggregate Targets
    ################################################################
    # ``list += str`` appends the single character as one new cell.
    target_positions = ["TargetPositions"]
    for k in range(len(original)):
        if k in patterns.keys():
            if len(patterns[k]) > 1:
                target_positions += "+"#"T"
            else:
                target_positions += "T"
        else:
            target_positions += " "

    ################################################################
    # Annotation
    ################################################################
    # ``direction`` filled in this section is discarded: it is re-created
    # in the CDS section below.
    direction = []
    annot = ["Annotation"]

    distance = 0
    for feat in [f for f in original.features if f.type.lower() in ["gene", "cds"]]:
        for space in range(distance, feat.location.start):
            direction.append("_")
            annot.append("_")
        # "*" marks the first and "/" the last position of the feature.
        annot.append("*")
        for an_space in range(feat.location.end - feat.location.start-2):
            annot.append("_")
        distance = feat.location.end
        annot.append("/")
    for space in range(distance, len(original)):
        direction.append("_")
        annot.append("_")

    ################################################################
    # CDS
    ################################################################

    if ind_range == None:
        ind_range = [0, len(original)]

    sequences = {}
    sequences["original"] = filter(None, re.split(r'(\w{1})', original.seq[ind_range[0]:ind_range[1]]))
    direction = ["CDS_Orientation"]
    distance = 0

    # Parity of this counter alternates the colour from codon to codon.
    alternating = 0

    # NOTE(review): ``counter`` is an absolute plasmid position but indexes
    # the ``ind_range`` slice; this looks like it assumes ind_range[0] == 0
    # and feature lengths that are multiples of three -- confirm.
    for feat in [f for f in original.features if f.type.lower() in ["gene", "cds"]]:
        for space in range(distance, feat.location.start):
            direction.append("_")
        if feat.type.lower() in ["gene", "cds"]:
            for counter in range(feat.location.start, feat.location.end, 3):
                if alternating % 2 == 1:
                    # Odd codons: ANSI escape "1;31;40" around each base.
                    sequences["original"][counter] = "\033[1;31;40m" + sequences["original"][counter] + "\033[0m"
                    sequences["original"][counter+1] = "\033[1;31;40m" + sequences["original"][counter+1] + "\033[0m"
                    sequences["original"][counter+2] = "\033[1;31;40m" + sequences["original"][counter+2] + "\033[0m"
                    alternating += 1

                    if feat.strand == +1:
                        direction.append("-")
                        direction.append("-")
                        direction.append(">")
                    if feat.strand == -1:
                        direction.append("<")
                        direction.append("-")
                        direction.append("-")

                else:
                    # Even codons: ANSI escape "1;32;40" around each base.
                    sequences["original"][counter] = "\033[1;32;40m" + sequences["original"][counter] + "\033[0m"
                    sequences["original"][counter+1] = "\033[1;32;40m" + sequences["original"][counter+1] + "\033[0m"
                    sequences["original"][counter+2] = "\033[1;32;40m" + sequences["original"][counter+2] + "\033[0m"
                    alternating += 1

                    if feat.strand == +1:
                        direction.append("-")
                        direction.append("-")
                        direction.append(">")
                    if feat.strand == -1:
                        direction.append("<")
                        direction.append("-")
                        direction.append("-")
        distance = feat.location.end
    for space in range(distance, len(original)):
        direction.append("_")

    ################################################################
    # Plasmids_ids
    ################################################################
    # One row per variant: its key followed by one cell per base; modified
    # positions are wrapped in the "1;32;40" escape sequence.
    f = 0
    new_plasmids = []
    for s in others:
        new_plasmids.append([s] + filter(None, re.split(r'(\w{1})', tot[s]["sequence"][ind_range[0]:ind_range[1]])))
        for k in range(len(original)):
            if k in annotation_information[s]["modified_positions"]:
                new_plasmids[f][k+1] = "\033[1;32;40m" + new_plasmids[f][k+1] + "\033[0m"
        f += 1

    ################################################################
    # Index
    ################################################################

    index = ["Index"] + [str(i) for i in range(ind_range[0], ind_range[1])]

    ################################################################
    # Print the rows block by block
    ################################################################

    # Entries 0 and 5 are lists of rows; the other entries are single rows.
    # Every row starts with its label in cell 0.
    data = {0:target_lists,
            1:target_positions,
            2:annot,
            3:direction,
            4:["Original"] + sequences["original"],
            5:new_plasmids,
            6:index}

    elements = []  # not used further in this function
    blocks = {}

    if len(range(max_row, len(original.seq)+1, max_row)) % max_row == 0:
        n_blocks = len(range(max_row, len(original.seq)+1, max_row))
    else:
        n_blocks = len(range(max_row, len(original.seq)+1, max_row)) + 1

    # ``j`` is the slice offset of the current block within every row.
    j = 0

    for i in range(n_blocks):
        blocks[i] = []
        for l in range(7):
            if l in [0, 5]:
                for el in data[l]:
                    if len(el[j:]) > max_row:
                        if i >= 1:
                            # Later blocks get the row label prepended again.
                            blocks[i].append([el[0]] + el[j:j+max_row])
                        else:
                            # In the first block the slice already starts
                            # with the label cell.
                            blocks[i].append(el[j:j+max_row])
                    else:
                        blocks[i].append([el[0]] + el[j:])
            else:
                if len(data[l][j:]) > max_row:
                    if i >= 1:
                        blocks[i].append([data[l][0]] + data[l][j:j+max_row])
                    else:
                        blocks[i].append(data[l][j:j+max_row])
                else:
                    blocks[i].append([data[l][0]] + data[l][j:])
        j += max_row
        print("\n")

        # Pad every row label to the width of the longest label in the block.
        fff = []
        for f in range(len(blocks[i])):
            fff.append(len(blocks[i][f][0]))
        fff = max(fff)
        for f in range(len(blocks[i])):
            for r in range(fff-len(blocks[i][f][0])):
                blocks[i][f][0] += " "
            if f < len(blocks[i])-1:
                # Every row except the last (the index row) gets each cell
                # left-padded by the width of the index label in that column.
                for l in range(1,len(blocks[i][f])):
                    tmp = ""
                    if l < len(blocks[i][-1]):
                        for g in range(len(str(blocks[i][-1][l]))):
                            tmp += " "
                    blocks[i][f][l] = tmp + blocks[i][f][l]
                blocks[i][f] = " ".join(blocks[i][f])
            else:
                blocks[i][f] = " ".join(blocks[i][f])
            print(blocks[i][f])

    # Trailer: the gene/CDS features and the raw pattern hits.
    print("\n")
    print([f for f in original.features if f.type.lower() in ["gene", "cds"]])
    print("\n")
    print(f_patterns)

    return
|
|
664
|
|
def print_to_pdf(original = None,
                 others = None,
                 annotation_information = None,
                 tot = None,
                 ind_range = None,
                 patterns = None,
                 f_patterns = None,
                 patts = None,
                 max_row = 9):

    """
    Write the comparison of the original plasmid and the selected variants
    to "comparison_syngenic_plasmids.pdf" (one reportlab table per block of
    columns) and a textual summary of CDS regions, patterns and target
    identifiers to "./further_information.pdf".

    Example call given by the original author:

        original = plasmids["original_plasmid"],
        others = def_pls,
        annotation_information = useful_plasmids,
        tot = plasmids,
        ind_range = None

    Parameters read by the code:
        original: object providing ``seq``, ``features`` and ``len()``.
        others: iterable of keys into ``tot`` and ``annotation_information``.
        annotation_information: ``annotation_information[s]["modified_positions"]``
            lists the positions to mark in variant ``s``.
        tot: ``tot[s]["sequence"]`` is the sequence text of variant ``s``.
        ind_range: ``[start, end]`` or None (whole plasmid).
        patterns: mapping position -> sized collection; a position hit by
            more than one entry is marked "+", otherwise "T".
        f_patterns: mapping pattern -> list of hits; ``hit[0]`` is the text
            written into the target row, ``hit[1]``/``hit[2]`` are start and
            exclusive end.
        patts: not used in this function.
        max_row: slice width used when cutting the rows into blocks.

    Returns None.

    NOTE(review): this function relies on Python 2 behaviour (``keys()`` and
    ``filter()`` returning lists) and on ``re`` being in scope.  The block
    nesting below was reconstructed from a copy of the file without
    indentation -- verify it against the original before relying on it.
    """

    from reportlab.lib import colors
    from reportlab.lib.pagesizes import letter
    from reportlab.platypus import SimpleDocTemplate, Table, TableStyle
    from reportlab.pdfgen import canvas

    ################################################################
    # Single Targets
    ################################################################

    # Give every pattern that has at least one hit a "Target<n>" identifier.
    targets = {}

    t_keys = f_patterns.keys()

    for l in range(len(t_keys)):
        if f_patterns[t_keys[l]] != []:
            targets["Target" + str(l)] = t_keys[l]

    tars = {}

    for tar in targets.keys():
        # One "|" placeholder per plasmid position, overwritten where hit.
        tars[tar] = ["|" for i in range(len(original.seq))]

        for tar1 in f_patterns[targets[tar]]:
            if tar1[1] < tar1[2]:
                for l in range(tar1[1], tar1[2]):
                    tars[tar][l] = tar1[0][l-tar1[1]]
            else:
                # Hit wraps around: fill up to the end, then from position 0.
                for l in range(tar1[1], len(original.seq)):
                    tars[tar][l] = tar1[0][l-tar1[1]]
                for l in range(tar1[2]):
                    tars[tar][l] = tar1[0][-tar1[2]:][l]

    # Rows are ordered by the (string-sorted) target identifier.
    kkk = tars.keys()
    kkk.sort()
    target_lists = [[key]+tars[key] for key in kkk]

    ################################################################
    # Aggregate Targets
    ################################################################
    # ``list += str`` appends the single character as one new cell.
    target_positions = ["TargetPositions"]
    for k in range(len(original)):
        if k in patterns.keys():
            if len(patterns[k]) > 1:
                target_positions += "+"#"T"
            else:
                target_positions += "T"
        else:
            target_positions += " "

    ################################################################
    # Annotation
    ################################################################
    # ``direction`` filled in this section is discarded: it is re-created
    # in the CDS section below.
    direction = []
    annot = ["Annotation"]

    distance = 0
    for feat in [f for f in original.features if f.type.lower() in ["gene", "cds"]]:
        for space in range(distance, feat.location.start):
            direction.append("_")
            annot.append("_")
        # "*" marks the first and "/" the last position of the feature.
        annot.append("*")
        for an_space in range(feat.location.end - feat.location.start-2):
            annot.append("_")
        distance = feat.location.end
        annot.append("/")
    for space in range(distance, len(original)):
        direction.append("_")
        annot.append("_")

    ################################################################
    # CDS
    ################################################################

    if ind_range == None:
        ind_range = [0, len(original)]

    sequences = {}
    sequences["original"] = filter(None, re.split(r'(\w{1})', original.seq[ind_range[0]:ind_range[1]]))
    direction = ["CDS_Orientation"]
    distance = 0

    # Parity of this counter alternates the marker from codon to codon.
    alternating = 0

    # NOTE(review): ``counter`` is an absolute plasmid position but indexes
    # the ``ind_range`` slice; this looks like it assumes ind_range[0] == 0
    # and feature lengths that are multiples of three -- confirm.
    for feat in [f for f in original.features if f.type.lower() in ["gene", "cds"]]:
        for space in range(distance, feat.location.start):
            direction.append("_")
        if feat.type.lower() in ["gene", "cds"]:
            for counter in range(feat.location.start, feat.location.end, 3):
                if alternating % 2 == 1:
                    # Odd codons: each base is prefixed with the letter "f".
                    sequences["original"][counter] = 'f' + sequences["original"][counter]
                    sequences["original"][counter+1] = 'f' + sequences["original"][counter+1]
                    sequences["original"][counter+2] = 'f' + sequences["original"][counter+2]
                    alternating += 1

                    if feat.strand == +1:
                        direction.append("-")
                        direction.append("-")
                        direction.append(">")
                    if feat.strand == -1:
                        direction.append("<")
                        direction.append("-")
                        direction.append("-")

                else:
                    # Even codons: each base is prefixed with the letter "s".
                    sequences["original"][counter] = 's' + sequences["original"][counter]
                    sequences["original"][counter+1] = 's' + sequences["original"][counter+1]
                    sequences["original"][counter+2] = 's' + sequences["original"][counter+2]
                    alternating += 1

                    if feat.strand == +1:
                        direction.append("-")
                        direction.append("-")
                        direction.append(">")
                    if feat.strand == -1:
                        direction.append("<")
                        direction.append("-")
                        direction.append("-")
        distance = feat.location.end
    for space in range(distance, len(original)):
        direction.append("_")

    ################################################################
    # Plasmids_ids
    ################################################################
    # One row per variant: its key followed by one cell per base; modified
    # positions get the letter "m" appended.
    f = 0
    new_plasmids = []
    for s in others:
        new_plasmids.append([s] + filter(None, re.split(r'(\w{1})', tot[s]["sequence"][ind_range[0]:ind_range[1]])))
        for k in range(len(original)):
            if k in annotation_information[s]["modified_positions"]:
                new_plasmids[f][k+1] += "m"
        f += 1

    ################################################################
    # Index
    ################################################################

    index = ["Index"] + [str(i) for i in range(ind_range[0], ind_range[1])]

    ################################################################
    # Create the pdf file
    ################################################################

    # Entries 0 and 5 are lists of rows; the other entries are single rows.
    # Every row starts with its label in cell 0.
    data = {0:target_lists,
            1:target_positions,
            2:annot,
            3:direction,
            4:["Original"] + sequences["original"],
            5:new_plasmids,
            6:index}

    doc = SimpleDocTemplate("comparison_syngenic_plasmids.pdf",pagesize=letter,
                            rightMargin=30,leftMargin=30,
                            topMargin=30,bottomMargin=30)

    elements = []
    blocks = {}

    if len(range(max_row, len(original.seq)+1, max_row)) % max_row == 0:
        n_blocks = len(range(max_row, len(original.seq)+1, max_row))
    else:
        n_blocks = len(range(max_row, len(original.seq)+1, max_row)) + 1

    # ``j`` is the slice offset of the current block within every row.
    j = 0

    for i in range(n_blocks):
        blocks[i] = []
        for l in range(7):
            if l in [0, 5]:
                for el in data[l]:
                    if len(el[j:]) > max_row:
                        if i >= 1:
                            # Later blocks get the row label prepended again.
                            blocks[i].append([el[0]] + el[j:j+max_row])
                        else:
                            # In the first block the slice already starts
                            # with the label cell.
                            blocks[i].append(el[j:j+max_row])
                    else:
                        blocks[i].append([el[0]] + el[j:])
            else:
                if len(data[l][j:]) > max_row:
                    if i >= 1:
                        blocks[i].append([data[l][0]] + data[l][j:j+max_row])
                    else:
                        blocks[i].append(data[l][j:j+max_row])
                else:
                    blocks[i].append([data[l][0]] + data[l][j:])
        j += max_row

        # One table per block, followed by a one-row table of empty cells.
        elements.append(Table(blocks[i], hAlign='LEFT'))
        elements.append(Table([["", "", "", "", ""]]))

    doc.build(elements)

    # Second document: plain text lines drawn from y = 750 downwards.
    c = canvas.Canvas("./further_information.pdf")
    c.drawString(100,750,"CDS regions:")
    upper_bound = 750
    for feat in original.features:
        if feat.type.lower() in ["gene", "cds"]:
            upper_bound -= 15
            # Any strand other than -1 is reported as "+".
            if feat.location.strand == -1:
                sign = "-"
            else:
                sign = "+"
            c.drawString(115,upper_bound, str("[") + str(feat.location.start)+ ":" + str(feat.location.end) + "]" + "(" + sign + ")")
    upper_bound -= 30
    c.drawString(100,upper_bound,"Patterns and the corresponding targets on the plasmid sequence:")
    for f_pattern in f_patterns.keys():
        upper_bound -= 15
        c.drawString(115,upper_bound,f_pattern + ":")
        for val in f_patterns[f_pattern]:
            upper_bound -= 15
            c.drawString(130,upper_bound,str(val))
        # NOTE(review): applied once per pattern here; the flattened source
        # does not show whether it belongs to the inner loop -- confirm.
        upper_bound -= 5

    upper_bound -= 30
    c.drawString(100,upper_bound,"Identifiers of the targets found in the plasmid sequence:")
    for target in targets.keys():
        upper_bound -= 15
        c.drawString(115,upper_bound,target + ": " + targets[target])

    c.save()

    return
|
|
948
|
|
949
|
|
def produce_random_targets(sequence):
    """Placeholder for a random target generator; it currently does nothing.

    Scenarios listed for a future implementation:

    * a target on two contiguous CDS
    * a target in a non-coding region
    * a target in a coding region
    * a target on a left overlap
    * a target on a right overlap

    Parameters
    ----------
    sequence :
        Accepted but not used yet.

    Returns
    -------
    None
    """
    return None
|