comparison CodonSwitchTool/functions.py @ 2:aad5e435e4dc draft default tip

Uploaded
author gianmarco_piccinno
date Tue, 21 May 2019 05:24:56 -0400
parents
children
comparison
equal deleted inserted replaced
1:1c31d6d25429 2:aad5e435e4dc
1 import string
2 from syngenic import *
3 from Bio.Seq import Seq
4 from Bio.SeqFeature import SeqFeature, FeatureLocation
5 from pprint import pprint
6
7 from itertools import izip
8
9 import numpy as np
10 import pandas as pd
11
def all_patterns(input_=None):
    """Collect every pattern together with its reverse complement.

    Args:
        input_: iterable of sequence-like objects that provide
            ``reverse_complement()`` and can be converted with ``str()``.
            ``None`` (the default) is treated as an empty collection.
            NOTE(review): presumably Bio.Seq.Seq objects -- confirm with callers.

    Returns:
        A tuple ``(patts, n_patts)``:
        * ``patts`` -- for each input pattern, its string form followed by
          the string form of its reverse complement;
        * ``n_patts`` -- in the same order, the value returned by
          ``pattern(x).plan_ambiguity()`` for the pattern and for its
          reverse complement.
    """
    # A None sentinel replaces the former mutable ``[]`` default.
    if input_ is None:
        input_ = ()

    patts = []
    n_patts = []

    for patt in input_:
        revc = patt.reverse_complement()

        patts.append(str(patt))
        patts.append(str(revc))

        n_patts.append(pattern(patt).plan_ambiguity())
        n_patts.append(pattern(revc).plan_ambiguity())

    return patts, n_patts
29
def fake_from_real(path=None, id_=None, name=None):
    """Build a shortened stand-in record from a GenBank file.

    The result is the concatenation of the first 10 bases of the record,
    then, for every feature of type ``"CDS"``, the first 9 and the last 9
    bases of that feature, and finally the last 10 bases of the record.
    Each shortened CDS is annotated on the result as a ``"gene"`` feature
    that keeps the strand of the source CDS.

    Args:
        path: path of the GenBank file to read (a single record is expected
            by ``SeqIO.read``).
        id_: value assigned to the ``id`` attribute of the result.
        name: value assigned to the ``name`` attribute of the result.

    Returns:
        The shortened sequence object with ``id`` and ``name`` set.
        NOTE(review): the accumulator starts as ``plasmid_seq.seq[:10]`` and
        only gains a ``features`` attribute if concatenating it with a record
        slice yields a record -- confirm against the Biopython version in use.
    """
    # Read inside a context manager so the file handle is always released.
    with open(path, "r") as handle:
        plasmid_seq = SeqIO.read(handle, "genbank")

    f_p = plasmid_seq.seq[:10]
    f_CDS = []
    for f in plasmid_seq.features:
        if f.type == "CDS":
            tmp_start = len(f_p)
            # Keep only the two ends of the coding sequence.
            tmp_cds = (plasmid_seq[f.location.start:f.location.start + 9]
                       + plasmid_seq[f.location.end - 9:f.location.end])
            tmp_end = tmp_start + len(tmp_cds)
            f_p += tmp_cds
            f_CDS.append(SeqFeature(FeatureLocation(tmp_start, tmp_end),
                                    type="gene",
                                    strand=f.location.strand))
    f_p += plasmid_seq.seq[-10:]

    for feat in f_CDS:
        f_p.features.append(feat)
    f_p.id = id_
    f_p.name = name

    return f_p
54
def punctuate_targets(f_patts, n_pl):
    """Map every targeted position to the set of pattern symbols covering it.

    Args:
        f_patts: dict mapping a pattern (an indexable string) to a list of
            hits. Each hit is indexable, with the start position at index 1
            and the end position (exclusive) at index 2.
        n_pl: the sequence (anything supporting ``len()``). Its length is used
            to wrap hits whose end is smaller than their start.

    Returns:
        dict ``{position: set_of_symbols}`` where the symbols are the
        characters of the pattern key aligned on the positions of each hit.

    Raises:
        IndexError: if a hit spans more positions than its pattern key has
            characters.
    """
    n_poss = {}
    max_len = len(n_pl)

    for key, hits in f_patts.items():
        for el in hits:
            start, end = el[1], el[2]
            if end < start:
                # The hit wraps around the end of the sequence.
                # Build lists explicitly: ranges cannot be added on Python 3.
                positions = list(range(start, max_len)) + list(range(0, end))
            else:
                positions = range(start, end)

            for offset, position in enumerate(positions):
                n_poss.setdefault(position, set()).add(key[offset])

    return n_poss
82
83
def print_seq(n_pl, ind_range=None):
    """Print a sequence in rows of nine symbols with the indices underneath.

    Every row is printed as a line holding two newline characters, then the
    symbols, then the matching indices. Each symbol is right-aligned on the
    last digit of its index.

    Args:
        n_pl: the sequence as text (it is handed to ``re.split``).
        ind_range: optional ``[start, end]`` pair. When given, only
            ``n_pl[start:end]`` is printed and the indices run from ``start``
            to ``end - 1``; otherwise the whole sequence is printed with
            indices starting at 0.

    Returns:
        None.
    """
    # Local import: the module does not visibly import ``re`` itself.
    import re

    if ind_range is None:
        window = n_pl
        index = range(len(n_pl))
    else:
        window = n_pl[ind_range[0]:ind_range[1]]
        index = range(ind_range[0], ind_range[1])

    # A real list is needed (Python 3 ``filter`` is lazy and has no len()).
    data = [piece for piece in re.split(r'(\w{1})', window) if piece]

    seq_row = ""
    ind_row = ""

    for i, symbol in enumerate(data):
        if i % 9 == 0 and i > 0:
            # The current row is full: flush it and start a new one.
            print("\n")
            print(seq_row)
            print(ind_row)
            seq_row = ""
            ind_row = ""

        label = str(index[i])
        # Pad the symbol so it sits above the last digit of its index.
        seq_row += " " * len(label) + symbol
        ind_row += " " + label

    print("\n")
    print(seq_row)
    print(ind_row)

    return None
156
157
def generalization(n_poss, n_pl, synonims_tables, reduced=False):
    """Turn the plasmid sequence into a regular expression of allowed variants.

    For every targeted position in ``n_poss``:

    * if the position lies inside a feature of type ``"CDS"`` or ``"gene"``
      and its symbol set is not exactly ``{"N"}``, the codon containing it is
      looked up in ``synonims_tables["synonims"]`` (strand ``+1``) or
      ``synonims_tables["anti_synonims"]`` (strand ``-1``). When the listed
      codons offer more than one base at that codon position, the position
      becomes a character class of those bases;
    * if the position lies in no such feature and its symbol set contains at
      least one of A/T/C/G, it becomes the two-base character class from the
      ``transversions`` table below.

    All other positions keep their original base.

    Args:
        n_poss: dict ``{position: set_of_symbols}`` of targeted positions.
        n_pl: plasmid object exposing ``seq`` and ``features`` (features need
            ``type``, ``strand`` and ``location.start`` / ``location.end``).
        synonims_tables: dict with the keys ``"synonims"`` and
            ``"anti_synonims"``, each mapping a codon (as sliced from
            ``n_pl.seq``) to an iterable of alternative codons.
        reduced: accepted for compatibility; it currently has no effect
            because both code paths were identical.

    Returns:
        ``sre_yield.AllStrings`` built from the generalized pattern.

    Raises:
        KeyError: if a codon is missing from the selected table, or if the
            symbol chosen for a non-coding position is not one of A/T/C/G.
    """
    # Local import: the module does not visibly import ``re`` itself.
    import re

    transversions = {"A": "[AT]",
                     "T": "[TA]",
                     "C": "[CG]",
                     "G": "[GC]"}

    new_poss = {}

    for pos, symbols in n_poss.items():
        in_cds = False
        for feat in n_pl.features:
            start = feat.location.start
            inside = (pos >= start) and (pos < feat.location.end)
            if not (inside and feat.type in ["CDS", "gene"]):
                continue
            in_cds = True

            if symbols == {"N"}:
                # A fully ambiguous target gives nothing to switch here.
                continue

            # frame 0/1/2 = first/second/third base of the codon.
            frame = (pos - start) % 3
            codon_start = pos - frame
            tmp_codon = n_pl.seq[codon_start:codon_start + 3]

            if feat.strand == +1:
                table = synonims_tables["synonims"]
            elif feat.strand == -1:
                table = synonims_tables["anti_synonims"]
            else:
                continue

            bases = set(codon[frame] for codon in table[tmp_codon])
            if len(bases) > 1:
                new_poss[pos] = "[" + "".join(bases) + "]"

        if (not in_cds) and (symbols & {"A", "T", "C", "G"}):
            # Outside coding regions, offer the base and its paired alternative.
            new_poss[pos] = transversions[(symbols - {"N"}).pop()]

    n_seq = [piece for piece in re.split(r'(\w{1})', str(n_pl.seq)) if piece]

    new_plasmid_generalized = "".join(
        new_poss.get(pos, base) for pos, base in enumerate(n_seq))

    return sre_yield.AllStrings(new_plasmid_generalized)
307
308
def evaluate_plasmids(plasmids=None,
                      original_plasmid=None,
                      codon_usage_table=None,
                      n_patts=None,
                      f_patts=None):
    """Keep the candidate sequences that preserve proteins and remove targets.

    A candidate is kept when it differs from ``original_plasmid.seq``, when
    every ``gene``/``cds`` feature translates to the same protein as in the
    original, and when ``findpatterns`` reports an empty hit list for every
    pattern (with at least one pattern reported).

    Args:
        plasmids: sized iterable of candidate sequences.
        original_plasmid: plasmid object exposing ``seq``, ``features`` and
            ``extract(feature)``; it is also iterated symbol by symbol.
        codon_usage_table: table indexed by codon through ``.loc`` with a
            ``"Proportion"`` entry.
            NOTE(review): presumably a pandas DataFrame -- confirm.
        n_patts: forwarded to ``plasmid.findpatterns``.
        f_patts: forwarded to ``plasmid.findpatterns``.

    Returns:
        dict with one ``"Plasmid_<i>"`` entry per kept candidate (``i`` is
        the candidate's position in ``plasmids``) holding
        ``modified_positions``, ``codon_usage`` (one value per modified
        position and enclosing coding feature), ``number_of_modification``,
        ``sequence``, ``mean_codon_usage`` and ``std_codon_usage``, plus the
        key ``"original_plasmids"`` holding ``original_plasmid``.
    """
    from syngenic import plasmid
    from Bio.Seq import Seq
    import numpy as np
    try:
        from itertools import izip as zip_pairs  # Python 2
    except ImportError:
        zip_pairs = zip  # Python 3: zip is already lazy

    useful = {}

    for i, tmp_pl in enumerate(plasmids):
        if not (tmp_pl != original_plasmid.seq):
            continue

        identical_proteic_sequence = all([
            Seq(plasmid(tmp_pl).extract(feat)).translate()
            == Seq(original_plasmid.extract(feat)).translate()
            for feat in original_plasmid.features
            if feat.type.lower() in ["gene", "cds"]])

        hits = plasmid(tmp_pl).findpatterns(n_patts, f_patts).values()
        target_free = set([bool(el == []) for el in hits]) == {True}

        if not (identical_proteic_sequence and target_free):
            continue

        print("\t" + str(i) + "/" + str(len(plasmids)))
        tmp = [j for j, (a1, a2)
               in enumerate(zip_pairs(tmp_pl, original_plasmid)) if a1 != a2]

        entry = {"modified_positions": tmp,
                 "codon_usage": [],
                 "number_of_modification": len(tmp),
                 "sequence": tmp_pl}
        useful["Plasmid_" + str(i)] = entry

        for modified_position in tmp:
            for feat in original_plasmid.features:
                if feat.type.lower() not in ["gene", "cds"]:
                    continue
                inside = ((modified_position >= feat.location.start)
                          and (modified_position < feat.location.end))
                if not (inside and feat.type in ["CDS", "gene"]):
                    continue

                # frame 0/1/2 = first/second/third base of the codon.
                frame = (modified_position - feat.location.start) % 3
                codon_start = modified_position - frame
                # Always score the codon of the *candidate*. The third-base,
                # forward-strand case used to read the original sequence.
                tmp_codon = tmp_pl[codon_start:codon_start + 3]
                if feat.strand != +1:
                    tmp_codon = str(Seq(tmp_codon).reverse_complement())
                entry["codon_usage"].append(
                    codon_usage_table.loc[tmp_codon]["Proportion"])

        entry["mean_codon_usage"] = np.mean(entry["codon_usage"])
        entry["std_codon_usage"] = np.std(entry["codon_usage"])

    useful["original_plasmids"] = original_plasmid

    return useful
383
384
385
def rank_plasmids(original_useful_plasmids=None):
    """Rank candidate plasmids by codon usage and number of modifications.

    Args:
        original_useful_plasmids: dict mapping a plasmid name to a dict that
            has at least the keys ``mean_codon_usage``, ``std_codon_usage``
            and ``number_of_modification``. The key ``"original_plasmids"``
            is ignored.

    Returns:
        pandas.DataFrame with one row per plasmid and the three metrics as
        columns, ordered by highest mean codon usage, then lowest standard
        deviation, then fewest modifications. An input without candidates
        yields an empty frame.
    """
    metrics = ("mean_codon_usage", "std_codon_usage", "number_of_modification")

    tmp_useful_plasmids = {}
    for key, info in original_useful_plasmids.items():
        if key == "original_plasmids":
            continue
        tmp_useful_plasmids[key] = {metric: info[metric] for metric in metrics}

    dat_plasmids = pd.DataFrame(tmp_useful_plasmids).T

    if dat_plasmids.empty:
        # Nothing to rank; sorting would fail on the missing columns.
        return dat_plasmids

    # sort_values returns a new frame: the result must be the one returned.
    return dat_plasmids.sort_values(
        ['mean_codon_usage', 'std_codon_usage', 'number_of_modification'],
        ascending=[False, True, True])
414
415
def _split_symbols(text):
    """Split text into its single word characters, dropping empty pieces."""
    # Local import: the module does not visibly import ``re`` itself.
    import re
    return [piece for piece in re.split(r'(\w{1})', text) if piece]


def _coding_features(record):
    """Return the features of ``record`` whose type is gene or cds (any case)."""
    return [f for f in record.features if f.type.lower() in ["gene", "cds"]]


def _comparison_rows(original, others, annotation_information, tot,
                     ind_range, patterns, f_patterns,
                     mark_codon, mark_modified):
    """Build the label-prefixed rows shared by the text and pdf renderers.

    ``mark_codon(cell, odd)`` decorates a base of the original sequence that
    belongs to a codon (``odd`` alternates from one codon to the next);
    ``mark_modified(cell)`` decorates a modified base of a candidate.

    Returns ``(targets, rows)``: ``targets`` maps ``"Target<n>"`` to the
    pattern key it stands for; ``rows`` is the list of rows in display order
    (targets, target positions, annotation, CDS orientation, original
    sequence, candidates, index), each starting with its label.
    """
    sequence_length = len(original.seq)
    total = len(original)
    coding = _coding_features(original)

    # ---- one row per pattern that has at least one hit -----------------
    targets = {}
    for number, key in enumerate(list(f_patterns.keys())):
        if f_patterns[key] != []:
            targets["Target" + str(number)] = key

    target_rows = []
    for target_name in sorted(targets):
        track = ["|"] * sequence_length
        for hit in f_patterns[targets[target_name]]:
            text, start, end = hit[0], hit[1], hit[2]
            if start < end:
                for pos in range(start, end):
                    track[pos] = text[pos - start]
            else:
                # The hit wraps around the end of the sequence.
                for pos in range(start, sequence_length):
                    track[pos] = text[pos - start]
                for pos in range(end):
                    track[pos] = text[-end:][pos]
        target_rows.append([target_name] + track)

    # ---- aggregate row: "T" = one target, "+" = several ----------------
    target_positions = ["TargetPositions"]
    for k in range(total):
        if k in patterns:
            target_positions.append("+" if len(patterns[k]) > 1 else "T")
        else:
            target_positions.append(" ")

    # ---- annotation row: "*" opens a coding feature, "/" closes it -----
    annot = ["Annotation"]
    distance = 0
    for feat in coding:
        annot.extend("_" for _ in range(distance, feat.location.start))
        annot.append("*")
        annot.extend(
            "_" for _ in range(feat.location.end - feat.location.start - 2))
        annot.append("/")
        distance = feat.location.end
    annot.extend("_" for _ in range(distance, total))

    # ---- original sequence and codon orientation -----------------------
    if ind_range is None:
        ind_range = [0, total]

    # NOTE(review): codons are marked using absolute feature coordinates on
    # the sliced sequence, so an ind_range not starting at 0 looks
    # unsupported -- confirm before relying on it.
    original_cells = _split_symbols(original.seq[ind_range[0]:ind_range[1]])
    direction = ["CDS_Orientation"]
    distance = 0
    alternating = 0
    for feat in coding:
        direction.extend("_" for _ in range(distance, feat.location.start))
        for counter in range(feat.location.start, feat.location.end, 3):
            odd = alternating % 2 == 1
            for offset in range(3):
                original_cells[counter + offset] = mark_codon(
                    original_cells[counter + offset], odd)
            alternating += 1

            if feat.strand == +1:
                direction.extend(["-", "-", ">"])
            if feat.strand == -1:
                direction.extend(["<", "-", "-"])
        distance = feat.location.end
    direction.extend("_" for _ in range(distance, total))

    # ---- candidate plasmids --------------------------------------------
    plasmid_rows = []
    for s in others:
        row = [s] + _split_symbols(
            tot[s]["sequence"][ind_range[0]:ind_range[1]])
        # A set makes each membership test O(1).
        modified = set(annotation_information[s]["modified_positions"])
        for k in range(total):
            if k in modified:
                row[k + 1] = mark_modified(row[k + 1])
        plasmid_rows.append(row)

    # ---- index row -------------------------------------------------------
    index = ["Index"] + [str(i) for i in range(ind_range[0], ind_range[1])]

    rows = (target_rows
            + [target_positions, annot, direction,
               ["Original"] + original_cells]
            + plasmid_rows
            + [index])
    return targets, rows


def _split_rows_into_blocks(rows, sequence_length, max_row):
    """Cut every row into consecutive chunks of ``max_row`` cells.

    Cell 0 of a row is its label, so the rows hold ``sequence_length + 1``
    cells and ``sequence_length // max_row + 1`` blocks are always needed.
    The first block already starts with the label; later blocks get the
    label prepended.
    """
    n_blocks = sequence_length // max_row + 1
    blocks = []
    for i in range(n_blocks):
        j = i * max_row
        block = []
        for row in rows:
            chunk = list(row[j:j + max_row])
            if i >= 1:
                chunk = [row[0]] + chunk
            block.append(chunk)
        blocks.append(block)
    return blocks


def _ansi_codon(cell, odd):
    """Colour a codon base for the terminal: red for odd codons, else green."""
    colour = "\033[1;31;40m" if odd else "\033[1;32;40m"
    return colour + cell + "\033[0m"


def _ansi_modified(cell):
    """Colour a modified base green for the terminal."""
    return "\033[1;32;40m" + cell + "\033[0m"


def _pdf_codon(cell, odd):
    """Prefix a codon base with 'f' (odd codons) or 's' (even codons)."""
    return ('f' if odd else 's') + cell


def _pdf_modified(cell):
    """Suffix a modified base with 'm'."""
    return cell + "m"


def print_color_seq(original = None,
                    others = None,
                    annotation_information = None,
                    tot = None,
                    ind_range = None,
                    patterns = None,
                    f_patterns = None,
                    patts = None,
                    max_row = 18):
    """Print a coloured, blockwise comparison of the original and candidates.

    Each block shows, row by row: the individual targets, the aggregate
    target positions, the annotation, the CDS orientation, the original
    sequence (codons coloured alternately), the candidate plasmids (modified
    bases coloured) and the position index. The coding features and
    ``f_patterns`` are printed at the end.

    Args:
        original: plasmid object exposing ``seq``, ``features`` and ``len()``.
        others: iterable of candidate names (keys of ``tot`` and of
            ``annotation_information``).
        annotation_information: dict; ``[name]["modified_positions"]`` lists
            the modified positions of a candidate.
        tot: dict; ``[name]["sequence"]`` is the candidate sequence as text.
        ind_range: optional ``[start, end]`` window; defaults to the whole
            sequence.
        patterns: dict keyed by position; a value with more than one element
            is shown as ``+``, otherwise as ``T``.
        f_patterns: dict mapping a pattern to its list of hits (hit text at
            index 0, start at index 1, end at index 2).
        patts: unused.
        max_row: number of cells per block.

    Returns:
        None.
    """
    targets, rows = _comparison_rows(
        original, others, annotation_information, tot, ind_range,
        patterns, f_patterns, _ansi_codon, _ansi_modified)

    for block in _split_rows_into_blocks(rows, len(original.seq), max_row):
        print("\n")

        label_width = max(len(row[0]) for row in block)
        index_cells = block[-1]
        last = len(block) - 1
        for row_number, row in enumerate(block):
            row[0] += " " * (label_width - len(row[0]))
            if row_number < last:
                # Widen each cell by the width of the index printed below it.
                for col in range(1, min(len(row), len(index_cells))):
                    row[col] = " " * len(str(index_cells[col])) + row[col]
            print(" ".join(row))

    print("\n")
    print(_coding_features(original))
    print("\n")
    print(f_patterns)

    return


def print_to_pdf(original = None,
                 others = None,
                 annotation_information = None,
                 tot = None,
                 ind_range = None,
                 patterns = None,
                 f_patterns = None,
                 patts = None,
                 max_row = 9):
    """Write the sequence comparison and its legend to two PDF files.

    ``comparison_syngenic_plasmids.pdf`` receives one table per block of
    ``max_row`` cells, with the same rows as ``print_color_seq``. Codon bases
    of the original are prefixed with ``f``/``s`` alternately and modified
    bases of candidates are suffixed with ``m``. ``./further_information.pdf``
    lists the coding regions, the patterns with their hits and the target
    identifiers.

    Args:
        original: plasmid object exposing ``seq``, ``features`` and ``len()``.
        others: iterable of candidate names (keys of ``tot`` and of
            ``annotation_information``).
        annotation_information: dict; ``[name]["modified_positions"]`` lists
            the modified positions of a candidate.
        tot: dict; ``[name]["sequence"]`` is the candidate sequence as text.
        ind_range: optional ``[start, end]`` window; defaults to the whole
            sequence.
        patterns: dict keyed by position; a value with more than one element
            is shown as ``+``, otherwise as ``T``.
        f_patterns: dict mapping a pattern to its list of hits (hit text at
            index 0, start at index 1, end at index 2).
        patts: unused.
        max_row: number of cells per table block.

    Returns:
        None.
    """
    from reportlab.lib.pagesizes import letter
    from reportlab.platypus import SimpleDocTemplate, Table
    from reportlab.pdfgen import canvas

    targets, rows = _comparison_rows(
        original, others, annotation_information, tot, ind_range,
        patterns, f_patterns, _pdf_codon, _pdf_modified)

    doc = SimpleDocTemplate("comparison_syngenic_plasmids.pdf",pagesize=letter,
                            rightMargin=30,leftMargin=30,
                            topMargin=30,bottomMargin=30)

    elements = []
    for block in _split_rows_into_blocks(rows, len(original.seq), max_row):
        elements.append(Table(block, hAlign='LEFT'))
        # An empty table is used as a spacer between blocks.
        elements.append(Table([["", "", "", "", ""]]))

    doc.build(elements)

    c = canvas.Canvas("./further_information.pdf")
    c.drawString(100,750,"CDS regions:")
    upper_bound = 750
    for feat in original.features:
        if feat.type.lower() in ["gene", "cds"]:
            upper_bound -= 15
            if feat.location.strand == -1:
                sign = "-"
            else:
                sign = "+"
            c.drawString(115,upper_bound, str("[") + str(feat.location.start)+ ":" + str(feat.location.end) + "]" + "(" + sign + ")")

    upper_bound -= 30
    c.drawString(100,upper_bound,"Patterns and the corresponding targets on the plasmid sequence:")
    for f_pattern in f_patterns.keys():
        upper_bound -= 15
        c.drawString(115,upper_bound,f_pattern + ":")
        for val in f_patterns[f_pattern]:
            upper_bound -= 15
            c.drawString(130,upper_bound,str(val))
        upper_bound -= 5

    upper_bound -= 30
    c.drawString(100,upper_bound,"Identifiers of the targets found in the plasmid sequence:")
    for target in targets.keys():
        upper_bound -= 15
        c.drawString(115,upper_bound,target + ": " + targets[target])

    c.save()

    return
948
949
def produce_random_targets(sequence):
    """Placeholder for a generator of test targets; it currently does nothing.

    Scenarios listed for a future implementation:
      * a target spanning two contiguous CDS;
      * a target in a non-coding region;
      * a target in a coding region;
      * a target overlapping a CDS on its left side;
      * a target overlapping a CDS on its right side.

    Args:
        sequence: ignored by the current implementation.

    Returns:
        None.
    """
    return None